Diffstat (limited to 'libc/sysdeps')
-rw-r--r--  libc/sysdeps/generic/ldconfig.h | 39
-rw-r--r--  libc/sysdeps/generic/math_private.h | 6
-rw-r--r--  libc/sysdeps/generic/stackguard-macros.h | 8
-rw-r--r--  libc/sysdeps/i386/ffs.c | 1
-rw-r--r--  libc/sysdeps/i386/i686/ffs.c | 1
-rw-r--r--  libc/sysdeps/i386/stackguard-macros.h | 8
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/MathLib.h | 30
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/dbl2mpn.c | 8
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/dla.h | 86
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/dosincos.c | 254
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_acosh.c | 55
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_atan2.c | 27
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_cosh.c | 72
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_exp.c | 406
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_exp2.c | 8
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_fmod.c | 225
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_gamma_r.c | 2
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_hypot.c | 157
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_ilogb.c | 53
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_j0.c | 394
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_j1.c | 366
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_jn.c | 478
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_log.c | 23
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_log10.c | 16
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_log2.c | 28
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_pow.c | 573
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_rem_pio2.c | 215
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_remainder.c | 180
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_sinh.c | 69
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/e_sqrt.c | 95
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/halfulp.c | 129
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/k_rem_pio2.c | 315
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/mpa-arch.h | 20
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/mpa.c | 29
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/mpatan.c | 1
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/mpn2dbl.c | 2
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/mptan.c | 1
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/mydefs.h | 9
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_asinh.c | 54
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_atan.c | 21
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_cbrt.c | 19
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_ceil.c | 91
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_copysign.c | 13
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_erf.c | 523
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_expm1.c | 227
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_fabs.c | 11
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_finite.c | 9
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_floor.c | 91
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_frexp.c | 37
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_isinf.c | 10
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_isinf_ns.c | 6
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_isnan.c | 15
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_llround.c | 2
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_log1p.c | 171
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_logb.c | 2
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_lrint.c | 2
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_modf.c | 84
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_nearbyint.c | 63
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_remquo.c | 16
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_rint.c | 46
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_scalbln.c | 61
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_scalbn.c | 61
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_sin.c | 445
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_sincos.c | 2
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_tan.c | 10
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/s_tanh.c | 68
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/sincos32.c | 520
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/slowexp.c | 13
-rw-r--r--  libc/sysdeps/ieee754/dbl-64/slowpow.c | 14
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128/printf_fphex.c | 14
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c | 8
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_acosl.c | 38
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_asinl.c | 28
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_atan2l.c | 14
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_atanhl.c | 6
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_coshl.c | 4
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_exp10l.c | 6
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_expl.c | 34
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_fmodl.c | 125
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_gammal_r.c | 7
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_hypotl.c | 88
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_ilogbl.c | 16
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_jnl.c | 32
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_log10l.c | 8
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_log2l.c | 8
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_logl.c | 32
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_powl.c | 136
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c | 7
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_remainderl.c | 18
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_sinhl.c | 4
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c | 51
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/ieee754.h | 98
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/k_cosl.c | 10
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/k_sincosl.c | 12
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/k_sinl.c | 8
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/k_tanl.c | 34
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c | 46
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h | 206
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c | 72
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c | 42
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_asinhl.c | 5
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_atanl.c | 15
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_cosl.c | 4
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_erfl.c | 55
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_expm1l.c | 12
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_fabsl.c | 10
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_finitel.c | 12
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c | 5
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_frexpl.c | 19
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_isinf_nsl.c | 15
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_isinfl.c | 17
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_isnanl.c | 14
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_issignalingl.c | 7
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_log1pl.c | 11
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_logbl.c | 5
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_modfl.c | 27
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_nearbyintl.c | 62
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c | 57
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c | 13
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c | 8
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_remquol.c | 16
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_scalblnl.c | 21
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_scalbnl.c | 21
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_signbitl.c | 4
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_sincosl.c | 4
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_sinl.c | 4
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_tanhl.c | 8
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/s_tanl.c | 4
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/strtold_l.c | 10
-rw-r--r--  libc/sysdeps/ieee754/ldbl-128ibm/x2y2m1l.c | 26
-rw-r--r--  libc/sysdeps/ieee754/ldbl-96/printf_fphex.c | 8
-rw-r--r--  libc/sysdeps/mach/hurd/fork.c | 15
-rw-r--r--  libc/sysdeps/mach/hurd/i386/tls.h | 1
-rw-r--r--  libc/sysdeps/posix/dirstream.h | 9
-rw-r--r--  libc/sysdeps/posix/getaddrinfo.c | 103
-rw-r--r--  libc/sysdeps/powerpc/bits/fenv.h | 71
-rw-r--r--  libc/sysdeps/powerpc/bits/mathinline.h | 26
-rw-r--r--  libc/sysdeps/powerpc/ffs.c | 1
-rw-r--r--  libc/sysdeps/powerpc/fpu/e_sqrt.c | 2
-rw-r--r--  libc/sysdeps/powerpc/fpu/e_sqrtf.c | 2
-rw-r--r--  libc/sysdeps/powerpc/fpu/fclrexcpt.c | 4
-rw-r--r--  libc/sysdeps/powerpc/fpu/fedisblxcpt.c | 10
-rw-r--r--  libc/sysdeps/powerpc/fpu/feenablxcpt.c | 10
-rw-r--r--  libc/sysdeps/powerpc/fpu/fegetexcept.c | 10
-rw-r--r--  libc/sysdeps/powerpc/fpu/feholdexcpt.c | 5
-rw-r--r--  libc/sysdeps/powerpc/fpu/fenv_libc.h | 2
-rw-r--r--  libc/sysdeps/powerpc/fpu/fesetenv.c | 4
-rw-r--r--  libc/sysdeps/powerpc/fpu/feupdateenv.c | 6
-rw-r--r--  libc/sysdeps/powerpc/fpu/fgetexcptflg.c | 2
-rw-r--r--  libc/sysdeps/powerpc/fpu/fraiseexcpt.c | 12
-rw-r--r--  libc/sysdeps/powerpc/fpu/fsetexcptflg.c | 8
-rw-r--r--  libc/sysdeps/powerpc/fpu/ftestexcept.c | 2
-rw-r--r--  libc/sysdeps/powerpc/fpu/libm-test-ulps | 68
-rw-r--r--  libc/sysdeps/powerpc/fpu/s_float_bitwise.h | 54
-rw-r--r--  libc/sysdeps/powerpc/fpu/s_llround.c | 33
-rw-r--r--  libc/sysdeps/powerpc/fpu/s_llroundf.c | 33
-rw-r--r--  libc/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c | 87
-rw-r--r--  libc/sysdeps/powerpc/fpu_control.h | 69
-rw-r--r--  libc/sysdeps/powerpc/jmpbuf-offsets.h | 6
-rw-r--r--  libc/sysdeps/powerpc/longjmp.c | 6
-rw-r--r--  libc/sysdeps/powerpc/nofpu/Makefile | 30
-rw-r--r--  libc/sysdeps/powerpc/nofpu/Subdirs | 1
-rw-r--r--  libc/sysdeps/powerpc/nofpu/Versions | 20
-rw-r--r--  libc/sysdeps/powerpc/nofpu/fclrexcpt.c | 37
-rw-r--r--  libc/sysdeps/powerpc/nofpu/fedisblxcpt.c | 32
-rw-r--r--  libc/sysdeps/powerpc/nofpu/feenablxcpt.c | 32
-rw-r--r--  libc/sysdeps/powerpc/nofpu/fegetenv.c | 48
-rw-r--r--  libc/sysdeps/powerpc/nofpu/fegetexcept.c | 27
-rw-r--r--  libc/sysdeps/powerpc/nofpu/fegetround.c | 28
-rw-r--r--  libc/sysdeps/powerpc/nofpu/feholdexcpt.c | 43
-rw-r--r--  libc/sysdeps/powerpc/nofpu/fenv_const.c | 34
-rw-r--r--  libc/sysdeps/powerpc/nofpu/fenv_libc.h | 28
-rw-r--r--  libc/sysdeps/powerpc/nofpu/fesetenv.c | 42
-rw-r--r--  libc/sysdeps/powerpc/nofpu/fesetround.c | 33
-rw-r--r--  libc/sysdeps/powerpc/nofpu/feupdateenv.c | 51
-rw-r--r--  libc/sysdeps/powerpc/nofpu/fgetexcptflg.c | 37
-rw-r--r--  libc/sysdeps/powerpc/nofpu/fraiseexcpt.c | 41
-rw-r--r--  libc/sysdeps/powerpc/nofpu/fsetexcptflg.c | 38
-rw-r--r--  libc/sysdeps/powerpc/nofpu/ftestexcept.c | 28
-rw-r--r--  libc/sysdeps/powerpc/nofpu/get-rounding-mode.h | 35
-rw-r--r--  libc/sysdeps/powerpc/nofpu/libm-test-ulps | 7297
-rw-r--r--  libc/sysdeps/powerpc/nofpu/shlib-versions | 1
-rw-r--r--  libc/sysdeps/powerpc/nofpu/sim-full.c | 46
-rw-r--r--  libc/sysdeps/powerpc/nofpu/soft-supp.h | 48
-rw-r--r--  libc/sysdeps/powerpc/novmx-longjmp.c | 12
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/405/memcmp.S | 128
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/405/memcpy.S | 130
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/405/memset.S | 152
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/405/strcmp.S | 134
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/405/strcpy.S | 107
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/405/strlen.S | 75
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/405/strncmp.S | 128
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/440/Implies | 2
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/464/Implies | 2
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/476/Implies | 2
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/476/memset.S | 152
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/Makefile | 10
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/__longjmp-common.S | 42
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/bsd-_setjmp.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/bsd-setjmp.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/dl-machine.c | 14
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/Makefile | 9
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fclrexcpt.c | 53
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fe_note_change.c (renamed from libc/sysdeps/x86_64/multiarch/strend-sse4.S) | 53
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fedisblxcpt.c | 54
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/feenablxcpt.c | 54
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetenv.c | 47
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetexcept.c | 36
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetround.c | 29
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/feholdexcpt.c | 57
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_const.c | 41
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_libc.h | 96
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fesetenv.c | 49
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fesetround.c | 35
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/feupdateenv.c | 47
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_prctl.c | 42
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_spe.c | 41
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_prctl.c | 42
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_spe.c | 41
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fgetexcptflg.c | 41
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcept-soft.c | 28
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcpt.c | 40
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/fsetexcptflg.c | 55
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/ftestexcept.c | 31
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/get-rounding-mode.h | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/s_fabsf.S | 27
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/e500/nofpu/spe-raise.c | 53
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S | 8
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/fpu/__longjmp.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/fpu/s_copysign.S | 2
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S | 2
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/fpu/s_lrint.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/fpu/s_lround.S | 2
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/fpu/s_roundf.S | 3
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S | 73
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/fpu/setjmp.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/mcount.c | 2
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S | 20
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power4/hp-timing.h | 21
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power4/memcmp.S | 1064
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power4/memcpy.S | 58
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power4/memset.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power4/strncmp.S | 56
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S | 2
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power6/memcpy.S | 81
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power6/memset.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S | 5
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S | 7
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c | 8
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/memchr.S | 185
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/memcmp.S | 1626
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/memcpy.S | 24
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/mempcpy.S | 28
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/memrchr.S | 187
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/memset.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/rawmemchr.S | 17
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/strchr.S | 51
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/strchrnul.S | 27
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/strlen.S | 17
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/strncmp.S | 55
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/power7/strnlen.S | 106
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/setjmp-common.S | 41
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/setjmp.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/stackguard-macros.h | 10
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/stpcpy.S | 18
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/strchr.S | 71
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/strcmp.S | 42
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/strcpy.S | 18
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/strlen.S | 69
-rw-r--r--  libc/sysdeps/powerpc/powerpc32/strncmp.S | 56
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/__longjmp-common.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/dl-machine.h | 28
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/fpu/s_floorf.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/fpu/s_rintf.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/fpu/s_roundf.S | 6
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/fpu/s_truncf.S | 4
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/memcpy.S | 27
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/memset.S | 10
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power4/memcmp.S | 1041
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power4/memcpy.S | 61
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power4/memset.S | 30
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power4/strncmp.S | 63
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power6/memcpy.S | 329
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power6/memset.S | 10
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S | 6
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S | 5
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/memchr.S | 188
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/memcmp.S | 1613
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/memcpy.S | 704
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/mempcpy.S | 26
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/memrchr.S | 192
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/memset.S | 6
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/rawmemchr.S | 17
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/strchr.S | 43
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/strchrnul.S | 19
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/strlen.S | 17
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/strncmp.S | 61
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/power7/strnlen.S | 111
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/setjmp-common.S | 72
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/setjmp.S | 12
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/stackguard-macros.h | 10
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/stpcpy.S | 18
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/strchr.S | 75
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/strcmp.S | 65
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/strcpy.S | 27
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/strlen.S | 75
-rw-r--r--  libc/sysdeps/powerpc/powerpc64/strncmp.S | 63
-rw-r--r--  libc/sysdeps/powerpc/preconfigure | 11
-rw-r--r--  libc/sysdeps/powerpc/soft-fp/sfp-machine.h | 115
-rw-r--r--  libc/sysdeps/powerpc/sysdep.h | 15
-rw-r--r--  libc/sysdeps/s390/ffs.c | 1
-rw-r--r--  libc/sysdeps/s390/s390-32/stackguard-macros.h | 11
-rw-r--r--  libc/sysdeps/s390/s390-64/stackguard-macros.h | 14
-rw-r--r--  libc/sysdeps/sh/stackguard-macros.h | 6
-rw-r--r--  libc/sysdeps/sparc/fpu/libm-test-ulps | 125
-rw-r--r--  libc/sysdeps/sparc/sparc32/soft-fp/q_neg.c | 7
-rw-r--r--  libc/sysdeps/sparc/sparc32/stackguard-macros.h | 3
-rw-r--r--  libc/sysdeps/sparc/sparc64/stackguard-macros.h | 3
-rw-r--r--  libc/sysdeps/unix/Makefile | 6
-rw-r--r--  libc/sysdeps/unix/make-syscalls.sh | 2
-rw-r--r--  libc/sysdeps/unix/sysv/linux/bits/fcntl-linux.h | 4
-rw-r--r--  libc/sysdeps/unix/sysv/linux/configure | 2
-rw-r--r--  libc/sysdeps/unix/sysv/linux/configure.in | 2
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/bits/sigstack.h | 54
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/405/Implies | 2
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/440/Implies | 2
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/464/Implies | 2
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/476/Implies | 2
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500/nofpu/Implies | 3
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S | 12
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/makecontext.S | 8
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/Implies | 2
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/context-e500.h | 144
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/getcontext.S | 60
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/c++-types.data | 67
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/ld.abilist | 17
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libBrokenLocale.abilist | 3
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libanl.abilist | 6
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libc.abilist | 2523
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libcrypt.abilist | 9
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libdl.abilist | 18
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libm.abilist | 519
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libnsl.abilist | 127
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libpthread.abilist | 277
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libresolv.abilist | 104
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/librt.abilist | 52
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libthread_db.abilist | 48
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libutil.abilist | 8
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/localplt.data | 41
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/setcontext.S | 60
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/swapcontext.S | 60
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S | 12
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S | 24
-rw-r--r--  libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S | 6
-rw-r--r--  libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h | 1
-rw-r--r--  libc/sysdeps/unix/sysv/linux/tst-fanotify.c | 17
-rw-r--r--  libc/sysdeps/x86_64/ffs.c | 1
-rw-r--r--  libc/sysdeps/x86_64/fpu/printf_fphex.c | 9
-rw-r--r--  libc/sysdeps/x86_64/memset.S | 10
-rw-r--r--  libc/sysdeps/x86_64/multiarch/Makefile | 4
-rw-r--r--  libc/sysdeps/x86_64/multiarch/ifunc-impl-list.c | 7
-rw-r--r--  libc/sysdeps/x86_64/multiarch/memcmp-sse4.S | 84
-rw-r--r--  libc/sysdeps/x86_64/multiarch/memcmp-ssse3.S | 126
-rw-r--r--  libc/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S | 10
-rw-r--r--  libc/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S | 86
-rw-r--r--  libc/sysdeps/x86_64/multiarch/memcpy-ssse3.S | 254
-rw-r--r--  libc/sysdeps/x86_64/multiarch/strchr.S | 127
-rw-r--r--  libc/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S | 9
-rw-r--r--  libc/sysdeps/x86_64/multiarch/strrchr-sse2-no-bsf.S | 555
-rw-r--r--  libc/sysdeps/x86_64/multiarch/strrchr.S | 288
-rw-r--r--  libc/sysdeps/x86_64/stackguard-macros.h | 5
-rw-r--r--  libc/sysdeps/x86_64/strchr.S | 188
-rw-r--r--  libc/sysdeps/x86_64/strchrnul.S | 41
-rw-r--r--  libc/sysdeps/x86_64/strrchr.S | 238
385 files changed, 26368 insertions, 9199 deletions
diff --git a/libc/sysdeps/generic/ldconfig.h b/libc/sysdeps/generic/ldconfig.h
index d96089d49..037f95995 100644
--- a/libc/sysdeps/generic/ldconfig.h
+++ b/libc/sysdeps/generic/ldconfig.h
@@ -21,24 +21,27 @@
#include <stdint.h>
-#define FLAG_ANY -1
-#define FLAG_TYPE_MASK 0x00ff
-#define FLAG_LIBC4 0x0000
-#define FLAG_ELF 0x0001
-#define FLAG_ELF_LIBC5 0x0002
-#define FLAG_ELF_LIBC6 0x0003
-#define FLAG_REQUIRED_MASK 0xff00
-#define FLAG_SPARC_LIB64 0x0100
-#define FLAG_IA64_LIB64 0x0200
-#define FLAG_X8664_LIB64 0x0300
-#define FLAG_S390_LIB64 0x0400
-#define FLAG_POWERPC_LIB64 0x0500
-#define FLAG_MIPS64_LIBN32 0x0600
-#define FLAG_MIPS64_LIBN64 0x0700
-#define FLAG_X8664_LIBX32 0x0800
-#define FLAG_ARM_LIBHF 0x0900
-#define FLAG_AARCH64_LIB64 0x0a00
-#define FLAG_ARM_LIBSF 0x0b00
+#define FLAG_ANY -1
+#define FLAG_TYPE_MASK 0x00ff
+#define FLAG_LIBC4 0x0000
+#define FLAG_ELF 0x0001
+#define FLAG_ELF_LIBC5 0x0002
+#define FLAG_ELF_LIBC6 0x0003
+#define FLAG_REQUIRED_MASK 0xff00
+#define FLAG_SPARC_LIB64 0x0100
+#define FLAG_IA64_LIB64 0x0200
+#define FLAG_X8664_LIB64 0x0300
+#define FLAG_S390_LIB64 0x0400
+#define FLAG_POWERPC_LIB64 0x0500
+#define FLAG_MIPS64_LIBN32 0x0600
+#define FLAG_MIPS64_LIBN64 0x0700
+#define FLAG_X8664_LIBX32 0x0800
+#define FLAG_ARM_LIBHF 0x0900
+#define FLAG_AARCH64_LIB64 0x0a00
+#define FLAG_ARM_LIBSF 0x0b00
+#define FLAG_MIPS_LIB32_NAN2008 0x0c00
+#define FLAG_MIPS64_LIBN32_NAN2008 0x0d00
+#define FLAG_MIPS64_LIBN64_NAN2008 0x0e00
/* Name of auxiliary cache. */
#define _PATH_LDCONFIG_AUX_CACHE "/var/cache/ldconfig/aux-cache"
diff --git a/libc/sysdeps/generic/math_private.h b/libc/sysdeps/generic/math_private.h
index c0fc03d38..9b881a3e7 100644
--- a/libc/sysdeps/generic/math_private.h
+++ b/libc/sysdeps/generic/math_private.h
@@ -356,10 +356,8 @@ extern void __dubcos (double __x, double __dx, double __v[]);
extern double __halfulp (double __x, double __y);
extern double __sin32 (double __x, double __res, double __res1);
extern double __cos32 (double __x, double __res, double __res1);
-extern double __mpsin (double __x, double __dx);
-extern double __mpcos (double __x, double __dx);
-extern double __mpsin1 (double __x);
-extern double __mpcos1 (double __x);
+extern double __mpsin (double __x, double __dx, bool __range_reduce);
+extern double __mpcos (double __x, double __dx, bool __range_reduce);
extern double __slowexp (double __x);
extern double __slowpow (double __x, double __y, double __z);
extern void __docos (double __x, double __dx, double __v[]);
diff --git a/libc/sysdeps/generic/stackguard-macros.h b/libc/sysdeps/generic/stackguard-macros.h
index ababf65d3..b4a6b23ff 100644
--- a/libc/sysdeps/generic/stackguard-macros.h
+++ b/libc/sysdeps/generic/stackguard-macros.h
@@ -2,3 +2,11 @@
extern uintptr_t __stack_chk_guard;
#define STACK_CHK_GUARD __stack_chk_guard
+
+#ifdef PTRGUARD_LOCAL
+extern uintptr_t __pointer_chk_guard_local;
+# define POINTER_CHK_GUARD __pointer_chk_guard_local
+#else
+extern uintptr_t __pointer_chk_guard;
+# define POINTER_CHK_GUARD __pointer_chk_guard
+#endif
diff --git a/libc/sysdeps/i386/ffs.c b/libc/sysdeps/i386/ffs.c
index 47496dcf7..77d2e60b7 100644
--- a/libc/sysdeps/i386/ffs.c
+++ b/libc/sysdeps/i386/ffs.c
@@ -41,6 +41,7 @@ __ffs (x)
return cnt;
}
weak_alias (__ffs, ffs)
+libc_hidden_def (__ffs)
libc_hidden_builtin_def (ffs)
#undef ffsl
weak_alias (__ffs, ffsl)
diff --git a/libc/sysdeps/i386/i686/ffs.c b/libc/sysdeps/i386/i686/ffs.c
index 5c97050e8..cde1c9956 100644
--- a/libc/sysdeps/i386/i686/ffs.c
+++ b/libc/sysdeps/i386/i686/ffs.c
@@ -39,6 +39,7 @@ __ffs (x)
return cnt + 1;
}
weak_alias (__ffs, ffs)
+libc_hidden_def (__ffs)
libc_hidden_builtin_def (ffs)
#undef ffsl
weak_alias (__ffs, ffsl)
diff --git a/libc/sysdeps/i386/stackguard-macros.h b/libc/sysdeps/i386/stackguard-macros.h
index 8c31e197e..039762927 100644
--- a/libc/sysdeps/i386/stackguard-macros.h
+++ b/libc/sysdeps/i386/stackguard-macros.h
@@ -2,3 +2,11 @@
#define STACK_CHK_GUARD \
({ uintptr_t x; asm ("movl %%gs:0x14, %0" : "=r" (x)); x; })
+
+#define POINTER_CHK_GUARD \
+ ({ \
+ uintptr_t x; \
+ asm ("movl %%gs:%c1, %0" : "=r" (x) \
+ : "i" (offsetof (tcbhead_t, pointer_guard))); \
+ x; \
+ })
diff --git a/libc/sysdeps/ieee754/dbl-64/MathLib.h b/libc/sysdeps/ieee754/dbl-64/MathLib.h
index 23f71980b..cf56606e3 100644
--- a/libc/sysdeps/ieee754/dbl-64/MathLib.h
+++ b/libc/sysdeps/ieee754/dbl-64/MathLib.h
@@ -33,14 +33,14 @@
/* It returns the original status of these modes. */
/* See further explanations of usage in DPChange.h */
/********************************************************************/
-unsigned short Init_Lib(void);
+unsigned short Init_Lib (void);
/********************************************************************/
/* Function that changes the precision and rounding modes to the */
/* specified by the argument received. See further explanations in */
/* DPChange.h */
/********************************************************************/
-void Exit_Lib(unsigned short);
+void Exit_Lib (unsigned short);
/* The asin() function calculates the arc sine of its argument. */
@@ -48,7 +48,7 @@ void Exit_Lib(unsigned short);
/* (between -PI/2 and PI/2). */
/* If the argument is greater than 1 or less than -1 it returns */
/* a NaN. */
-double uasin(double );
+double uasin (double);
/* The acos() function calculates the arc cosine of its argument. */
@@ -56,47 +56,45 @@ double uasin(double );
/* (between -PI/2 and PI/2). */
/* If the argument is greater than 1 or less than -1 it returns */
/* a NaN. */
-double uacos(double );
+double uacos (double);
/* The atan() function calculates the arctanget of its argument. */
/* The function returns the arc tangent in radians */
/* (between -PI/2 and PI/2). */
-double uatan(double );
+double uatan (double);
/* The uatan2() function calculates the arc tangent of the two arguments x */
/* and y (x is the right argument and y is the left one).The signs of both */
/* arguments are used to determine the quadrant of the result. */
/* The function returns the result in radians, which is between -PI and PI */
-double uatan2(double ,double );
+double uatan2 (double, double);
/* Compute log(x). The base of log is e (natural logarithm) */
-double ulog(double );
+double ulog (double);
/* Compute e raised to the power of argument x. */
-double uexp(double );
+double uexp (double);
/* Compute sin(x). The argument x is assumed to be given in radians.*/
-double usin(double );
+double usin (double);
/* Compute cos(x). The argument x is assumed to be given in radians.*/
-double ucos(double );
+double ucos (double);
/* Compute tan(x). The argument x is assumed to be given in radians.*/
-double utan(double );
+double utan (double);
/* Compute the square root of non-negative argument x. */
/* If x is negative the returned value is NaN. */
-double usqrt(double );
+double usqrt (double);
/* Compute x raised to the power of y, where x is the left argument */
/* and y is the right argument. The function returns a NaN if x<0. */
/* If x equals zero it returns -inf */
-double upow(double , double );
+double upow (double, double);
/* Computing x mod y, where x is the left argument and y is the */
/* right one. */
-double uremainder(double , double );
-
-
+double uremainder (double, double);
#endif
diff --git a/libc/sysdeps/ieee754/dbl-64/dbl2mpn.c b/libc/sysdeps/ieee754/dbl-64/dbl2mpn.c
index f2294de5a..087d64377 100644
--- a/libc/sysdeps/ieee754/dbl-64/dbl2mpn.c
+++ b/libc/sysdeps/ieee754/dbl-64/dbl2mpn.c
@@ -40,14 +40,14 @@ __mpn_extract_double (mp_ptr res_ptr, mp_size_t size,
#if BITS_PER_MP_LIMB == 32
res_ptr[0] = u.ieee.mantissa1; /* Low-order 32 bits of fraction. */
res_ptr[1] = u.ieee.mantissa0; /* High-order 20 bits. */
- #define N 2
+ # define N 2
#elif BITS_PER_MP_LIMB == 64
/* Hopefully the compiler will combine the two bitfield extracts
and this composition into just the original quadword extract. */
res_ptr[0] = ((mp_limb_t) u.ieee.mantissa0 << 32) | u.ieee.mantissa1;
- #define N 1
+ # define N 1
#else
- #error "mp_limb size " BITS_PER_MP_LIMB "not accounted for"
+ # error "mp_limb size " BITS_PER_MP_LIMB "not accounted for"
#endif
/* The format does not fill the last limb. There are some zeros. */
#define NUM_LEADING_ZEROS (BITS_PER_MP_LIMB \
@@ -73,7 +73,7 @@ __mpn_extract_double (mp_ptr res_ptr, mp_size_t size,
#if N == 2
res_ptr[N - 1] = res_ptr[1] << cnt
| (N - 1)
- * (res_ptr[0] >> (BITS_PER_MP_LIMB - cnt));
+ * (res_ptr[0] >> (BITS_PER_MP_LIMB - cnt));
res_ptr[0] <<= cnt;
#else
res_ptr[N - 1] <<= cnt;
diff --git a/libc/sysdeps/ieee754/dbl-64/dla.h b/libc/sysdeps/ieee754/dbl-64/dla.h
index b09f00ec7..6666982bf 100644
--- a/libc/sysdeps/ieee754/dbl-64/dla.h
+++ b/libc/sysdeps/ieee754/dbl-64/dla.h
@@ -61,13 +61,13 @@
/* storage variables of type double. */
#ifdef DLA_FMS
-# define EMULV(x,y,z,zz,p,hx,tx,hy,ty) \
- z=x*y; zz=DLA_FMS(x,y,z);
+# define EMULV(x, y, z, zz, p, hx, tx, hy, ty) \
+ z = x * y; zz = DLA_FMS (x, y, z);
#else
-# define EMULV(x,y,z,zz,p,hx,tx,hy,ty) \
- p=CN*(x); hx=((x)-p)+p; tx=(x)-hx; \
- p=CN*(y); hy=((y)-p)+p; ty=(y)-hy; \
- z=(x)*(y); zz=(((hx*hy-z)+hx*ty)+tx*hy)+tx*ty;
+# define EMULV(x, y, z, zz, p, hx, tx, hy, ty) \
+ p = CN * (x); hx = ((x) - p) + p; tx = (x) - hx; \
+ p = CN * (y); hy = ((y) - p) + p; ty = (y) - hy; \
+ z = (x) * (y); zz = (((hx * hy - z) + hx * ty) + tx * hy) + tx * ty;
#endif
@@ -93,11 +93,11 @@
/* are assumed to be double-length numbers. r,s are temporary */
/* storage variables of type double. */
-#define ADD2(x,xx,y,yy,z,zz,r,s) \
- r=(x)+(y); s=(ABS(x)>ABS(y)) ? \
- (((((x)-r)+(y))+(yy))+(xx)) : \
- (((((y)-r)+(x))+(xx))+(yy)); \
- z=r+s; zz=(r-z)+s;
+#define ADD2(x, xx, y, yy, z, zz, r, s) \
+ r = (x) + (y); s = (ABS (x) > ABS (y)) ? \
+ (((((x) - r) + (y)) + (yy)) + (xx)) : \
+ (((((y) - r) + (x)) + (xx)) + (yy)); \
+ z = r + s; zz = (r - z) + s;
/* Double-length subtraction, Dekker. The macro produces a double-length */
@@ -106,11 +106,11 @@
/* are assumed to be double-length numbers. r,s are temporary */
/* storage variables of type double. */
-#define SUB2(x,xx,y,yy,z,zz,r,s) \
- r=(x)-(y); s=(ABS(x)>ABS(y)) ? \
- (((((x)-r)-(y))-(yy))+(xx)) : \
- ((((x)-((y)+r))+(xx))-(yy)); \
- z=r+s; zz=(r-z)+s;
+#define SUB2(x, xx, y, yy, z, zz, r, s) \
+ r = (x) - (y); s = (ABS (x) > ABS (y)) ? \
+ (((((x) - r) - (y)) - (yy)) + (xx)) : \
+ ((((x) - ((y) + r)) + (xx)) - (yy)); \
+ z = r + s; zz = (r - z) + s;
/* Double-length multiplication, Dekker. The macro produces a double-length */
@@ -119,9 +119,9 @@
/* are assumed to be double-length numbers. p,hx,tx,hy,ty,q,c,cc are */
/* temporary storage variables of type double. */
-#define MUL2(x,xx,y,yy,z,zz,p,hx,tx,hy,ty,q,c,cc) \
- MUL12(x,y,c,cc,p,hx,tx,hy,ty,q) \
- cc=((x)*(yy)+(xx)*(y))+cc; z=c+cc; zz=(c-z)+cc;
+#define MUL2(x, xx, y, yy, z, zz, p, hx, tx, hy, ty, q, c, cc) \
+ MUL12 (x, y, c, cc, p, hx, tx, hy, ty, q) \
+ cc = ((x) * (yy) + (xx) * (y)) + cc; z = c + cc; zz = (c - z) + cc;
/* Double-length division, Dekker. The macro produces a double-length */
@@ -142,18 +142,18 @@
/* are assumed to be double-length numbers. r,rr,s,ss,u,uu,w */
/* are temporary storage variables of type double. */
-#define ADD2A(x,xx,y,yy,z,zz,r,rr,s,ss,u,uu,w) \
- r=(x)+(y); \
- if (ABS(x)>ABS(y)) { rr=((x)-r)+(y); s=(rr+(yy))+(xx); } \
- else { rr=((y)-r)+(x); s=(rr+(xx))+(yy); } \
- if (rr!=0.0) { \
- z=r+s; zz=(r-z)+s; } \
- else { \
- ss=(ABS(xx)>ABS(yy)) ? (((xx)-s)+(yy)) : (((yy)-s)+(xx)); \
- u=r+s; \
- uu=(ABS(r)>ABS(s)) ? ((r-u)+s) : ((s-u)+r) ; \
- w=uu+ss; z=u+w; \
- zz=(ABS(u)>ABS(w)) ? ((u-z)+w) : ((w-z)+u) ; }
+#define ADD2A(x, xx, y, yy, z, zz, r, rr, s, ss, u, uu, w) \
+ r = (x) + (y); \
+ if (ABS (x) > ABS (y)) { rr = ((x) - r) + (y); s = (rr + (yy)) + (xx); } \
+ else { rr = ((y) - r) + (x); s = (rr + (xx)) + (yy); } \
+ if (rr != 0.0) { \
+ z = r + s; zz = (r - z) + s; } \
+ else { \
+ ss = (ABS (xx) > ABS (yy)) ? (((xx) - s) + (yy)) : (((yy) - s) + (xx));\
+ u = r + s; \
+ uu = (ABS (r) > ABS (s)) ? ((r - u) + s) : ((s - u) + r); \
+ w = uu + ss; z = u + w; \
+ zz = (ABS (u) > ABS (w)) ? ((u - z) + w) : ((w - z) + u); }
/* Double-length subtraction, slower but more accurate than SUB2. */
@@ -163,15 +163,15 @@
/* are assumed to be double-length numbers. r,rr,s,ss,u,uu,w */
/* are temporary storage variables of type double. */
-#define SUB2A(x,xx,y,yy,z,zz,r,rr,s,ss,u,uu,w) \
- r=(x)-(y); \
- if (ABS(x)>ABS(y)) { rr=((x)-r)-(y); s=(rr-(yy))+(xx); } \
- else { rr=(x)-((y)+r); s=(rr+(xx))-(yy); } \
- if (rr!=0.0) { \
- z=r+s; zz=(r-z)+s; } \
- else { \
- ss=(ABS(xx)>ABS(yy)) ? (((xx)-s)-(yy)) : ((xx)-((yy)+s)); \
- u=r+s; \
- uu=(ABS(r)>ABS(s)) ? ((r-u)+s) : ((s-u)+r) ; \
- w=uu+ss; z=u+w; \
- zz=(ABS(u)>ABS(w)) ? ((u-z)+w) : ((w-z)+u) ; }
+#define SUB2A(x, xx, y, yy, z, zz, r, rr, s, ss, u, uu, w) \
+ r = (x) - (y); \
+ if (ABS (x) > ABS (y)) { rr = ((x) - r) - (y); s = (rr - (yy)) + (xx); } \
+ else { rr = (x) - ((y) + r); s = (rr + (xx)) - (yy); } \
+ if (rr != 0.0) { \
+ z = r + s; zz = (r - z) + s; } \
+ else { \
+ ss = (ABS (xx) > ABS (yy)) ? (((xx) - s) - (yy)) : ((xx) - ((yy) + s)); \
+ u = r + s; \
+ uu = (ABS (r) > ABS (s)) ? ((r - u) + s) : ((s - u) + r); \
+ w = uu + ss; z = u + w; \
+ zz = (ABS (u) > ABS (w)) ? ((u - z) + w) : ((w - z) + u); }
diff --git a/libc/sysdeps/ieee754/dbl-64/dosincos.c b/libc/sysdeps/ieee754/dbl-64/dosincos.c
index 00726285a..e1c8836b7 100644
--- a/libc/sysdeps/ieee754/dbl-64/dosincos.c
+++ b/libc/sysdeps/ieee754/dbl-64/dosincos.c
@@ -57,49 +57,52 @@ extern const union
void
SECTION
-__dubsin(double x, double dx, double v[]) {
- double r,s,c,cc,d,dd,d2,dd2,e,ee,
- sn,ssn,cs,ccs,ds,dss,dc,dcc;
+__dubsin (double x, double dx, double v[])
+{
+ double r, s, c, cc, d, dd, d2, dd2, e, ee,
+ sn, ssn, cs, ccs, ds, dss, dc, dcc;
#ifndef DLA_FMS
- double p,hx,tx,hy,ty,q;
+ double p, hx, tx, hy, ty, q;
#endif
mynumber u;
int4 k;
- u.x=x+big.x;
- k = u.i[LOW_HALF]<<2;
- x=x-(u.x-big.x);
- d=x+dx;
- dd=(x-d)+dx;
- /* sin(x+dx)=sin(Xi+t)=sin(Xi)*cos(t) + cos(Xi)sin(t) where t ->0 */
- MUL2(d,dd,d,dd,d2,dd2,p,hx,tx,hy,ty,q,c,cc);
- sn=__sincostab.x[k]; /* */
- ssn=__sincostab.x[k+1]; /* sin(Xi) and cos(Xi) */
- cs=__sincostab.x[k+2]; /* */
- ccs=__sincostab.x[k+3]; /* */
- MUL2(d2,dd2,s7.x,ss7.x,ds,dss,p,hx,tx,hy,ty,q,c,cc); /* Taylor */
- ADD2(ds,dss,s5.x,ss5.x,ds,dss,r,s);
- MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc); /* series */
- ADD2(ds,dss,s3.x,ss3.x,ds,dss,r,s);
- MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc); /* for sin */
- MUL2(d,dd,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);
- ADD2(ds,dss,d,dd,ds,dss,r,s); /* ds=sin(t) */
-
- MUL2(d2,dd2,c8.x,cc8.x,dc,dcc,p,hx,tx,hy,ty,q,c,cc); ;/* Taylor */
- ADD2(dc,dcc,c6.x,cc6.x,dc,dcc,r,s);
- MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc); /* series */
- ADD2(dc,dcc,c4.x,cc4.x,dc,dcc,r,s);
- MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc); /* for cos */
- ADD2(dc,dcc,c2.x,cc2.x,dc,dcc,r,s);
- MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc); /* dc=cos(t) */
-
- MUL2(cs,ccs,ds,dss,e,ee,p,hx,tx,hy,ty,q,c,cc);
- MUL2(dc,dcc,sn,ssn,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
- SUB2(e,ee,dc,dcc,e,ee,r,s);
- ADD2(e,ee,sn,ssn,e,ee,r,s); /* e+ee=sin(x+dx) */
-
- v[0]=e;
- v[1]=ee;
+ u.x = x + big.x;
+ k = u.i[LOW_HALF] << 2;
+ x = x - (u.x - big.x);
+ d = x + dx;
+ dd = (x - d) + dx;
+ /* sin(x+dx)=sin(Xi+t)=sin(Xi)*cos(t) + cos(Xi)sin(t) where t ->0 */
+ MUL2 (d, dd, d, dd, d2, dd2, p, hx, tx, hy, ty, q, c, cc);
+ sn = __sincostab.x[k]; /* */
+ ssn = __sincostab.x[k + 1]; /* sin(Xi) and cos(Xi) */
+ cs = __sincostab.x[k + 2]; /* */
+ ccs = __sincostab.x[k + 3]; /* */
+ /* Taylor series for sin ds=sin(t) */
+ MUL2 (d2, dd2, s7.x, ss7.x, ds, dss, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (ds, dss, s5.x, ss5.x, ds, dss, r, s);
+ MUL2 (d2, dd2, ds, dss, ds, dss, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (ds, dss, s3.x, ss3.x, ds, dss, r, s);
+ MUL2 (d2, dd2, ds, dss, ds, dss, p, hx, tx, hy, ty, q, c, cc);
+ MUL2 (d, dd, ds, dss, ds, dss, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (ds, dss, d, dd, ds, dss, r, s);
+
+ /* Taylor series for cos dc=cos(t) */
+ MUL2 (d2, dd2, c8.x, cc8.x, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (dc, dcc, c6.x, cc6.x, dc, dcc, r, s);
+ MUL2 (d2, dd2, dc, dcc, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (dc, dcc, c4.x, cc4.x, dc, dcc, r, s);
+ MUL2 (d2, dd2, dc, dcc, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (dc, dcc, c2.x, cc2.x, dc, dcc, r, s);
+ MUL2 (d2, dd2, dc, dcc, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+
+ MUL2 (cs, ccs, ds, dss, e, ee, p, hx, tx, hy, ty, q, c, cc);
+ MUL2 (dc, dcc, sn, ssn, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+ SUB2 (e, ee, dc, dcc, e, ee, r, s);
+ ADD2 (e, ee, sn, ssn, e, ee, r, s); /* e+ee=sin(x+dx) */
+
+ v[0] = e;
+ v[1] = ee;
}
/**********************************************************************/
/* Routine receive Double-Length number (x+dx) and computes cos(x+dx) */
@@ -110,64 +113,65 @@ __dubsin(double x, double dx, double v[]) {
void
SECTION
-__dubcos(double x, double dx, double v[]) {
- double r,s,c,cc,d,dd,d2,dd2,e,ee,
- sn,ssn,cs,ccs,ds,dss,dc,dcc;
+__dubcos (double x, double dx, double v[])
+{
+ double r, s, c, cc, d, dd, d2, dd2, e, ee,
+ sn, ssn, cs, ccs, ds, dss, dc, dcc;
#ifndef DLA_FMS
- double p,hx,tx,hy,ty,q;
+ double p, hx, tx, hy, ty, q;
#endif
mynumber u;
int4 k;
- u.x=x+big.x;
- k = u.i[LOW_HALF]<<2;
- x=x-(u.x-big.x);
- d=x+dx;
- dd=(x-d)+dx; /* cos(x+dx)=cos(Xi+t)=cos(Xi)cos(t) - sin(Xi)sin(t) */
- MUL2(d,dd,d,dd,d2,dd2,p,hx,tx,hy,ty,q,c,cc);
- sn=__sincostab.x[k]; /* */
- ssn=__sincostab.x[k+1]; /* sin(Xi) and cos(Xi) */
- cs=__sincostab.x[k+2]; /* */
- ccs=__sincostab.x[k+3]; /* */
- MUL2(d2,dd2,s7.x,ss7.x,ds,dss,p,hx,tx,hy,ty,q,c,cc);
- ADD2(ds,dss,s5.x,ss5.x,ds,dss,r,s);
- MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);
- ADD2(ds,dss,s3.x,ss3.x,ds,dss,r,s);
- MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);
- MUL2(d,dd,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);
- ADD2(ds,dss,d,dd,ds,dss,r,s);
-
- MUL2(d2,dd2,c8.x,cc8.x,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
- ADD2(dc,dcc,c6.x,cc6.x,dc,dcc,r,s);
- MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
- ADD2(dc,dcc,c4.x,cc4.x,dc,dcc,r,s);
- MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
- ADD2(dc,dcc,c2.x,cc2.x,dc,dcc,r,s);
- MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
-
- MUL2(cs,ccs,ds,dss,e,ee,p,hx,tx,hy,ty,q,c,cc);
- MUL2(dc,dcc,sn,ssn,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
-
- MUL2(d2,dd2,s7.x,ss7.x,ds,dss,p,hx,tx,hy,ty,q,c,cc);
- ADD2(ds,dss,s5.x,ss5.x,ds,dss,r,s);
- MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);
- ADD2(ds,dss,s3.x,ss3.x,ds,dss,r,s);
- MUL2(d2,dd2,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);
- MUL2(d,dd,ds,dss,ds,dss,p,hx,tx,hy,ty,q,c,cc);
- ADD2(ds,dss,d,dd,ds,dss,r,s);
- MUL2(d2,dd2,c8.x,cc8.x,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
- ADD2(dc,dcc,c6.x,cc6.x,dc,dcc,r,s);
- MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
- ADD2(dc,dcc,c4.x,cc4.x,dc,dcc,r,s);
- MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
- ADD2(dc,dcc,c2.x,cc2.x,dc,dcc,r,s);
- MUL2(d2,dd2,dc,dcc,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
- MUL2(sn,ssn,ds,dss,e,ee,p,hx,tx,hy,ty,q,c,cc);
- MUL2(dc,dcc,cs,ccs,dc,dcc,p,hx,tx,hy,ty,q,c,cc);
- ADD2(e,ee,dc,dcc,e,ee,r,s);
- SUB2(cs,ccs,e,ee,e,ee,r,s);
-
- v[0]=e;
- v[1]=ee;
+ u.x = x + big.x;
+ k = u.i[LOW_HALF] << 2;
+ x = x - (u.x - big.x);
+ d = x + dx;
+ dd = (x - d) + dx; /* cos(x+dx)=cos(Xi+t)=cos(Xi)cos(t) - sin(Xi)sin(t) */
+ MUL2 (d, dd, d, dd, d2, dd2, p, hx, tx, hy, ty, q, c, cc);
+ sn = __sincostab.x[k]; /* */
+ ssn = __sincostab.x[k + 1]; /* sin(Xi) and cos(Xi) */
+ cs = __sincostab.x[k + 2]; /* */
+ ccs = __sincostab.x[k + 3]; /* */
+ MUL2 (d2, dd2, s7.x, ss7.x, ds, dss, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (ds, dss, s5.x, ss5.x, ds, dss, r, s);
+ MUL2 (d2, dd2, ds, dss, ds, dss, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (ds, dss, s3.x, ss3.x, ds, dss, r, s);
+ MUL2 (d2, dd2, ds, dss, ds, dss, p, hx, tx, hy, ty, q, c, cc);
+ MUL2 (d, dd, ds, dss, ds, dss, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (ds, dss, d, dd, ds, dss, r, s);
+
+ MUL2 (d2, dd2, c8.x, cc8.x, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (dc, dcc, c6.x, cc6.x, dc, dcc, r, s);
+ MUL2 (d2, dd2, dc, dcc, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (dc, dcc, c4.x, cc4.x, dc, dcc, r, s);
+ MUL2 (d2, dd2, dc, dcc, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (dc, dcc, c2.x, cc2.x, dc, dcc, r, s);
+ MUL2 (d2, dd2, dc, dcc, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+
+ MUL2 (cs, ccs, ds, dss, e, ee, p, hx, tx, hy, ty, q, c, cc);
+ MUL2 (dc, dcc, sn, ssn, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+
+ MUL2 (d2, dd2, s7.x, ss7.x, ds, dss, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (ds, dss, s5.x, ss5.x, ds, dss, r, s);
+ MUL2 (d2, dd2, ds, dss, ds, dss, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (ds, dss, s3.x, ss3.x, ds, dss, r, s);
+ MUL2 (d2, dd2, ds, dss, ds, dss, p, hx, tx, hy, ty, q, c, cc);
+ MUL2 (d, dd, ds, dss, ds, dss, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (ds, dss, d, dd, ds, dss, r, s);
+ MUL2 (d2, dd2, c8.x, cc8.x, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (dc, dcc, c6.x, cc6.x, dc, dcc, r, s);
+ MUL2 (d2, dd2, dc, dcc, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (dc, dcc, c4.x, cc4.x, dc, dcc, r, s);
+ MUL2 (d2, dd2, dc, dcc, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (dc, dcc, c2.x, cc2.x, dc, dcc, r, s);
+ MUL2 (d2, dd2, dc, dcc, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+ MUL2 (sn, ssn, ds, dss, e, ee, p, hx, tx, hy, ty, q, c, cc);
+ MUL2 (dc, dcc, cs, ccs, dc, dcc, p, hx, tx, hy, ty, q, c, cc);
+ ADD2 (e, ee, dc, dcc, e, ee, r, s);
+ SUB2 (cs, ccs, e, ee, e, ee, r, s);
+
+ v[0] = e;
+ v[1] = ee;
}
/**********************************************************************/
/* Routine receive Double-Length number (x+dx) and computes cos(x+dx) */
@@ -175,29 +179,45 @@ __dubcos(double x, double dx, double v[]) {
/**********************************************************************/
void
SECTION
-__docos(double x, double dx, double v[]) {
- double y,yy,p,w[2];
- if (x>0) {y=x; yy=dx;}
- else {y=-x; yy=-dx;}
- if (y<0.5*hp0.x) /* y< PI/4 */
- {__dubcos(y,yy,w); v[0]=w[0]; v[1]=w[1];}
- else if (y<1.5*hp0.x) { /* y< 3/4 * PI */
- p=hp0.x-y; /* p = PI/2 - y */
- yy=hp1.x-yy;
- y=p+yy;
- yy=(p-y)+yy;
- if (y>0) {__dubsin(y,yy,w); v[0]=w[0]; v[1]=w[1];}
- /* cos(x) = sin ( 90 - x ) */
- else {__dubsin(-y,-yy,w); v[0]=-w[0]; v[1]=-w[1];
- }
- }
- else { /* y>= 3/4 * PI */
- p=2.0*hp0.x-y; /* p = PI- y */
- yy=2.0*hp1.x-yy;
- y=p+yy;
- yy=(p-y)+yy;
- __dubcos(y,yy,w);
- v[0]=-w[0];
- v[1]=-w[1];
- }
+__docos (double x, double dx, double v[])
+{
+ double y, yy, p, w[2];
+ if (x > 0)
+ {
+ y = x; yy = dx;
+ }
+ else
+ {
+ y = -x; yy = -dx;
+ }
+ if (y < 0.5 * hp0.x) /* y< PI/4 */
+ {
+ __dubcos (y, yy, w); v[0] = w[0]; v[1] = w[1];
+ }
+ else if (y < 1.5 * hp0.x) /* y< 3/4 * PI */
+ {
+ p = hp0.x - y; /* p = PI/2 - y */
+ yy = hp1.x - yy;
+ y = p + yy;
+ yy = (p - y) + yy;
+ if (y > 0)
+ {
+ __dubsin (y, yy, w); v[0] = w[0]; v[1] = w[1];
+ }
+ /* cos(x) = sin ( 90 - x ) */
+ else
+ {
+ __dubsin (-y, -yy, w); v[0] = -w[0]; v[1] = -w[1];
+ }
+ }
+ else /* y>= 3/4 * PI */
+ {
+ p = 2.0 * hp0.x - y; /* p = PI- y */
+ yy = 2.0 * hp1.x - yy;
+ y = p + yy;
+ yy = (p - y) + yy;
+ __dubcos (y, yy, w);
+ v[0] = -w[0];
+ v[1] = -w[1];
+ }
}
diff --git a/libc/sysdeps/ieee754/dbl-64/e_acosh.c b/libc/sysdeps/ieee754/dbl-64/e_acosh.c
index b24a6f645..c1f3590f7 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_acosh.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_acosh.c
@@ -28,31 +28,42 @@
#include <math_private.h>
static const double
-one = 1.0,
-ln2 = 6.93147180559945286227e-01; /* 0x3FE62E42, 0xFEFA39EF */
+ one = 1.0,
+ ln2 = 6.93147180559945286227e-01; /* 0x3FE62E42, 0xFEFA39EF */
double
-__ieee754_acosh(double x)
+__ieee754_acosh (double x)
{
- double t;
- int32_t hx;
- u_int32_t lx;
- EXTRACT_WORDS(hx,lx,x);
- if(hx<0x3ff00000) { /* x < 1 */
- return (x-x)/(x-x);
- } else if(hx >=0x41b00000) { /* x > 2**28 */
- if(hx >=0x7ff00000) { /* x is inf of NaN */
- return x+x;
- } else
- return __ieee754_log(x)+ln2; /* acosh(huge)=log(2x) */
- } else if(((hx-0x3ff00000)|lx)==0) {
- return 0.0; /* acosh(1) = 0 */
- } else if (hx > 0x40000000) { /* 2**28 > x > 2 */
- t=x*x;
- return __ieee754_log(2.0*x-one/(x+__ieee754_sqrt(t-one)));
- } else { /* 1<x<2 */
- t = x-one;
- return __log1p(t+__ieee754_sqrt(2.0*t+t*t));
+ double t;
+ int32_t hx;
+ u_int32_t lx;
+ EXTRACT_WORDS (hx, lx, x);
+ if (hx < 0x3ff00000) /* x < 1 */
+ {
+ return (x - x) / (x - x);
+ }
+ else if (hx >= 0x41b00000) /* x > 2**28 */
+ {
+ if (hx >= 0x7ff00000) /* x is inf of NaN */
+ {
+ return x + x;
}
+ else
+ return __ieee754_log (x) + ln2; /* acosh(huge)=log(2x) */
+ }
+ else if (((hx - 0x3ff00000) | lx) == 0)
+ {
+ return 0.0; /* acosh(1) = 0 */
+ }
+ else if (hx > 0x40000000) /* 2**28 > x > 2 */
+ {
+ t = x * x;
+ return __ieee754_log (2.0 * x - one / (x + __ieee754_sqrt (t - one)));
+ }
+ else /* 1<x<2 */
+ {
+ t = x - one;
+ return __log1p (t + __ieee754_sqrt (2.0 * t + t * t));
+ }
}
strong_alias (__ieee754_acosh, __acosh_finite)
diff --git a/libc/sysdeps/ieee754/dbl-64/e_atan2.c b/libc/sysdeps/ieee754/dbl-64/e_atan2.c
index 4ebe9c01f..e36305cda 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_atan2.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_atan2.c
@@ -42,6 +42,7 @@
#include "uatan.tbl"
#include "atnat2.h"
#include <math_private.h>
+#include <stap-probe.h>
#ifndef SECTION
# define SECTION
@@ -71,14 +72,14 @@ __ieee754_atan2 (double y, double x)
int i, de, ux, dx, uy, dy;
static const int pr[MM] = { 6, 8, 10, 20, 32 };
double ax, ay, u, du, u9, ua, v, vv, dv, t1, t2, t3, t7, t8,
- z, zz, cor, s1, ss1, s2, ss2;
+ z, zz, cor, s1, ss1, s2, ss2;
#ifndef DLA_FMS
double t4, t5, t6;
#endif
number num;
- static const int ep = 59768832, /* 57*16**5 */
- em = -59768832; /* -57*16**5 */
+ static const int ep = 59768832, /* 57*16**5 */
+ em = -59768832; /* -57*16**5 */
/* x=NaN or y=NaN */
num.d = x;
@@ -293,17 +294,17 @@ __ieee754_atan2 (double y, double x)
if (i < 112)
{
if (i < 48)
- u9 = u91.d; /* u < 1/4 */
+ u9 = u91.d; /* u < 1/4 */
else
u9 = u92.d;
- } /* 1/4 <= u < 1/2 */
+ } /* 1/4 <= u < 1/2 */
else
{
if (i < 176)
- u9 = u93.d; /* 1/2 <= u < 3/4 */
+ u9 = u93.d; /* 1/2 <= u < 3/4 */
else
u9 = u94.d;
- } /* 3/4 <= u <= 1 */
+ } /* 3/4 <= u <= 1 */
if ((z = t1 + (zz - u9 * t1)) == t1 + (zz + u9 * t1))
return signArctan2 (y, z);
@@ -311,9 +312,9 @@ __ieee754_atan2 (double y, double x)
EADD (t1, du, v, vv);
s1 = v * (hij[i][11].d
+ v * (hij[i][12].d
- + v * (hij[i][13].d
- + v * (hij[i][14].d
- + v * hij[i][15].d))));
+ + v * (hij[i][13].d
+ + v * (hij[i][14].d
+ + v * hij[i][15].d))));
ADD2 (hij[i][9].d, hij[i][10].d, s1, 0, s2, ss2, t1, t2);
MUL2 (v, vv, s2, ss2, s1, ss1, t1, t2, t3, t4, t5, t6, t7, t8);
ADD2 (hij[i][7].d, hij[i][8].d, s1, ss1, s2, ss2, t1, t2);
@@ -597,7 +598,11 @@ atan2Mp (double x, double y, const int pr[])
__mp_dbl (&mpz1, &z1, p);
__mp_dbl (&mpz2, &z2, p);
if (z1 == z2)
- return z1;
+ {
+ LIBC_PROBE (slowatan2, 4, &p, &x, &y, &z1);
+ return z1;
+ }
}
+ LIBC_PROBE (slowatan2_inexact, 4, &p, &x, &y, &z1);
return z1; /*if impossible to do exact computing */
}
diff --git a/libc/sysdeps/ieee754/dbl-64/e_cosh.c b/libc/sysdeps/ieee754/dbl-64/e_cosh.c
index 229d5a2fb..6caf943ed 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_cosh.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_cosh.c
@@ -34,49 +34,55 @@
#include <math.h>
#include <math_private.h>
-static const double one = 1.0, half=0.5, huge = 1.0e300;
+static const double one = 1.0, half = 0.5, huge = 1.0e300;
double
__ieee754_cosh (double x)
{
- double t,w;
- int32_t ix;
- u_int32_t lx;
+ double t, w;
+ int32_t ix;
+ u_int32_t lx;
- /* High word of |x|. */
- GET_HIGH_WORD(ix,x);
- ix &= 0x7fffffff;
+ /* High word of |x|. */
+ GET_HIGH_WORD (ix, x);
+ ix &= 0x7fffffff;
- /* |x| in [0,22] */
- if (ix < 0x40360000) {
- /* |x| in [0,0.5*ln2], return 1+expm1(|x|)^2/(2*exp(|x|)) */
- if(ix<0x3fd62e43) {
- t = __expm1(fabs(x));
- w = one+t;
- if (ix<0x3c800000) return w; /* cosh(tiny) = 1 */
- return one+(t*t)/(w+w);
- }
-
- /* |x| in [0.5*ln2,22], return (exp(|x|)+1/exp(|x|)/2; */
- t = __ieee754_exp(fabs(x));
- return half*t+half/t;
+ /* |x| in [0,22] */
+ if (ix < 0x40360000)
+ {
+ /* |x| in [0,0.5*ln2], return 1+expm1(|x|)^2/(2*exp(|x|)) */
+ if (ix < 0x3fd62e43)
+ {
+ t = __expm1 (fabs (x));
+ w = one + t;
+ if (ix < 0x3c800000)
+ return w; /* cosh(tiny) = 1 */
+ return one + (t * t) / (w + w);
}
- /* |x| in [22, log(maxdouble)] return half*exp(|x|) */
- if (ix < 0x40862e42) return half*__ieee754_exp(fabs(x));
+ /* |x| in [0.5*ln2,22], return (exp(|x|)+1/exp(|x|)/2; */
+ t = __ieee754_exp (fabs (x));
+ return half * t + half / t;
+ }
- /* |x| in [log(maxdouble), overflowthresold] */
- GET_LOW_WORD(lx,x);
- if (ix<0x408633ce || ((ix==0x408633ce)&&(lx<=(u_int32_t)0x8fb9f87d))) {
- w = __ieee754_exp(half*fabs(x));
- t = half*w;
- return t*w;
- }
+ /* |x| in [22, log(maxdouble)] return half*exp(|x|) */
+ if (ix < 0x40862e42)
+ return half * __ieee754_exp (fabs (x));
+
+ /* |x| in [log(maxdouble), overflowthresold] */
+ GET_LOW_WORD (lx, x);
+ if (ix < 0x408633ce || ((ix == 0x408633ce) && (lx <= (u_int32_t) 0x8fb9f87d)))
+ {
+ w = __ieee754_exp (half * fabs (x));
+ t = half * w;
+ return t * w;
+ }
- /* x is INF or NaN */
- if(ix>=0x7ff00000) return x*x;
+ /* x is INF or NaN */
+ if (ix >= 0x7ff00000)
+ return x * x;
- /* |x| > overflowthresold, cosh(x) overflow */
- return huge*huge;
+ /* |x| > overflowthresold, cosh(x) overflow */
+ return huge * huge;
}
strong_alias (__ieee754_cosh, __cosh_finite)
diff --git a/libc/sysdeps/ieee754/dbl-64/e_exp.c b/libc/sysdeps/ieee754/dbl-64/e_exp.c
index 07cc4a91b..df3aa5efa 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_exp.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_exp.c
@@ -44,221 +44,299 @@
# define SECTION
#endif
-double __slowexp(double);
+double __slowexp (double);
-/***************************************************************************/
-/* An ultimate exp routine. Given an IEEE double machine number x */
-/* it computes the correctly rounded (to nearest) value of e^x */
-/***************************************************************************/
+/* An ultimate exp routine. Given an IEEE double machine number x it computes
+ the correctly rounded (to nearest) value of e^x. */
double
SECTION
-__ieee754_exp(double x) {
+__ieee754_exp (double x)
+{
double bexp, t, eps, del, base, y, al, bet, res, rem, cor;
- mynumber junk1, junk2, binexp = {{0,0}};
- int4 i,j,m,n,ex;
+ mynumber junk1, junk2, binexp = {{0, 0}};
+ int4 i, j, m, n, ex;
double retval;
SET_RESTORE_ROUND (FE_TONEAREST);
junk1.x = x;
m = junk1.i[HIGH_HALF];
- n = m&hugeint;
-
- if (n > smallint && n < bigint) {
-
- y = x*log2e.x + three51.x;
- bexp = y - three51.x; /* multiply the result by 2**bexp */
-
- junk1.x = y;
-
- eps = bexp*ln_two2.x; /* x = bexp*ln(2) + t - eps */
- t = x - bexp*ln_two1.x;
-
- y = t + three33.x;
- base = y - three33.x; /* t rounded to a multiple of 2**-18 */
- junk2.x = y;
- del = (t - base) - eps; /* x = bexp*ln(2) + base + del */
- eps = del + del*del*(p3.x*del + p2.x);
-
- binexp.i[HIGH_HALF] =(junk1.i[LOW_HALF]+1023)<<20;
-
- i = ((junk2.i[LOW_HALF]>>8)&0xfffffffe)+356;
- j = (junk2.i[LOW_HALF]&511)<<1;
-
- al = coar.x[i]*fine.x[j];
- bet =(coar.x[i]*fine.x[j+1] + coar.x[i+1]*fine.x[j]) + coar.x[i+1]*fine.x[j+1];
-
- rem=(bet + bet*eps)+al*eps;
- res = al + rem;
- cor = (al - res) + rem;
- if (res == (res+cor*err_0)) { retval = res*binexp.x; goto ret; }
- else { retval = __slowexp(x); goto ret; } /*if error is over bound */
- }
+ n = m & hugeint;
+
+ if (n > smallint && n < bigint)
+ {
+ y = x * log2e.x + three51.x;
+ bexp = y - three51.x; /* multiply the result by 2**bexp */
+
+ junk1.x = y;
+
+ eps = bexp * ln_two2.x; /* x = bexp*ln(2) + t - eps */
+ t = x - bexp * ln_two1.x;
+
+ y = t + three33.x;
+ base = y - three33.x; /* t rounded to a multiple of 2**-18 */
+ junk2.x = y;
+ del = (t - base) - eps; /* x = bexp*ln(2) + base + del */
+ eps = del + del * del * (p3.x * del + p2.x);
+
+ binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 1023) << 20;
+
+ i = ((junk2.i[LOW_HALF] >> 8) & 0xfffffffe) + 356;
+ j = (junk2.i[LOW_HALF] & 511) << 1;
+
+ al = coar.x[i] * fine.x[j];
+ bet = ((coar.x[i] * fine.x[j + 1] + coar.x[i + 1] * fine.x[j])
+ + coar.x[i + 1] * fine.x[j + 1]);
+
+ rem = (bet + bet * eps) + al * eps;
+ res = al + rem;
+ cor = (al - res) + rem;
+ if (res == (res + cor * err_0))
+ {
+ retval = res * binexp.x;
+ goto ret;
+ }
+ else
+ {
+ retval = __slowexp (x);
+ goto ret;
+ } /*if error is over bound */
+ }
- if (n <= smallint) { retval = 1.0; goto ret; }
+ if (n <= smallint)
+ {
+ retval = 1.0;
+ goto ret;
+ }
- if (n >= badint) {
- if (n > infint) { retval = x+x; goto ret; } /* x is NaN */
- if (n < infint) { retval = (x>0) ? (hhuge*hhuge) : (tiny*tiny); goto ret; }
- /* x is finite, cause either overflow or underflow */
- if (junk1.i[LOW_HALF] != 0) { retval = x+x; goto ret; } /* x is NaN */
- retval = (x>0)?inf.x:zero; /* |x| = inf; return either inf or 0 */
- goto ret;
- }
+ if (n >= badint)
+ {
+ if (n > infint)
+ {
+ retval = x + x;
+ goto ret;
+ } /* x is NaN */
+ if (n < infint)
+ {
+ retval = (x > 0) ? (hhuge * hhuge) : (tiny * tiny);
+ goto ret;
+ }
+ /* x is finite, cause either overflow or underflow */
+ if (junk1.i[LOW_HALF] != 0)
+ {
+ retval = x + x;
+ goto ret;
+ } /* x is NaN */
+ retval = (x > 0) ? inf.x : zero; /* |x| = inf; return either inf or 0 */
+ goto ret;
+ }
- y = x*log2e.x + three51.x;
+ y = x * log2e.x + three51.x;
bexp = y - three51.x;
junk1.x = y;
- eps = bexp*ln_two2.x;
- t = x - bexp*ln_two1.x;
+ eps = bexp * ln_two2.x;
+ t = x - bexp * ln_two1.x;
y = t + three33.x;
base = y - three33.x;
junk2.x = y;
del = (t - base) - eps;
- eps = del + del*del*(p3.x*del + p2.x);
- i = ((junk2.i[LOW_HALF]>>8)&0xfffffffe)+356;
- j = (junk2.i[LOW_HALF]&511)<<1;
- al = coar.x[i]*fine.x[j];
- bet =(coar.x[i]*fine.x[j+1] + coar.x[i+1]*fine.x[j]) + coar.x[i+1]*fine.x[j+1];
- rem=(bet + bet*eps)+al*eps;
+ eps = del + del * del * (p3.x * del + p2.x);
+ i = ((junk2.i[LOW_HALF] >> 8) & 0xfffffffe) + 356;
+ j = (junk2.i[LOW_HALF] & 511) << 1;
+ al = coar.x[i] * fine.x[j];
+ bet = ((coar.x[i] * fine.x[j + 1] + coar.x[i + 1] * fine.x[j])
+ + coar.x[i + 1] * fine.x[j + 1]);
+ rem = (bet + bet * eps) + al * eps;
res = al + rem;
cor = (al - res) + rem;
- if (m>>31) {
- ex=junk1.i[LOW_HALF];
- if (res < 1.0) {res+=res; cor+=cor; ex-=1;}
- if (ex >=-1022) {
- binexp.i[HIGH_HALF] = (1023+ex)<<20;
- if (res == (res+cor*err_0)) { retval = res*binexp.x; goto ret; }
- else { retval = __slowexp(x); goto ret; } /*if error is over bound */
+ if (m >> 31)
+ {
+ ex = junk1.i[LOW_HALF];
+ if (res < 1.0)
+ {
+ res += res;
+ cor += cor;
+ ex -= 1;
+ }
+ if (ex >= -1022)
+ {
+ binexp.i[HIGH_HALF] = (1023 + ex) << 20;
+ if (res == (res + cor * err_0))
+ {
+ retval = res * binexp.x;
+ goto ret;
+ }
+ else
+ {
+ retval = __slowexp (x);
+ goto ret;
+ } /*if error is over bound */
+ }
+ ex = -(1022 + ex);
+ binexp.i[HIGH_HALF] = (1023 - ex) << 20;
+ res *= binexp.x;
+ cor *= binexp.x;
+ eps = 1.0000000001 + err_0 * binexp.x;
+ t = 1.0 + res;
+ y = ((1.0 - t) + res) + cor;
+ res = t + y;
+ cor = (t - res) + y;
+ if (res == (res + eps * cor))
+ {
+ binexp.i[HIGH_HALF] = 0x00100000;
+ retval = (res - 1.0) * binexp.x;
+ goto ret;
+ }
+ else
+ {
+ retval = __slowexp (x);
+ goto ret;
+ } /* if error is over bound */
}
- ex = -(1022+ex);
- binexp.i[HIGH_HALF] = (1023-ex)<<20;
- res*=binexp.x;
- cor*=binexp.x;
- eps=1.0000000001+err_0*binexp.x;
- t=1.0+res;
- y = ((1.0-t)+res)+cor;
- res=t+y;
- cor = (t-res)+y;
- if (res == (res + eps*cor))
- { binexp.i[HIGH_HALF] = 0x00100000;
- retval = (res-1.0)*binexp.x;
- goto ret;
+ else
+ {
+ binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 767) << 20;
+ if (res == (res + cor * err_0))
+ {
+ retval = res * binexp.x * t256.x;
+ goto ret;
+ }
+ else
+ {
+ retval = __slowexp (x);
+ goto ret;
+ }
}
- else { retval = __slowexp(x); goto ret; } /* if error is over bound */
- }
- else {
- binexp.i[HIGH_HALF] =(junk1.i[LOW_HALF]+767)<<20;
- if (res == (res+cor*err_0)) { retval = res*binexp.x*t256.x; goto ret; }
- else { retval = __slowexp(x); goto ret; }
- }
- ret:
+ret:
return retval;
}
#ifndef __ieee754_exp
strong_alias (__ieee754_exp, __exp_finite)
#endif
-/************************************************************************/
-/* Compute e^(x+xx)(Double-Length number) .The routine also receive */
-/* bound of error of previous calculation .If after computing exp */
-/* error bigger than allows routine return non positive number */
-/*else return e^(x + xx) (always positive ) */
-/************************************************************************/
-
+/* Compute e^(x+xx). The routine also receives bound of error of previous
+ calculation. If after computing exp the error exceeds the allowed bounds,
+ the routine returns a non-positive number. Otherwise it returns the
+ computed result, which is always positive. */
double
SECTION
-__exp1(double x, double xx, double error) {
+__exp1 (double x, double xx, double error)
+{
double bexp, t, eps, del, base, y, al, bet, res, rem, cor;
- mynumber junk1, junk2, binexp = {{0,0}};
- int4 i,j,m,n,ex;
+ mynumber junk1, junk2, binexp = {{0, 0}};
+ int4 i, j, m, n, ex;
junk1.x = x;
m = junk1.i[HIGH_HALF];
- n = m&hugeint; /* no sign */
-
- if (n > smallint && n < bigint) {
- y = x*log2e.x + three51.x;
- bexp = y - three51.x; /* multiply the result by 2**bexp */
+ n = m & hugeint; /* no sign */
- junk1.x = y;
+ if (n > smallint && n < bigint)
+ {
+ y = x * log2e.x + three51.x;
+ bexp = y - three51.x; /* multiply the result by 2**bexp */
- eps = bexp*ln_two2.x; /* x = bexp*ln(2) + t - eps */
- t = x - bexp*ln_two1.x;
+ junk1.x = y;
- y = t + three33.x;
- base = y - three33.x; /* t rounded to a multiple of 2**-18 */
- junk2.x = y;
- del = (t - base) + (xx-eps); /* x = bexp*ln(2) + base + del */
- eps = del + del*del*(p3.x*del + p2.x);
+ eps = bexp * ln_two2.x; /* x = bexp*ln(2) + t - eps */
+ t = x - bexp * ln_two1.x;
- binexp.i[HIGH_HALF] =(junk1.i[LOW_HALF]+1023)<<20;
+ y = t + three33.x;
+ base = y - three33.x; /* t rounded to a multiple of 2**-18 */
+ junk2.x = y;
+ del = (t - base) + (xx - eps); /* x = bexp*ln(2) + base + del */
+ eps = del + del * del * (p3.x * del + p2.x);
- i = ((junk2.i[LOW_HALF]>>8)&0xfffffffe)+356;
- j = (junk2.i[LOW_HALF]&511)<<1;
+ binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 1023) << 20;
- al = coar.x[i]*fine.x[j];
- bet =(coar.x[i]*fine.x[j+1] + coar.x[i+1]*fine.x[j]) + coar.x[i+1]*fine.x[j+1];
+ i = ((junk2.i[LOW_HALF] >> 8) & 0xfffffffe) + 356;
+ j = (junk2.i[LOW_HALF] & 511) << 1;
- rem=(bet + bet*eps)+al*eps;
- res = al + rem;
- cor = (al - res) + rem;
- if (res == (res+cor*(1.0+error+err_1))) return res*binexp.x;
- else return -10.0;
- }
+ al = coar.x[i] * fine.x[j];
+ bet = ((coar.x[i] * fine.x[j + 1] + coar.x[i + 1] * fine.x[j])
+ + coar.x[i + 1] * fine.x[j + 1]);
- if (n <= smallint) return 1.0; /* if x->0 e^x=1 */
+ rem = (bet + bet * eps) + al * eps;
+ res = al + rem;
+ cor = (al - res) + rem;
+ if (res == (res + cor * (1.0 + error + err_1)))
+ return res * binexp.x;
+ else
+ return -10.0;
+ }
- if (n >= badint) {
- if (n > infint) return(zero/zero); /* x is NaN, return invalid */
- if (n < infint) return ( (x>0) ? (hhuge*hhuge) : (tiny*tiny) );
- /* x is finite, cause either overflow or underflow */
- if (junk1.i[LOW_HALF] != 0) return (zero/zero); /* x is NaN */
- return ((x>0)?inf.x:zero ); /* |x| = inf; return either inf or 0 */
- }
+ if (n <= smallint)
+ return 1.0; /* if x->0 e^x=1 */
+
+ if (n >= badint)
+ {
+ if (n > infint)
+ return (zero / zero); /* x is NaN, return invalid */
+ if (n < infint)
+ return ((x > 0) ? (hhuge * hhuge) : (tiny * tiny));
+ /* x is finite, cause either overflow or underflow */
+ if (junk1.i[LOW_HALF] != 0)
+ return (zero / zero); /* x is NaN */
+ return ((x > 0) ? inf.x : zero); /* |x| = inf; return either inf or 0 */
+ }
- y = x*log2e.x + three51.x;
+ y = x * log2e.x + three51.x;
bexp = y - three51.x;
junk1.x = y;
- eps = bexp*ln_two2.x;
- t = x - bexp*ln_two1.x;
+ eps = bexp * ln_two2.x;
+ t = x - bexp * ln_two1.x;
y = t + three33.x;
base = y - three33.x;
junk2.x = y;
- del = (t - base) + (xx-eps);
- eps = del + del*del*(p3.x*del + p2.x);
- i = ((junk2.i[LOW_HALF]>>8)&0xfffffffe)+356;
- j = (junk2.i[LOW_HALF]&511)<<1;
- al = coar.x[i]*fine.x[j];
- bet =(coar.x[i]*fine.x[j+1] + coar.x[i+1]*fine.x[j]) + coar.x[i+1]*fine.x[j+1];
- rem=(bet + bet*eps)+al*eps;
+ del = (t - base) + (xx - eps);
+ eps = del + del * del * (p3.x * del + p2.x);
+ i = ((junk2.i[LOW_HALF] >> 8) & 0xfffffffe) + 356;
+ j = (junk2.i[LOW_HALF] & 511) << 1;
+ al = coar.x[i] * fine.x[j];
+ bet = ((coar.x[i] * fine.x[j + 1] + coar.x[i + 1] * fine.x[j])
+ + coar.x[i + 1] * fine.x[j + 1]);
+ rem = (bet + bet * eps) + al * eps;
res = al + rem;
cor = (al - res) + rem;
- if (m>>31) {
- ex=junk1.i[LOW_HALF];
- if (res < 1.0) {res+=res; cor+=cor; ex-=1;}
- if (ex >=-1022) {
- binexp.i[HIGH_HALF] = (1023+ex)<<20;
- if (res == (res+cor*(1.0+error+err_1))) return res*binexp.x;
- else return -10.0;
+ if (m >> 31)
+ {
+ ex = junk1.i[LOW_HALF];
+ if (res < 1.0)
+ {
+ res += res;
+ cor += cor;
+ ex -= 1;
+ }
+ if (ex >= -1022)
+ {
+ binexp.i[HIGH_HALF] = (1023 + ex) << 20;
+ if (res == (res + cor * (1.0 + error + err_1)))
+ return res * binexp.x;
+ else
+ return -10.0;
+ }
+ ex = -(1022 + ex);
+ binexp.i[HIGH_HALF] = (1023 - ex) << 20;
+ res *= binexp.x;
+ cor *= binexp.x;
+ eps = 1.00000000001 + (error + err_1) * binexp.x;
+ t = 1.0 + res;
+ y = ((1.0 - t) + res) + cor;
+ res = t + y;
+ cor = (t - res) + y;
+ if (res == (res + eps * cor))
+ {
+ binexp.i[HIGH_HALF] = 0x00100000;
+ return (res - 1.0) * binexp.x;
+ }
+ else
+ return -10.0;
+ }
+ else
+ {
+ binexp.i[HIGH_HALF] = (junk1.i[LOW_HALF] + 767) << 20;
+ if (res == (res + cor * (1.0 + error + err_1)))
+ return res * binexp.x * t256.x;
+ else
+ return -10.0;
}
- ex = -(1022+ex);
- binexp.i[HIGH_HALF] = (1023-ex)<<20;
- res*=binexp.x;
- cor*=binexp.x;
- eps=1.00000000001+(error+err_1)*binexp.x;
- t=1.0+res;
- y = ((1.0-t)+res)+cor;
- res=t+y;
- cor = (t-res)+y;
- if (res == (res + eps*cor))
- {binexp.i[HIGH_HALF] = 0x00100000; return (res-1.0)*binexp.x;}
- else return -10.0;
- }
- else {
- binexp.i[HIGH_HALF] =(junk1.i[LOW_HALF]+767)<<20;
- if (res == (res+cor*(1.0+error+err_1)))
- return res*binexp.x*t256.x;
- else return -10.0;
- }
}
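The block above, though only re-indented here, is the accurate-tables exp scheme: x is split as bexp*ln(2) + base + del, e^base is looked up in the coar/fine tables, and e^del comes from a short polynomial in del. A minimal sketch of just that splitting step, assuming a plain Taylor polynomial and the standard fdlibm ln(2) constants rather than the glibc tables (illustrative only, nowhere near correctly rounded):

#include <math.h>
#include <stdio.h>

/* Sketch only: reduce x so that x = m*ln(2) + r with |r| <= ln(2)/2,
   then e^x = 2^m * e^r with e^r from a short polynomial.  This shows
   the splitting used above, not the table-driven glibc code.  */
static double
sketch_exp (double x)
{
  const double log2e = 1.4426950408889634074;      /* 1/ln(2) */
  const double ln2hi = 6.93147180369123816490e-01; /* fdlibm ln2 high part */
  const double ln2lo = 1.90821492927058770002e-10; /* fdlibm ln2 low part  */

  double m = floor (x * log2e + 0.5);   /* nearest multiple of ln(2) */
  double r = (x - m * ln2hi) - m * ln2lo;

  /* Degree-5 Taylor polynomial for e^r; a demonstration, not 0.5 ulp.  */
  double p = 1.0 + r * (1.0 + r * (0.5 + r * (1.0 / 6
              + r * (1.0 / 24 + r / 120))));

  return ldexp (p, (int) m);            /* scale by 2^m */
}

int
main (void)
{
  printf ("%.17g\n%.17g\n", sketch_exp (1.0), exp (1.0));
  return 0;
}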
diff --git a/libc/sysdeps/ieee754/dbl-64/e_exp2.c b/libc/sysdeps/ieee754/dbl-64/e_exp2.c
index 96ec735e3..e1ba940e6 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_exp2.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_exp2.c
@@ -46,7 +46,7 @@ __ieee754_exp2 (double x)
if (__builtin_expect (isless (x, himark), 1))
{
/* Exceptional cases: */
- if (__builtin_expect (! isgreaterequal (x, lomark), 0))
+ if (__builtin_expect (!isgreaterequal (x, lomark), 0))
{
if (__isinf (x))
/* e^-inf == 0, with no error. */
@@ -93,7 +93,7 @@ __ieee754_exp2 (double x)
/* 3. Compute ex2 = 2^(t/512+e+ex). */
ex2_u.d = exp2_accuratetable[tval & 511];
tval >>= 9;
- unsafe = abs(tval) >= -DBL_MIN_EXP - 1;
+ unsafe = abs (tval) >= -DBL_MIN_EXP - 1;
ex2_u.ieee.exponent += tval >> unsafe;
scale_u.d = 1.0;
scale_u.ieee.exponent += tval - (tval >> unsafe);
@@ -106,7 +106,7 @@ __ieee754_exp2 (double x)
* x + .055504110254308625)
* x + .240226506959100583)
* x + .69314718055994495) * ex2_u.d;
- math_opt_barrier (x22);
+ math_opt_barrier (x22);
}
/* 5. Return (2^x2-1) * 2^(t/512+e+ex) + 2^(t/512+e+ex). */
@@ -119,6 +119,6 @@ __ieee754_exp2 (double x)
}
else
/* Return x, if x is a NaN or Inf; or overflow, otherwise. */
- return TWO1023*x;
+ return TWO1023 * x;
}
strong_alias (__ieee754_exp2, __exp2_finite)
diff --git a/libc/sysdeps/ieee754/dbl-64/e_fmod.c b/libc/sysdeps/ieee754/dbl-64/e_fmod.c
index b8548fae4..c83c2aedb 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_fmod.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_fmod.c
@@ -18,111 +18,156 @@
#include <math.h>
#include <math_private.h>
-static const double one = 1.0, Zero[] = {0.0, -0.0,};
+static const double one = 1.0, Zero[] = { 0.0, -0.0, };
double
__ieee754_fmod (double x, double y)
{
- int32_t n,hx,hy,hz,ix,iy,sx,i;
- u_int32_t lx,ly,lz;
+ int32_t n, hx, hy, hz, ix, iy, sx, i;
+ u_int32_t lx, ly, lz;
- EXTRACT_WORDS(hx,lx,x);
- EXTRACT_WORDS(hy,ly,y);
- sx = hx&0x80000000; /* sign of x */
- hx ^=sx; /* |x| */
- hy &= 0x7fffffff; /* |y| */
+ EXTRACT_WORDS (hx, lx, x);
+ EXTRACT_WORDS (hy, ly, y);
+ sx = hx & 0x80000000; /* sign of x */
+ hx ^= sx; /* |x| */
+ hy &= 0x7fffffff; /* |y| */
- /* purge off exception values */
- if((hy|ly)==0||(hx>=0x7ff00000)|| /* y=0,or x not finite */
- ((hy|((ly|-ly)>>31))>0x7ff00000)) /* or y is NaN */
- return (x*y)/(x*y);
- if(hx<=hy) {
- if((hx<hy)||(lx<ly)) return x; /* |x|<|y| return x */
- if(lx==ly)
- return Zero[(u_int32_t)sx>>31]; /* |x|=|y| return x*0*/
- }
+ /* purge off exception values */
+ if ((hy | ly) == 0 || (hx >= 0x7ff00000) || /* y=0,or x not finite */
+ ((hy | ((ly | -ly) >> 31)) > 0x7ff00000)) /* or y is NaN */
+ return (x * y) / (x * y);
+ if (hx <= hy)
+ {
+ if ((hx < hy) || (lx < ly))
+ return x; /* |x|<|y| return x */
+ if (lx == ly)
+ return Zero[(u_int32_t) sx >> 31]; /* |x|=|y| return x*0*/
+ }
- /* determine ix = ilogb(x) */
- if(__builtin_expect(hx<0x00100000, 0)) { /* subnormal x */
- if(hx==0) {
- for (ix = -1043, i=lx; i>0; i<<=1) ix -=1;
- } else {
- for (ix = -1022,i=(hx<<11); i>0; i<<=1) ix -=1;
- }
- } else ix = (hx>>20)-1023;
+ /* determine ix = ilogb(x) */
+ if (__builtin_expect (hx < 0x00100000, 0)) /* subnormal x */
+ {
+ if (hx == 0)
+ {
+ for (ix = -1043, i = lx; i > 0; i <<= 1)
+ ix -= 1;
+ }
+ else
+ {
+ for (ix = -1022, i = (hx << 11); i > 0; i <<= 1)
+ ix -= 1;
+ }
+ }
+ else
+ ix = (hx >> 20) - 1023;
- /* determine iy = ilogb(y) */
- if(__builtin_expect(hy<0x00100000, 0)) { /* subnormal y */
- if(hy==0) {
- for (iy = -1043, i=ly; i>0; i<<=1) iy -=1;
- } else {
- for (iy = -1022,i=(hy<<11); i>0; i<<=1) iy -=1;
- }
- } else iy = (hy>>20)-1023;
+ /* determine iy = ilogb(y) */
+ if (__builtin_expect (hy < 0x00100000, 0)) /* subnormal y */
+ {
+ if (hy == 0)
+ {
+ for (iy = -1043, i = ly; i > 0; i <<= 1)
+ iy -= 1;
+ }
+ else
+ {
+ for (iy = -1022, i = (hy << 11); i > 0; i <<= 1)
+ iy -= 1;
+ }
+ }
+ else
+ iy = (hy >> 20) - 1023;
- /* set up {hx,lx}, {hy,ly} and align y to x */
- if(__builtin_expect(ix >= -1022, 1))
- hx = 0x00100000|(0x000fffff&hx);
- else { /* subnormal x, shift x to normal */
- n = -1022-ix;
- if(n<=31) {
- hx = (hx<<n)|(lx>>(32-n));
- lx <<= n;
- } else {
- hx = lx<<(n-32);
- lx = 0;
- }
+ /* set up {hx,lx}, {hy,ly} and align y to x */
+ if (__builtin_expect (ix >= -1022, 1))
+ hx = 0x00100000 | (0x000fffff & hx);
+ else /* subnormal x, shift x to normal */
+ {
+ n = -1022 - ix;
+ if (n <= 31)
+ {
+ hx = (hx << n) | (lx >> (32 - n));
+ lx <<= n;
+ }
+ else
+ {
+ hx = lx << (n - 32);
+ lx = 0;
+ }
+ }
+ if (__builtin_expect (iy >= -1022, 1))
+ hy = 0x00100000 | (0x000fffff & hy);
+ else /* subnormal y, shift y to normal */
+ {
+ n = -1022 - iy;
+ if (n <= 31)
+ {
+ hy = (hy << n) | (ly >> (32 - n));
+ ly <<= n;
}
- if(__builtin_expect(iy >= -1022, 1))
- hy = 0x00100000|(0x000fffff&hy);
- else { /* subnormal y, shift y to normal */
- n = -1022-iy;
- if(n<=31) {
- hy = (hy<<n)|(ly>>(32-n));
- ly <<= n;
- } else {
- hy = ly<<(n-32);
- ly = 0;
- }
+ else
+ {
+ hy = ly << (n - 32);
+ ly = 0;
}
+ }
- /* fix point fmod */
- n = ix - iy;
- while(n--) {
- hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1;
- if(hz<0){hx = hx+hx+(lx>>31); lx = lx+lx;}
- else {
- if((hz|lz)==0) /* return sign(x)*0 */
- return Zero[(u_int32_t)sx>>31];
- hx = hz+hz+(lz>>31); lx = lz+lz;
- }
+ /* fix point fmod */
+ n = ix - iy;
+ while (n--)
+ {
+ hz = hx - hy; lz = lx - ly; if (lx < ly)
+ hz -= 1;
+ if (hz < 0)
+ {
+ hx = hx + hx + (lx >> 31); lx = lx + lx;
}
- hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1;
- if(hz>=0) {hx=hz;lx=lz;}
+ else
+ {
+ if ((hz | lz) == 0) /* return sign(x)*0 */
+ return Zero[(u_int32_t) sx >> 31];
+ hx = hz + hz + (lz >> 31); lx = lz + lz;
+ }
+ }
+ hz = hx - hy; lz = lx - ly; if (lx < ly)
+ hz -= 1;
+ if (hz >= 0)
+ {
+ hx = hz; lx = lz;
+ }
- /* convert back to floating value and restore the sign */
- if((hx|lx)==0) /* return sign(x)*0 */
- return Zero[(u_int32_t)sx>>31];
- while(hx<0x00100000) { /* normalize x */
- hx = hx+hx+(lx>>31); lx = lx+lx;
- iy -= 1;
+ /* convert back to floating value and restore the sign */
+ if ((hx | lx) == 0) /* return sign(x)*0 */
+ return Zero[(u_int32_t) sx >> 31];
+ while (hx < 0x00100000) /* normalize x */
+ {
+ hx = hx + hx + (lx >> 31); lx = lx + lx;
+ iy -= 1;
+ }
+ if (__builtin_expect (iy >= -1022, 1)) /* normalize output */
+ {
+ hx = ((hx - 0x00100000) | ((iy + 1023) << 20));
+ INSERT_WORDS (x, hx | sx, lx);
+ }
+ else /* subnormal output */
+ {
+ n = -1022 - iy;
+ if (n <= 20)
+ {
+ lx = (lx >> n) | ((u_int32_t) hx << (32 - n));
+ hx >>= n;
+ }
+ else if (n <= 31)
+ {
+ lx = (hx << (32 - n)) | (lx >> n); hx = sx;
}
- if(__builtin_expect(iy>= -1022, 1)) { /* normalize output */
- hx = ((hx-0x00100000)|((iy+1023)<<20));
- INSERT_WORDS(x,hx|sx,lx);
- } else { /* subnormal output */
- n = -1022 - iy;
- if(n<=20) {
- lx = (lx>>n)|((u_int32_t)hx<<(32-n));
- hx >>= n;
- } else if (n<=31) {
- lx = (hx<<(32-n))|(lx>>n); hx = sx;
- } else {
- lx = hx>>(n-32); hx = sx;
- }
- INSERT_WORDS(x,hx|sx,lx);
- x *= one; /* create necessary signal */
+ else
+ {
+ lx = hx >> (n - 32); hx = sx;
}
- return x; /* exact output */
+ INSERT_WORDS (x, hx | sx, lx);
+ x *= one; /* create necessary signal */
+ }
+ return x; /* exact output */
}
strong_alias (__ieee754_fmod, __fmod_finite)
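The re-bracketed loop above is a fixed-point long division: once the exponents are aligned, the mantissa of |y| is subtracted from the running remainder wherever it fits, one bit position per iteration, and the result is renormalized at the end. A rough integer analogue of what the split {hx,lx}/{hy,ly} words are doing, assuming positive operands below 2^63 and the GCC __builtin_clzll intrinsic:

#include <stdint.h>
#include <stdio.h>

/* Integer analogue of the fixed-point loop above: compute a mod b by
   aligning b under a and subtracting wherever it fits, one bit per step.
   Assumes a, b > 0 and a < 2^63 so the shifted divisor cannot overflow.  */
static uint64_t
shift_subtract_mod (uint64_t a, uint64_t b)
{
  int sa = 63 - __builtin_clzll (a);    /* position of a's leading bit */
  int sb = 63 - __builtin_clzll (b);    /* position of b's leading bit */
  int n = sa - sb;                      /* number of alignment steps   */

  while (n-- > 0)
    {
      uint64_t d = b << (n + 1);        /* divisor aligned under the remainder */
      if (a >= d)
        a -= d;
    }
  if (a >= b)                           /* final, unshifted subtraction */
    a -= b;
  return a;
}

int
main (void)
{
  printf ("%llu\n", (unsigned long long) shift_subtract_mod (1000003, 97));
  printf ("%llu\n", 1000003ULL % 97ULL);
  return 0;
}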
diff --git a/libc/sysdeps/ieee754/dbl-64/e_gamma_r.c b/libc/sysdeps/ieee754/dbl-64/e_gamma_r.c
index 5b17f7b5a..13e389d7c 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_gamma_r.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_gamma_r.c
@@ -135,7 +135,7 @@ __ieee754_gamma_r (double x, int *signgamp)
*signgamp = 0;
return (x - x) / (x - x);
}
- if (__builtin_expect ((unsigned int) hx == 0xfff00000 && lx==0, 0))
+ if (__builtin_expect ((unsigned int) hx == 0xfff00000 && lx == 0, 0))
{
/* x == -Inf. According to ISO this is NaN. */
*signgamp = 0;
diff --git a/libc/sysdeps/ieee754/dbl-64/e_hypot.c b/libc/sysdeps/ieee754/dbl-64/e_hypot.c
index 2dd681cf1..9f4321efe 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_hypot.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_hypot.c
@@ -46,76 +46,101 @@
#include <math_private.h>
double
-__ieee754_hypot(double x, double y)
+__ieee754_hypot (double x, double y)
{
- double a,b,t1,t2,y1,y2,w;
- int32_t j,k,ha,hb;
+ double a, b, t1, t2, y1, y2, w;
+ int32_t j, k, ha, hb;
- GET_HIGH_WORD(ha,x);
- ha &= 0x7fffffff;
- GET_HIGH_WORD(hb,y);
- hb &= 0x7fffffff;
- if(hb > ha) {a=y;b=x;j=ha; ha=hb;hb=j;} else {a=x;b=y;}
- SET_HIGH_WORD(a,ha); /* a <- |a| */
- SET_HIGH_WORD(b,hb); /* b <- |b| */
- if((ha-hb)>0x3c00000) {return a+b;} /* x/y > 2**60 */
- k=0;
- if(__builtin_expect(ha > 0x5f300000, 0)) { /* a>2**500 */
- if(ha >= 0x7ff00000) { /* Inf or NaN */
- u_int32_t low;
- w = a+b; /* for sNaN */
- GET_LOW_WORD(low,a);
- if(((ha&0xfffff)|low)==0) w = a;
- GET_LOW_WORD(low,b);
- if(((hb^0x7ff00000)|low)==0) w = b;
- return w;
- }
- /* scale a and b by 2**-600 */
- ha -= 0x25800000; hb -= 0x25800000; k += 600;
- SET_HIGH_WORD(a,ha);
- SET_HIGH_WORD(b,hb);
+ GET_HIGH_WORD (ha, x);
+ ha &= 0x7fffffff;
+ GET_HIGH_WORD (hb, y);
+ hb &= 0x7fffffff;
+ if (hb > ha)
+ {
+ a = y; b = x; j = ha; ha = hb; hb = j;
+ }
+ else
+ {
+ a = x; b = y;
+ }
+ SET_HIGH_WORD (a, ha); /* a <- |a| */
+ SET_HIGH_WORD (b, hb); /* b <- |b| */
+ if ((ha - hb) > 0x3c00000)
+ {
+ return a + b;
+ } /* x/y > 2**60 */
+ k = 0;
+ if (__builtin_expect (ha > 0x5f300000, 0)) /* a>2**500 */
+ {
+ if (ha >= 0x7ff00000) /* Inf or NaN */
+ {
+ u_int32_t low;
+ w = a + b; /* for sNaN */
+ GET_LOW_WORD (low, a);
+ if (((ha & 0xfffff) | low) == 0)
+ w = a;
+ GET_LOW_WORD (low, b);
+ if (((hb ^ 0x7ff00000) | low) == 0)
+ w = b;
+ return w;
}
- if(__builtin_expect(hb < 0x20b00000, 0)) { /* b < 2**-500 */
- if(hb <= 0x000fffff) { /* subnormal b or 0 */
- u_int32_t low;
- GET_LOW_WORD(low,b);
- if((hb|low)==0) return a;
- t1=0;
- SET_HIGH_WORD(t1,0x7fd00000); /* t1=2^1022 */
- b *= t1;
- a *= t1;
- k -= 1022;
- } else { /* scale a and b by 2^600 */
- ha += 0x25800000; /* a *= 2^600 */
- hb += 0x25800000; /* b *= 2^600 */
- k -= 600;
- SET_HIGH_WORD(a,ha);
- SET_HIGH_WORD(b,hb);
- }
+ /* scale a and b by 2**-600 */
+ ha -= 0x25800000; hb -= 0x25800000; k += 600;
+ SET_HIGH_WORD (a, ha);
+ SET_HIGH_WORD (b, hb);
+ }
+ if (__builtin_expect (hb < 0x20b00000, 0)) /* b < 2**-500 */
+ {
+ if (hb <= 0x000fffff) /* subnormal b or 0 */
+ {
+ u_int32_t low;
+ GET_LOW_WORD (low, b);
+ if ((hb | low) == 0)
+ return a;
+ t1 = 0;
+ SET_HIGH_WORD (t1, 0x7fd00000); /* t1=2^1022 */
+ b *= t1;
+ a *= t1;
+ k -= 1022;
}
- /* medium size a and b */
- w = a-b;
- if (w>b) {
- t1 = 0;
- SET_HIGH_WORD(t1,ha);
- t2 = a-t1;
- w = __ieee754_sqrt(t1*t1-(b*(-b)-t2*(a+t1)));
- } else {
- a = a+a;
- y1 = 0;
- SET_HIGH_WORD(y1,hb);
- y2 = b - y1;
- t1 = 0;
- SET_HIGH_WORD(t1,ha+0x00100000);
- t2 = a - t1;
- w = __ieee754_sqrt(t1*y1-(w*(-w)-(t1*y2+t2*b)));
+ else /* scale a and b by 2^600 */
+ {
+ ha += 0x25800000; /* a *= 2^600 */
+ hb += 0x25800000; /* b *= 2^600 */
+ k -= 600;
+ SET_HIGH_WORD (a, ha);
+ SET_HIGH_WORD (b, hb);
}
- if(k!=0) {
- u_int32_t high;
- t1 = 1.0;
- GET_HIGH_WORD(high,t1);
- SET_HIGH_WORD(t1,high+(k<<20));
- return t1*w;
- } else return w;
+ }
+ /* medium size a and b */
+ w = a - b;
+ if (w > b)
+ {
+ t1 = 0;
+ SET_HIGH_WORD (t1, ha);
+ t2 = a - t1;
+ w = __ieee754_sqrt (t1 * t1 - (b * (-b) - t2 * (a + t1)));
+ }
+ else
+ {
+ a = a + a;
+ y1 = 0;
+ SET_HIGH_WORD (y1, hb);
+ y2 = b - y1;
+ t1 = 0;
+ SET_HIGH_WORD (t1, ha + 0x00100000);
+ t2 = a - t1;
+ w = __ieee754_sqrt (t1 * y1 - (w * (-w) - (t1 * y2 + t2 * b)));
+ }
+ if (k != 0)
+ {
+ u_int32_t high;
+ t1 = 1.0;
+ GET_HIGH_WORD (high, t1);
+ SET_HIGH_WORD (t1, high + (k << 20));
+ return t1 * w;
+ }
+ else
+ return w;
}
strong_alias (__ieee754_hypot, __hypot_finite)
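Whitespace aside, __ieee754_hypot orders the operands so that |a| >= |b|, returns a + b when their ratio exceeds 2^60, and otherwise rescales both by a power of two so that neither square can overflow or underflow, undoing the scale at the end. A sketch of that scaling idea using frexp/ldexp instead of the SET_HIGH_WORD word surgery; it is an illustration only and makes no claim about the ulp bound of the real routine:

#include <math.h>
#include <stdio.h>

/* Sketch of the overflow/underflow-safe strategy above: pull out a power
   of two, do the naive computation near 1.0, then scale back.  */
static double
sketch_hypot (double x, double y)
{
  double a = fabs (x), b = fabs (y);
  if (a < b)
    {
      double t = a; a = b; b = t;       /* ensure a >= b */
    }
  if (b == 0.0)
    return a;

  int ea;
  (void) frexp (a, &ea);                /* a = m * 2^ea with 0.5 <= m < 1 */
  double as = ldexp (a, -ea);           /* scaled a, close to 1           */
  double bs = ldexp (b, -ea);           /* same factor keeps the ratio    */

  return ldexp (sqrt (as * as + bs * bs), ea);  /* undo the scaling */
}

int
main (void)
{
  printf ("%.17g\n", sketch_hypot (3e300, 4e300));    /* naive form overflows  */
  printf ("%.17g\n", sketch_hypot (3e-300, 4e-300));  /* naive form underflows */
  return 0;
}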
diff --git a/libc/sysdeps/ieee754/dbl-64/e_ilogb.c b/libc/sysdeps/ieee754/dbl-64/e_ilogb.c
index 0452a71fb..1e338a59c 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_ilogb.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_ilogb.c
@@ -25,30 +25,39 @@ static char rcsid[] = "$NetBSD: s_ilogb.c,v 1.9 1995/05/10 20:47:28 jtc Exp $";
#include <math.h>
#include <math_private.h>
-int __ieee754_ilogb(double x)
+int
+__ieee754_ilogb (double x)
{
- int32_t hx,lx,ix;
+ int32_t hx, lx, ix;
- GET_HIGH_WORD(hx,x);
- hx &= 0x7fffffff;
- if(hx<0x00100000) {
- GET_LOW_WORD(lx,x);
- if((hx|lx)==0)
- return FP_ILOGB0; /* ilogb(0) = FP_ILOGB0 */
- else /* subnormal x */
- if(hx==0) {
- for (ix = -1043; lx>0; lx<<=1) ix -=1;
- } else {
- for (ix = -1022,hx<<=11; hx>0; hx<<=1) ix -=1;
- }
- return ix;
+ GET_HIGH_WORD (hx, x);
+ hx &= 0x7fffffff;
+ if (hx < 0x00100000)
+ {
+ GET_LOW_WORD (lx, x);
+ if ((hx | lx) == 0)
+ return FP_ILOGB0; /* ilogb(0) = FP_ILOGB0 */
+ else /* subnormal x */
+ if (hx == 0)
+ {
+ for (ix = -1043; lx > 0; lx <<= 1)
+ ix -= 1;
}
- else if (hx<0x7ff00000) return (hx>>20)-1023;
- else if (FP_ILOGBNAN != INT_MAX) {
- /* ISO C99 requires ilogb(+-Inf) == INT_MAX. */
- GET_LOW_WORD(lx,x);
- if(((hx^0x7ff00000)|lx) == 0)
- return INT_MAX;
+ else
+ {
+ for (ix = -1022, hx <<= 11; hx > 0; hx <<= 1)
+ ix -= 1;
}
- return FP_ILOGBNAN;
+ return ix;
+ }
+ else if (hx < 0x7ff00000)
+ return (hx >> 20) - 1023;
+ else if (FP_ILOGBNAN != INT_MAX)
+ {
+ /* ISO C99 requires ilogb(+-Inf) == INT_MAX. */
+ GET_LOW_WORD (lx, x);
+ if (((hx ^ 0x7ff00000) | lx) == 0)
+ return INT_MAX;
+ }
+ return FP_ILOGBNAN;
}
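The fast path of __ieee754_ilogb is just the biased exponent field read out of the high word, (hx >> 20) - 1023; zero, subnormals, infinities and NaNs take the special returns above. The same extraction on the whole 64-bit pattern, assuming the usual IEEE 754 binary64 layout:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Read the unbiased exponent of a normal double from its bit pattern,
   i.e. the (hx >> 20) - 1023 step above done on the full 64-bit word.  */
static int
sketch_ilogb_normal (double x)
{
  uint64_t bits;
  memcpy (&bits, &x, sizeof bits);              /* well-defined reinterpretation */
  int biased = (int) ((bits >> 52) & 0x7ff);    /* 11 exponent bits */
  return biased - 1023;                         /* remove the IEEE bias */
}

int
main (void)
{
  printf ("%d %d %d\n",
          sketch_ilogb_normal (1.0),    /* 0  */
          sketch_ilogb_normal (10.0),   /* 3  */
          sketch_ilogb_normal (0.75));  /* -1 */
  return 0;
}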
diff --git a/libc/sysdeps/ieee754/dbl-64/e_j0.c b/libc/sysdeps/ieee754/dbl-64/e_j0.c
index d641a0914..d165e8092 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_j0.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_j0.c
@@ -11,7 +11,7 @@
*/
/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/26,
for performance improvement on pipelined processors.
-*/
+ */
/* __ieee754_j0(x), __ieee754_y0(x)
* Bessel function of the first and second kinds of order zero.
@@ -61,154 +61,166 @@
#include <math.h>
#include <math_private.h>
-static double pzero(double), qzero(double);
+static double pzero (double), qzero (double);
static const double
-huge = 1e300,
-one = 1.0,
-invsqrtpi= 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */
-tpi = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
- /* R0/S0 on [0, 2.00] */
-R[] = {0.0, 0.0, 1.56249999999999947958e-02, /* 0x3F8FFFFF, 0xFFFFFFFD */
- -1.89979294238854721751e-04, /* 0xBF28E6A5, 0xB61AC6E9 */
- 1.82954049532700665670e-06, /* 0x3EBEB1D1, 0x0C503919 */
- -4.61832688532103189199e-09}, /* 0xBE33D5E7, 0x73D63FCE */
-S[] = {0.0, 1.56191029464890010492e-02, /* 0x3F8FFCE8, 0x82C8C2A4 */
- 1.16926784663337450260e-04, /* 0x3F1EA6D2, 0xDD57DBF4 */
- 5.13546550207318111446e-07, /* 0x3EA13B54, 0xCE84D5A9 */
- 1.16614003333790000205e-09}; /* 0x3E1408BC, 0xF4745D8F */
+ huge = 1e300,
+ one = 1.0,
+ invsqrtpi = 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */
+ tpi = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
+/* R0/S0 on [0, 2.00] */
+ R[] = { 0.0, 0.0, 1.56249999999999947958e-02, /* 0x3F8FFFFF, 0xFFFFFFFD */
+ -1.89979294238854721751e-04, /* 0xBF28E6A5, 0xB61AC6E9 */
+ 1.82954049532700665670e-06, /* 0x3EBEB1D1, 0x0C503919 */
+ -4.61832688532103189199e-09 }, /* 0xBE33D5E7, 0x73D63FCE */
+ S[] = { 0.0, 1.56191029464890010492e-02, /* 0x3F8FFCE8, 0x82C8C2A4 */
+ 1.16926784663337450260e-04, /* 0x3F1EA6D2, 0xDD57DBF4 */
+ 5.13546550207318111446e-07, /* 0x3EA13B54, 0xCE84D5A9 */
+ 1.16614003333790000205e-09 }; /* 0x3E1408BC, 0xF4745D8F */
static const double zero = 0.0;
double
-__ieee754_j0(double x)
+__ieee754_j0 (double x)
{
- double z, s,c,ss,cc,r,u,v,r1,r2,s1,s2,z2,z4;
- int32_t hx,ix;
+ double z, s, c, ss, cc, r, u, v, r1, r2, s1, s2, z2, z4;
+ int32_t hx, ix;
- GET_HIGH_WORD(hx,x);
- ix = hx&0x7fffffff;
- if(ix>=0x7ff00000) return one/(x*x);
- x = fabs(x);
- if(ix >= 0x40000000) { /* |x| >= 2.0 */
- __sincos (x, &s, &c);
- ss = s-c;
- cc = s+c;
- if(ix<0x7fe00000) { /* make sure x+x not overflow */
- z = -__cos(x+x);
- if ((s*c)<zero) cc = z/ss;
- else ss = z/cc;
- }
- /*
- * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
- * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
- */
- if(ix>0x48000000) z = (invsqrtpi*cc)/__ieee754_sqrt(x);
- else {
- u = pzero(x); v = qzero(x);
- z = invsqrtpi*(u*cc-v*ss)/__ieee754_sqrt(x);
- }
- return z;
- }
- if(ix<0x3f200000) { /* |x| < 2**-13 */
- math_force_eval(huge+x); /* raise inexact if x != 0 */
- if(ix<0x3e400000) return one; /* |x|<2**-27 */
- else return one - 0.25*x*x;
+ GET_HIGH_WORD (hx, x);
+ ix = hx & 0x7fffffff;
+ if (ix >= 0x7ff00000)
+ return one / (x * x);
+ x = fabs (x);
+ if (ix >= 0x40000000) /* |x| >= 2.0 */
+ {
+ __sincos (x, &s, &c);
+ ss = s - c;
+ cc = s + c;
+ if (ix < 0x7fe00000) /* make sure x+x not overflow */
+ {
+ z = -__cos (x + x);
+ if ((s * c) < zero)
+ cc = z / ss;
+ else
+ ss = z / cc;
}
- z = x*x;
-#ifdef DO_NOT_USE_THIS
- r = z*(R02+z*(R03+z*(R04+z*R05)));
- s = one+z*(S01+z*(S02+z*(S03+z*S04)));
-#else
- r1 = z*R[2]; z2=z*z;
- r2 = R[3]+z*R[4]; z4=z2*z2;
- r = r1 + z2*r2 + z4*R[5];
- s1 = one+z*S[1];
- s2 = S[2]+z*S[3];
- s = s1 + z2*s2 + z4*S[4];
-#endif
- if(ix < 0x3FF00000) { /* |x| < 1.00 */
- return one + z*(-0.25+(r/s));
- } else {
- u = 0.5*x;
- return((one+u)*(one-u)+z*(r/s));
+ /*
+ * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
+ * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
+ */
+ if (ix > 0x48000000)
+ z = (invsqrtpi * cc) / __ieee754_sqrt (x);
+ else
+ {
+ u = pzero (x); v = qzero (x);
+ z = invsqrtpi * (u * cc - v * ss) / __ieee754_sqrt (x);
}
+ return z;
+ }
+ if (ix < 0x3f200000) /* |x| < 2**-13 */
+ {
+ math_force_eval (huge + x); /* raise inexact if x != 0 */
+ if (ix < 0x3e400000)
+ return one; /* |x|<2**-27 */
+ else
+ return one - 0.25 * x * x;
+ }
+ z = x * x;
+ r1 = z * R[2]; z2 = z * z;
+ r2 = R[3] + z * R[4]; z4 = z2 * z2;
+ r = r1 + z2 * r2 + z4 * R[5];
+ s1 = one + z * S[1];
+ s2 = S[2] + z * S[3];
+ s = s1 + z2 * s2 + z4 * S[4];
+ if (ix < 0x3FF00000) /* |x| < 1.00 */
+ {
+ return one + z * (-0.25 + (r / s));
+ }
+ else
+ {
+ u = 0.5 * x;
+ return ((one + u) * (one - u) + z * (r / s));
+ }
}
strong_alias (__ieee754_j0, __j0_finite)
static const double
-U[] = {-7.38042951086872317523e-02, /* 0xBFB2E4D6, 0x99CBD01F */
- 1.76666452509181115538e-01, /* 0x3FC69D01, 0x9DE9E3FC */
- -1.38185671945596898896e-02, /* 0xBF8C4CE8, 0xB16CFA97 */
- 3.47453432093683650238e-04, /* 0x3F36C54D, 0x20B29B6B */
- -3.81407053724364161125e-06, /* 0xBECFFEA7, 0x73D25CAD */
- 1.95590137035022920206e-08, /* 0x3E550057, 0x3B4EABD4 */
- -3.98205194132103398453e-11}, /* 0xBDC5E43D, 0x693FB3C8 */
-V[] = {1.27304834834123699328e-02, /* 0x3F8A1270, 0x91C9C71A */
- 7.60068627350353253702e-05, /* 0x3F13ECBB, 0xF578C6C1 */
- 2.59150851840457805467e-07, /* 0x3E91642D, 0x7FF202FD */
- 4.41110311332675467403e-10}; /* 0x3DFE5018, 0x3BD6D9EF */
+U[] = { -7.38042951086872317523e-02, /* 0xBFB2E4D6, 0x99CBD01F */
+ 1.76666452509181115538e-01, /* 0x3FC69D01, 0x9DE9E3FC */
+ -1.38185671945596898896e-02, /* 0xBF8C4CE8, 0xB16CFA97 */
+ 3.47453432093683650238e-04, /* 0x3F36C54D, 0x20B29B6B */
+ -3.81407053724364161125e-06, /* 0xBECFFEA7, 0x73D25CAD */
+ 1.95590137035022920206e-08, /* 0x3E550057, 0x3B4EABD4 */
+ -3.98205194132103398453e-11 }, /* 0xBDC5E43D, 0x693FB3C8 */
+V[] = { 1.27304834834123699328e-02, /* 0x3F8A1270, 0x91C9C71A */
+ 7.60068627350353253702e-05, /* 0x3F13ECBB, 0xF578C6C1 */
+ 2.59150851840457805467e-07, /* 0x3E91642D, 0x7FF202FD */
+ 4.41110311332675467403e-10 }; /* 0x3DFE5018, 0x3BD6D9EF */
double
-__ieee754_y0(double x)
+__ieee754_y0 (double x)
{
- double z, s,c,ss,cc,u,v,z2,z4,z6,u1,u2,u3,v1,v2;
- int32_t hx,ix,lx;
+ double z, s, c, ss, cc, u, v, z2, z4, z6, u1, u2, u3, v1, v2;
+ int32_t hx, ix, lx;
- EXTRACT_WORDS(hx,lx,x);
- ix = 0x7fffffff&hx;
- /* Y0(NaN) is NaN, y0(-inf) is Nan, y0(inf) is 0, y0(0) is -inf. */
- if(ix>=0x7ff00000) return one/(x+x*x);
- if((ix|lx)==0) return -HUGE_VAL+x; /* -inf and overflow exception. */
- if(hx<0) return zero/(zero*x);
- if(ix >= 0x40000000) { /* |x| >= 2.0 */
- /* y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x0)+q0(x)*cos(x0))
- * where x0 = x-pi/4
- * Better formula:
- * cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4)
- * = 1/sqrt(2) * (sin(x) + cos(x))
- * sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4)
- * = 1/sqrt(2) * (sin(x) - cos(x))
- * To avoid cancellation, use
- * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
- * to compute the worse one.
- */
- __sincos (x, &s, &c);
- ss = s-c;
- cc = s+c;
- /*
- * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
- * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
- */
- if(ix<0x7fe00000) { /* make sure x+x not overflow */
- z = -__cos(x+x);
- if ((s*c)<zero) cc = z/ss;
- else ss = z/cc;
- }
- if(ix>0x48000000) z = (invsqrtpi*ss)/__ieee754_sqrt(x);
- else {
- u = pzero(x); v = qzero(x);
- z = invsqrtpi*(u*ss+v*cc)/__ieee754_sqrt(x);
- }
- return z;
+ EXTRACT_WORDS (hx, lx, x);
+ ix = 0x7fffffff & hx;
+ /* Y0(NaN) is NaN, y0(-inf) is NaN, y0(inf) is 0, y0(0) is -inf. */
+ if (ix >= 0x7ff00000)
+ return one / (x + x * x);
+ if ((ix | lx) == 0)
+ return -HUGE_VAL + x; /* -inf and overflow exception. */
+ if (hx < 0)
+ return zero / (zero * x);
+ if (ix >= 0x40000000) /* |x| >= 2.0 */
+ { /* y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x0)+q0(x)*cos(x0))
+ * where x0 = x-pi/4
+ * Better formula:
+ * cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4)
+ * = 1/sqrt(2) * (sin(x) + cos(x))
+ * sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4)
+ * = 1/sqrt(2) * (sin(x) - cos(x))
+ * To avoid cancellation, use
+ * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
+ * to compute the worse one.
+ */
+ __sincos (x, &s, &c);
+ ss = s - c;
+ cc = s + c;
+ /*
+ * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x)
+ * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x)
+ */
+ if (ix < 0x7fe00000) /* make sure x+x not overflow */
+ {
+ z = -__cos (x + x);
+ if ((s * c) < zero)
+ cc = z / ss;
+ else
+ ss = z / cc;
}
- if(ix<=0x3e400000) { /* x < 2**-27 */
- return(U[0] + tpi*__ieee754_log(x));
+ if (ix > 0x48000000)
+ z = (invsqrtpi * ss) / __ieee754_sqrt (x);
+ else
+ {
+ u = pzero (x); v = qzero (x);
+ z = invsqrtpi * (u * ss + v * cc) / __ieee754_sqrt (x);
}
- z = x*x;
-#ifdef DO_NOT_USE_THIS
- u = u00+z*(u01+z*(u02+z*(u03+z*(u04+z*(u05+z*u06)))));
- v = one+z*(v01+z*(v02+z*(v03+z*v04)));
-#else
- u1 = U[0]+z*U[1]; z2=z*z;
- u2 = U[2]+z*U[3]; z4=z2*z2;
- u3 = U[4]+z*U[5]; z6=z4*z2;
- u = u1 + z2*u2 + z4*u3 + z6*U[6];
- v1 = one+z*V[0];
- v2 = V[1]+z*V[2];
- v = v1 + z2*v2 + z4*V[3];
-#endif
- return(u/v + tpi*(__ieee754_j0(x)*__ieee754_log(x)));
+ return z;
+ }
+ if (ix <= 0x3e400000) /* x < 2**-27 */
+ {
+ return (U[0] + tpi * __ieee754_log (x));
+ }
+ z = x * x;
+ u1 = U[0] + z * U[1]; z2 = z * z;
+ u2 = U[2] + z * U[3]; z4 = z2 * z2;
+ u3 = U[4] + z * U[5]; z6 = z4 * z2;
+ u = u1 + z2 * u2 + z4 * u3 + z6 * U[6];
+ v1 = one + z * V[0];
+ v2 = V[1] + z * V[2];
+ v = v1 + z2 * v2 + z4 * V[3];
+ return (u / v + tpi * (__ieee754_j0 (x) * __ieee754_log (x)));
}
strong_alias (__ieee754_y0, __y0_finite)
@@ -286,33 +298,43 @@ static const double pS2[5] = {
};
static double
-pzero(double x)
+pzero (double x)
{
- const double *p,*q;
- double z,r,s,z2,z4,r1,r2,r3,s1,s2,s3;
- int32_t ix;
- GET_HIGH_WORD(ix,x);
- ix &= 0x7fffffff;
- if (ix>=0x41b00000) {return one;}
- else if(ix>=0x40200000){p = pR8; q= pS8;}
- else if(ix>=0x40122E8B){p = pR5; q= pS5;}
- else if(ix>=0x4006DB6D){p = pR3; q= pS3;}
- else if(ix>=0x40000000){p = pR2; q= pS2;}
- z = one/(x*x);
-#ifdef DO_NOT_USE_THIS
- r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
- s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4]))));
-#else
- r1 = p[0]+z*p[1]; z2=z*z;
- r2 = p[2]+z*p[3]; z4=z2*z2;
- r3 = p[4]+z*p[5];
- r = r1 + z2*r2 + z4*r3;
- s1 = one+z*q[0];
- s2 = q[1]+z*q[2];
- s3 = q[3]+z*q[4];
- s = s1 + z2*s2 + z4*s3;
-#endif
- return one+ r/s;
+ const double *p, *q;
+ double z, r, s, z2, z4, r1, r2, r3, s1, s2, s3;
+ int32_t ix;
+ GET_HIGH_WORD (ix, x);
+ ix &= 0x7fffffff;
+ if (ix >= 0x41b00000)
+ {
+ return one;
+ }
+ else if (ix >= 0x40200000)
+ {
+ p = pR8; q = pS8;
+ }
+ else if (ix >= 0x40122E8B)
+ {
+ p = pR5; q = pS5;
+ }
+ else if (ix >= 0x4006DB6D)
+ {
+ p = pR3; q = pS3;
+ }
+ else if (ix >= 0x40000000)
+ {
+ p = pR2; q = pS2;
+ }
+ z = one / (x * x);
+ r1 = p[0] + z * p[1]; z2 = z * z;
+ r2 = p[2] + z * p[3]; z4 = z2 * z2;
+ r3 = p[4] + z * p[5];
+ r = r1 + z2 * r2 + z4 * r3;
+ s1 = one + z * q[0];
+ s2 = q[1] + z * q[2];
+ s3 = q[3] + z * q[4];
+ s = s1 + z2 * s2 + z4 * s3;
+ return one + r / s;
}
@@ -394,31 +416,41 @@ static const double qS2[6] = {
};
static double
-qzero(double x)
+qzero (double x)
{
- const double *p,*q;
- double s,r,z,z2,z4,z6,r1,r2,r3,s1,s2,s3;
- int32_t ix;
- GET_HIGH_WORD(ix,x);
- ix &= 0x7fffffff;
- if (ix>=0x41b00000) {return -.125/x;}
- else if(ix>=0x40200000){p = qR8; q= qS8;}
- else if(ix>=0x40122E8B){p = qR5; q= qS5;}
- else if(ix>=0x4006DB6D){p = qR3; q= qS3;}
- else if(ix>=0x40000000){p = qR2; q= qS2;}
- z = one/(x*x);
-#ifdef DO_NOT_USE_THIS
- r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
- s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5])))));
-#else
- r1 = p[0]+z*p[1]; z2=z*z;
- r2 = p[2]+z*p[3]; z4=z2*z2;
- r3 = p[4]+z*p[5]; z6=z4*z2;
- r= r1 + z2*r2 + z4*r3;
- s1 = one+z*q[0];
- s2 = q[1]+z*q[2];
- s3 = q[3]+z*q[4];
- s = s1 + z2*s2 + z4*s3 +z6*q[5];
-#endif
- return (-.125 + r/s)/x;
+ const double *p, *q;
+ double s, r, z, z2, z4, z6, r1, r2, r3, s1, s2, s3;
+ int32_t ix;
+ GET_HIGH_WORD (ix, x);
+ ix &= 0x7fffffff;
+ if (ix >= 0x41b00000)
+ {
+ return -.125 / x;
+ }
+ else if (ix >= 0x40200000)
+ {
+ p = qR8; q = qS8;
+ }
+ else if (ix >= 0x40122E8B)
+ {
+ p = qR5; q = qS5;
+ }
+ else if (ix >= 0x4006DB6D)
+ {
+ p = qR3; q = qS3;
+ }
+ else if (ix >= 0x40000000)
+ {
+ p = qR2; q = qS2;
+ }
+ z = one / (x * x);
+ r1 = p[0] + z * p[1]; z2 = z * z;
+ r2 = p[2] + z * p[3]; z4 = z2 * z2;
+ r3 = p[4] + z * p[5]; z6 = z4 * z2;
+ r = r1 + z2 * r2 + z4 * r3;
+ s1 = one + z * q[0];
+ s2 = q[1] + z * q[2];
+ s3 = q[3] + z * q[4];
+ s = s1 + z2 * s2 + z4 * s3 + z6 * q[5];
+ return (-.125 + r / s) / x;
}
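The deleted DO_NOT_USE_THIS branches held the straight Horner forms of the rational approximations; the surviving code evaluates the same polynomials as r1 + z2*r2 + z4*r3 so that the partial products are independent and can overlap on a pipelined FPU, which is the Shimizu change noted in the file header. The two forms agree up to rounding, as this toy comparison shows; the coefficients are arbitrary demo values, not the pR/pS tables:

#include <stdio.h>

/* The same degree-5 polynomial two ways: plain Horner versus the
   r1 + z2*r2 + z4*r3 split used in pzero/qzero above.  Coefficients
   are arbitrary demo values, not the glibc tables.  */
int
main (void)
{
  const double c[6] = { 1.0, -0.5, 0.25, -0.125, 0.0625, -0.03125 };
  double z = 0.37;

  double horner = c[0] + z * (c[1] + z * (c[2] + z * (c[3]
                   + z * (c[4] + z * c[5]))));

  double z2 = z * z, z4 = z2 * z2;
  double r1 = c[0] + z * c[1];          /* the three pairs are independent */
  double r2 = c[2] + z * c[3];
  double r3 = c[4] + z * c[5];
  double split = r1 + z2 * r2 + z4 * r3;

  printf ("%.17g\n%.17g\n", horner, split);
  return 0;
}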
diff --git a/libc/sysdeps/ieee754/dbl-64/e_j1.c b/libc/sysdeps/ieee754/dbl-64/e_j1.c
index cca5f20b4..ab754c6ee 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_j1.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_j1.c
@@ -11,7 +11,7 @@
*/
/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/26,
for performance improvement on pipelined processors.
-*/
+ */
/* __ieee754_j1(x), __ieee754_y1(x)
* Bessel function of the first and second kinds of order zero.
@@ -61,76 +61,81 @@
#include <math.h>
#include <math_private.h>
-static double pone(double), qone(double);
+static double pone (double), qone (double);
static const double
-huge = 1e300,
-one = 1.0,
-invsqrtpi= 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */
-tpi = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
- /* R0/S0 on [0,2] */
-R[] = {-6.25000000000000000000e-02, /* 0xBFB00000, 0x00000000 */
- 1.40705666955189706048e-03, /* 0x3F570D9F, 0x98472C61 */
- -1.59955631084035597520e-05, /* 0xBEF0C5C6, 0xBA169668 */
- 4.96727999609584448412e-08}, /* 0x3E6AAAFA, 0x46CA0BD9 */
-S[] = {0.0, 1.91537599538363460805e-02, /* 0x3F939D0B, 0x12637E53 */
- 1.85946785588630915560e-04, /* 0x3F285F56, 0xB9CDF664 */
- 1.17718464042623683263e-06, /* 0x3EB3BFF8, 0x333F8498 */
- 5.04636257076217042715e-09, /* 0x3E35AC88, 0xC97DFF2C */
- 1.23542274426137913908e-11}; /* 0x3DAB2ACF, 0xCFB97ED8 */
+ huge = 1e300,
+ one = 1.0,
+ invsqrtpi = 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */
+ tpi = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
+/* R0/S0 on [0,2] */
+ R[] = { -6.25000000000000000000e-02, /* 0xBFB00000, 0x00000000 */
+ 1.40705666955189706048e-03, /* 0x3F570D9F, 0x98472C61 */
+ -1.59955631084035597520e-05, /* 0xBEF0C5C6, 0xBA169668 */
+ 4.96727999609584448412e-08 }, /* 0x3E6AAAFA, 0x46CA0BD9 */
+ S[] = { 0.0, 1.91537599538363460805e-02, /* 0x3F939D0B, 0x12637E53 */
+ 1.85946785588630915560e-04, /* 0x3F285F56, 0xB9CDF664 */
+ 1.17718464042623683263e-06, /* 0x3EB3BFF8, 0x333F8498 */
+ 5.04636257076217042715e-09, /* 0x3E35AC88, 0xC97DFF2C */
+ 1.23542274426137913908e-11 }; /* 0x3DAB2ACF, 0xCFB97ED8 */
-static const double zero = 0.0;
+static const double zero = 0.0;
double
-__ieee754_j1(double x)
+__ieee754_j1 (double x)
{
- double z, s,c,ss,cc,r,u,v,y,r1,r2,s1,s2,s3,z2,z4;
- int32_t hx,ix;
+ double z, s, c, ss, cc, r, u, v, y, r1, r2, s1, s2, s3, z2, z4;
+ int32_t hx, ix;
- GET_HIGH_WORD(hx,x);
- ix = hx&0x7fffffff;
- if(__builtin_expect(ix>=0x7ff00000, 0)) return one/x;
- y = fabs(x);
- if(ix >= 0x40000000) { /* |x| >= 2.0 */
- __sincos (y, &s, &c);
- ss = -s-c;
- cc = s-c;
- if(ix<0x7fe00000) { /* make sure y+y not overflow */
- z = __cos(y+y);
- if ((s*c)>zero) cc = z/ss;
- else ss = z/cc;
- }
- /*
- * j1(x) = 1/sqrt(pi) * (P(1,x)*cc - Q(1,x)*ss) / sqrt(x)
- * y1(x) = 1/sqrt(pi) * (P(1,x)*ss + Q(1,x)*cc) / sqrt(x)
- */
- if(ix>0x48000000) z = (invsqrtpi*cc)/__ieee754_sqrt(y);
- else {
- u = pone(y); v = qone(y);
- z = invsqrtpi*(u*cc-v*ss)/__ieee754_sqrt(y);
- }
- if(hx<0) return -z;
- else return z;
+ GET_HIGH_WORD (hx, x);
+ ix = hx & 0x7fffffff;
+ if (__builtin_expect (ix >= 0x7ff00000, 0))
+ return one / x;
+ y = fabs (x);
+ if (ix >= 0x40000000) /* |x| >= 2.0 */
+ {
+ __sincos (y, &s, &c);
+ ss = -s - c;
+ cc = s - c;
+ if (ix < 0x7fe00000) /* make sure y+y not overflow */
+ {
+ z = __cos (y + y);
+ if ((s * c) > zero)
+ cc = z / ss;
+ else
+ ss = z / cc;
}
- if(__builtin_expect(ix<0x3e400000, 0)) { /* |x|<2**-27 */
- if(huge+x>one) return 0.5*x;/* inexact if x!=0 necessary */
+ /*
+ * j1(x) = 1/sqrt(pi) * (P(1,x)*cc - Q(1,x)*ss) / sqrt(x)
+ * y1(x) = 1/sqrt(pi) * (P(1,x)*ss + Q(1,x)*cc) / sqrt(x)
+ */
+ if (ix > 0x48000000)
+ z = (invsqrtpi * cc) / __ieee754_sqrt (y);
+ else
+ {
+ u = pone (y); v = qone (y);
+ z = invsqrtpi * (u * cc - v * ss) / __ieee754_sqrt (y);
}
- z = x*x;
-#ifdef DO_NOT_USE_THIS
- r = z*(r00+z*(r01+z*(r02+z*r03)));
- s = one+z*(s01+z*(s02+z*(s03+z*(s04+z*s05))));
- r *= x;
-#else
- r1 = z*R[0]; z2=z*z;
- r2 = R[1]+z*R[2]; z4=z2*z2;
- r = r1 + z2*r2 + z4*R[3];
- r *= x;
- s1 = one+z*S[1];
- s2 = S[2]+z*S[3];
- s3 = S[4]+z*S[5];
- s = s1 + z2*s2 + z4*s3;
-#endif
- return(x*0.5+r/s);
+ if (hx < 0)
+ return -z;
+ else
+ return z;
+ }
+ if (__builtin_expect (ix < 0x3e400000, 0)) /* |x|<2**-27 */
+ {
+ if (huge + x > one)
+ return 0.5 * x; /* inexact if x!=0 necessary */
+ }
+ z = x * x;
+ r1 = z * R[0]; z2 = z * z;
+ r2 = R[1] + z * R[2]; z4 = z2 * z2;
+ r = r1 + z2 * r2 + z4 * R[3];
+ r *= x;
+ s1 = one + z * S[1];
+ s2 = S[2] + z * S[3];
+ s3 = S[4] + z * S[5];
+ s = s1 + z2 * s2 + z4 * s3;
+ return (x * 0.5 + r / s);
}
strong_alias (__ieee754_j1, __j1_finite)
@@ -150,62 +155,67 @@ static const double V0[5] = {
};
double
-__ieee754_y1(double x)
+__ieee754_y1 (double x)
{
- double z, s,c,ss,cc,u,v,u1,u2,v1,v2,v3,z2,z4;
- int32_t hx,ix,lx;
+ double z, s, c, ss, cc, u, v, u1, u2, v1, v2, v3, z2, z4;
+ int32_t hx, ix, lx;
- EXTRACT_WORDS(hx,lx,x);
- ix = 0x7fffffff&hx;
- /* if Y1(NaN) is NaN, Y1(-inf) is NaN, Y1(inf) is 0 */
- if(__builtin_expect(ix>=0x7ff00000, 0)) return one/(x+x*x);
- if(__builtin_expect((ix|lx)==0, 0))
- return -HUGE_VAL+x; /* -inf and overflow exception. */;
- if(__builtin_expect(hx<0, 0)) return zero/(zero*x);
- if(ix >= 0x40000000) { /* |x| >= 2.0 */
- __sincos (x, &s, &c);
- ss = -s-c;
- cc = s-c;
- if(ix<0x7fe00000) { /* make sure x+x not overflow */
- z = __cos(x+x);
- if ((s*c)>zero) cc = z/ss;
- else ss = z/cc;
- }
- /* y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x0)+q1(x)*cos(x0))
- * where x0 = x-3pi/4
- * Better formula:
- * cos(x0) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4)
- * = 1/sqrt(2) * (sin(x) - cos(x))
- * sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4)
- * = -1/sqrt(2) * (cos(x) + sin(x))
- * To avoid cancellation, use
- * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
- * to compute the worse one.
- */
- if(ix>0x48000000) z = (invsqrtpi*ss)/__ieee754_sqrt(x);
- else {
- u = pone(x); v = qone(x);
- z = invsqrtpi*(u*ss+v*cc)/__ieee754_sqrt(x);
- }
- return z;
+ EXTRACT_WORDS (hx, lx, x);
+ ix = 0x7fffffff & hx;
+ /* if Y1(NaN) is NaN, Y1(-inf) is NaN, Y1(inf) is 0 */
+ if (__builtin_expect (ix >= 0x7ff00000, 0))
+ return one / (x + x * x);
+ if (__builtin_expect ((ix | lx) == 0, 0))
+ return -HUGE_VAL + x;
+ /* -inf and overflow exception. */;
+ if (__builtin_expect (hx < 0, 0))
+ return zero / (zero * x);
+ if (ix >= 0x40000000) /* |x| >= 2.0 */
+ {
+ __sincos (x, &s, &c);
+ ss = -s - c;
+ cc = s - c;
+ if (ix < 0x7fe00000) /* make sure x+x not overflow */
+ {
+ z = __cos (x + x);
+ if ((s * c) > zero)
+ cc = z / ss;
+ else
+ ss = z / cc;
}
- if(__builtin_expect(ix<=0x3c900000, 0)) { /* x < 2**-54 */
- return(-tpi/x);
+ /* y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x0)+q1(x)*cos(x0))
+ * where x0 = x-3pi/4
+ * Better formula:
+ * cos(x0) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4)
+ * = 1/sqrt(2) * (sin(x) - cos(x))
+ * sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4)
+ * = -1/sqrt(2) * (cos(x) + sin(x))
+ * To avoid cancellation, use
+ * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
+ * to compute the worse one.
+ */
+ if (ix > 0x48000000)
+ z = (invsqrtpi * ss) / __ieee754_sqrt (x);
+ else
+ {
+ u = pone (x); v = qone (x);
+ z = invsqrtpi * (u * ss + v * cc) / __ieee754_sqrt (x);
}
- z = x*x;
-#ifdef DO_NOT_USE_THIS
- u = U0[0]+z*(U0[1]+z*(U0[2]+z*(U0[3]+z*U0[4])));
- v = one+z*(V0[0]+z*(V0[1]+z*(V0[2]+z*(V0[3]+z*V0[4]))));
-#else
- u1 = U0[0]+z*U0[1];z2=z*z;
- u2 = U0[2]+z*U0[3];z4=z2*z2;
- u = u1 + z2*u2 + z4*U0[4];
- v1 = one+z*V0[0];
- v2 = V0[1]+z*V0[2];
- v3 = V0[3]+z*V0[4];
- v = v1 + z2*v2 + z4*v3;
-#endif
- return(x*(u/v) + tpi*(__ieee754_j1(x)*__ieee754_log(x)-one/x));
+ return z;
+ }
+ if (__builtin_expect (ix <= 0x3c900000, 0)) /* x < 2**-54 */
+ {
+ return (-tpi / x);
+ }
+ z = x * x;
+ u1 = U0[0] + z * U0[1]; z2 = z * z;
+ u2 = U0[2] + z * U0[3]; z4 = z2 * z2;
+ u = u1 + z2 * u2 + z4 * U0[4];
+ v1 = one + z * V0[0];
+ v2 = V0[1] + z * V0[2];
+ v3 = V0[3] + z * V0[4];
+ v = v1 + z2 * v2 + z4 * v3;
+ return (x * (u / v) + tpi * (__ieee754_j1 (x) * __ieee754_log (x) - one / x));
}
strong_alias (__ieee754_y1, __y1_finite)
@@ -267,7 +277,7 @@ static const double ps3[5] = {
1.03787932439639277504e+02, /* 0x4059F26D, 0x7C2EED53 */
};
-static const double pr2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
+static const double pr2[6] = { /* for x in [2.8570,2]=1/[0.3499,0.5] */
1.07710830106873743082e-07, /* 0x3E7CE9D4, 0xF65544F4 */
1.17176219462683348094e-01, /* 0x3FBDFF42, 0xBE760D83 */
2.36851496667608785174e+00, /* 0x4002F2B7, 0xF98FAEC0 */
@@ -284,33 +294,43 @@ static const double ps2[5] = {
};
static double
-pone(double x)
+pone (double x)
{
- const double *p,*q;
- double z,r,s,r1,r2,r3,s1,s2,s3,z2,z4;
- int32_t ix;
- GET_HIGH_WORD(ix,x);
- ix &= 0x7fffffff;
- if (ix>=0x41b00000) {return one;}
- else if(ix>=0x40200000){p = pr8; q= ps8;}
- else if(ix>=0x40122E8B){p = pr5; q= ps5;}
- else if(ix>=0x4006DB6D){p = pr3; q= ps3;}
- else if(ix>=0x40000000){p = pr2; q= ps2;}
- z = one/(x*x);
-#ifdef DO_NOT_USE_THIS
- r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
- s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4]))));
-#else
- r1 = p[0]+z*p[1]; z2=z*z;
- r2 = p[2]+z*p[3]; z4=z2*z2;
- r3 = p[4]+z*p[5];
- r = r1 + z2*r2 + z4*r3;
- s1 = one+z*q[0];
- s2 = q[1]+z*q[2];
- s3 = q[3]+z*q[4];
- s = s1 + z2*s2 + z4*s3;
-#endif
- return one+ r/s;
+ const double *p, *q;
+ double z, r, s, r1, r2, r3, s1, s2, s3, z2, z4;
+ int32_t ix;
+ GET_HIGH_WORD (ix, x);
+ ix &= 0x7fffffff;
+ if (ix >= 0x41b00000)
+ {
+ return one;
+ }
+ else if (ix >= 0x40200000)
+ {
+ p = pr8; q = ps8;
+ }
+ else if (ix >= 0x40122E8B)
+ {
+ p = pr5; q = ps5;
+ }
+ else if (ix >= 0x4006DB6D)
+ {
+ p = pr3; q = ps3;
+ }
+ else if (ix >= 0x40000000)
+ {
+ p = pr2; q = ps2;
+ }
+ z = one / (x * x);
+ r1 = p[0] + z * p[1]; z2 = z * z;
+ r2 = p[2] + z * p[3]; z4 = z2 * z2;
+ r3 = p[4] + z * p[5];
+ r = r1 + z2 * r2 + z4 * r3;
+ s1 = one + z * q[0];
+ s2 = q[1] + z * q[2];
+ s3 = q[3] + z * q[4];
+ s = s1 + z2 * s2 + z4 * s3;
+ return one + r / s;
}
@@ -375,7 +395,7 @@ static const double qs3[6] = {
-1.35201191444307340817e+02, /* 0xC060E670, 0x290A311F */
};
-static const double qr2[6] = {/* for x in [2.8570,2]=1/[0.3499,0.5] */
+static const double qr2[6] = { /* for x in [2.8570,2]=1/[0.3499,0.5] */
-1.78381727510958865572e-07, /* 0xBE87F126, 0x44C626D2 */
-1.02517042607985553460e-01, /* 0xBFBA3E8E, 0x9148B010 */
-2.75220568278187460720e+00, /* 0xC0060484, 0x69BB4EDA */
@@ -393,31 +413,41 @@ static const double qs2[6] = {
};
static double
-qone(double x)
+qone (double x)
{
- const double *p,*q;
- double s,r,z,r1,r2,r3,s1,s2,s3,z2,z4,z6;
- int32_t ix;
- GET_HIGH_WORD(ix,x);
- ix &= 0x7fffffff;
- if (ix>=0x41b00000) {return .375/x;}
- else if(ix>=0x40200000){p = qr8; q= qs8;}
- else if(ix>=0x40122E8B){p = qr5; q= qs5;}
- else if(ix>=0x4006DB6D){p = qr3; q= qs3;}
- else if(ix>=0x40000000){p = qr2; q= qs2;}
- z = one/(x*x);
-#ifdef DO_NOT_USE_THIS
- r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
- s = one+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5])))));
-#else
- r1 = p[0]+z*p[1]; z2=z*z;
- r2 = p[2]+z*p[3]; z4=z2*z2;
- r3 = p[4]+z*p[5]; z6=z4*z2;
- r = r1 + z2*r2 + z4*r3;
- s1 = one+z*q[0];
- s2 = q[1]+z*q[2];
- s3 = q[3]+z*q[4];
- s = s1 + z2*s2 + z4*s3 + z6*q[5];
-#endif
- return (.375 + r/s)/x;
+ const double *p, *q;
+ double s, r, z, r1, r2, r3, s1, s2, s3, z2, z4, z6;
+ int32_t ix;
+ GET_HIGH_WORD (ix, x);
+ ix &= 0x7fffffff;
+ if (ix >= 0x41b00000)
+ {
+ return .375 / x;
+ }
+ else if (ix >= 0x40200000)
+ {
+ p = qr8; q = qs8;
+ }
+ else if (ix >= 0x40122E8B)
+ {
+ p = qr5; q = qs5;
+ }
+ else if (ix >= 0x4006DB6D)
+ {
+ p = qr3; q = qs3;
+ }
+ else if (ix >= 0x40000000)
+ {
+ p = qr2; q = qs2;
+ }
+ z = one / (x * x);
+ r1 = p[0] + z * p[1]; z2 = z * z;
+ r2 = p[2] + z * p[3]; z4 = z2 * z2;
+ r3 = p[4] + z * p[5]; z6 = z4 * z2;
+ r = r1 + z2 * r2 + z4 * r3;
+ s1 = one + z * q[0];
+ s2 = q[1] + z * q[2];
+ s3 = q[3] + z * q[4];
+ s = s1 + z2 * s2 + z4 * s3 + z6 * q[5];
+ return (.375 + r / s) / x;
}
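The comment blocks kept above rely on the identity sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)), which follows from (s + c)(s - c) = -cos(2x), to recompute whichever combination loses bits to cancellation. A short numerical check of that identity in plain C, nothing glibc-specific:

#include <math.h>
#include <stdio.h>

/* Check sin(x) + cos(x) == -cos(2x) / (sin(x) - cos(x)), the cancellation
   workaround quoted in the comments above: (s + c)(s - c) = -cos(2x).  */
int
main (void)
{
  double x = 123456.789;                /* arbitrary argument */
  double s = sin (x), c = cos (x);

  double direct = s + c;                       /* may lose bits when s is near -c */
  double via_identity = -cos (x + x) / (s - c);

  printf ("%.17g\n%.17g\n", direct, via_identity);
  return 0;
}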
diff --git a/libc/sysdeps/ieee754/dbl-64/e_jn.c b/libc/sysdeps/ieee754/dbl-64/e_jn.c
index 0d2a24c93..f48e43a0d 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_jn.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_jn.c
@@ -41,246 +41,284 @@
#include <math_private.h>
static const double
-invsqrtpi= 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */
-two = 2.00000000000000000000e+00, /* 0x40000000, 0x00000000 */
-one = 1.00000000000000000000e+00; /* 0x3FF00000, 0x00000000 */
+ invsqrtpi = 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */
+ two = 2.00000000000000000000e+00, /* 0x40000000, 0x00000000 */
+ one = 1.00000000000000000000e+00; /* 0x3FF00000, 0x00000000 */
-static const double zero = 0.00000000000000000000e+00;
+static const double zero = 0.00000000000000000000e+00;
double
-__ieee754_jn(int n, double x)
+__ieee754_jn (int n, double x)
{
- int32_t i,hx,ix,lx, sgn;
- double a, b, temp, di;
- double z, w;
+ int32_t i, hx, ix, lx, sgn;
+ double a, b, temp, di;
+ double z, w;
- /* J(-n,x) = (-1)^n * J(n, x), J(n, -x) = (-1)^n * J(n, x)
- * Thus, J(-n,x) = J(n,-x)
- */
- EXTRACT_WORDS(hx,lx,x);
- ix = 0x7fffffff&hx;
- /* if J(n,NaN) is NaN */
- if(__builtin_expect((ix|((u_int32_t)(lx|-lx))>>31)>0x7ff00000, 0))
- return x+x;
- if(n<0){
- n = -n;
- x = -x;
- hx ^= 0x80000000;
+ /* J(-n,x) = (-1)^n * J(n, x), J(n, -x) = (-1)^n * J(n, x)
+ * Thus, J(-n,x) = J(n,-x)
+ */
+ EXTRACT_WORDS (hx, lx, x);
+ ix = 0x7fffffff & hx;
+ /* if J(n,NaN) is NaN */
+ if (__builtin_expect ((ix | ((u_int32_t) (lx | -lx)) >> 31) > 0x7ff00000, 0))
+ return x + x;
+ if (n < 0)
+ {
+ n = -n;
+ x = -x;
+ hx ^= 0x80000000;
+ }
+ if (n == 0)
+ return (__ieee754_j0 (x));
+ if (n == 1)
+ return (__ieee754_j1 (x));
+ sgn = (n & 1) & (hx >> 31); /* even n -- 0, odd n -- sign(x) */
+ x = fabs (x);
+ if (__builtin_expect ((ix | lx) == 0 || ix >= 0x7ff00000, 0))
+ /* if x is 0 or inf */
+ b = zero;
+ else if ((double) n <= x)
+ {
+ /* Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */
+ if (ix >= 0x52D00000) /* x > 2**302 */
+ { /* (x >> n**2)
+ * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi)
+ * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi)
+ * Let s=sin(x), c=cos(x),
+ * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then
+ *
+ * n sin(xn)*sqt2 cos(xn)*sqt2
+ * ----------------------------------
+ * 0 s-c c+s
+ * 1 -s-c -c+s
+ * 2 -s+c -c-s
+ * 3 s+c c-s
+ */
+ double s;
+ double c;
+ __sincos (x, &s, &c);
+ switch (n & 3)
+ {
+ case 0: temp = c + s; break;
+ case 1: temp = -c + s; break;
+ case 2: temp = -c - s; break;
+ case 3: temp = c - s; break;
+ }
+ b = invsqrtpi * temp / __ieee754_sqrt (x);
}
- if(n==0) return(__ieee754_j0(x));
- if(n==1) return(__ieee754_j1(x));
- sgn = (n&1)&(hx>>31); /* even n -- 0, odd n -- sign(x) */
- x = fabs(x);
- if(__builtin_expect((ix|lx)==0||ix>=0x7ff00000,0))
- /* if x is 0 or inf */
- b = zero;
- else if((double)n<=x) {
- /* Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */
- if(ix>=0x52D00000) { /* x > 2**302 */
- /* (x >> n**2)
- * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi)
- * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi)
- * Let s=sin(x), c=cos(x),
- * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then
- *
- * n sin(xn)*sqt2 cos(xn)*sqt2
- * ----------------------------------
- * 0 s-c c+s
- * 1 -s-c -c+s
- * 2 -s+c -c-s
- * 3 s+c c-s
- */
- double s;
- double c;
- __sincos (x, &s, &c);
- switch(n&3) {
- case 0: temp = c + s; break;
- case 1: temp = -c + s; break;
- case 2: temp = -c - s; break;
- case 3: temp = c - s; break;
- }
- b = invsqrtpi*temp/__ieee754_sqrt(x);
- } else {
- a = __ieee754_j0(x);
- b = __ieee754_j1(x);
- for(i=1;i<n;i++){
- temp = b;
- b = b*((double)(i+i)/x) - a; /* avoid underflow */
- a = temp;
- }
+ else
+ {
+ a = __ieee754_j0 (x);
+ b = __ieee754_j1 (x);
+ for (i = 1; i < n; i++)
+ {
+ temp = b;
+ b = b * ((double) (i + i) / x) - a; /* avoid underflow */
+ a = temp;
}
- } else {
- if(ix<0x3e100000) { /* x < 2**-29 */
- /* x is tiny, return the first Taylor expansion of J(n,x)
- * J(n,x) = 1/n!*(x/2)^n - ...
- */
- if(n>33) /* underflow */
- b = zero;
- else {
- temp = x*0.5; b = temp;
- for (a=one,i=2;i<=n;i++) {
- a *= (double)i; /* a = n! */
- b *= temp; /* b = (x/2)^n */
- }
- b = b/a;
+ }
+ }
+ else
+ {
+ if (ix < 0x3e100000) /* x < 2**-29 */
+ { /* x is tiny, return the first Taylor expansion of J(n,x)
+ * J(n,x) = 1/n!*(x/2)^n - ...
+ */
+ if (n > 33) /* underflow */
+ b = zero;
+ else
+ {
+ temp = x * 0.5; b = temp;
+ for (a = one, i = 2; i <= n; i++)
+ {
+ a *= (double) i; /* a = n! */
+ b *= temp; /* b = (x/2)^n */
}
- } else {
- /* use backward recurrence */
- /* x x^2 x^2
- * J(n,x)/J(n-1,x) = ---- ------ ------ .....
- * 2n - 2(n+1) - 2(n+2)
- *
- * 1 1 1
- * (for large x) = ---- ------ ------ .....
- * 2n 2(n+1) 2(n+2)
- * -- - ------ - ------ -
- * x x x
- *
- * Let w = 2n/x and h=2/x, then the above quotient
- * is equal to the continued fraction:
- * 1
- * = -----------------------
- * 1
- * w - -----------------
- * 1
- * w+h - ---------
- * w+2h - ...
- *
- * To determine how many terms needed, let
- * Q(0) = w, Q(1) = w(w+h) - 1,
- * Q(k) = (w+k*h)*Q(k-1) - Q(k-2),
- * When Q(k) > 1e4 good for single
- * When Q(k) > 1e9 good for double
- * When Q(k) > 1e17 good for quadruple
- */
- /* determine k */
- double t,v;
- double q0,q1,h,tmp; int32_t k,m;
- w = (n+n)/(double)x; h = 2.0/(double)x;
- q0 = w; z = w+h; q1 = w*z - 1.0; k=1;
- while(q1<1.0e9) {
- k += 1; z += h;
- tmp = z*q1 - q0;
- q0 = q1;
- q1 = tmp;
+ b = b / a;
+ }
+ }
+ else
+ {
+ /* use backward recurrence */
+ /* x x^2 x^2
+ * J(n,x)/J(n-1,x) = ---- ------ ------ .....
+ * 2n - 2(n+1) - 2(n+2)
+ *
+ * 1 1 1
+ * (for large x) = ---- ------ ------ .....
+ * 2n 2(n+1) 2(n+2)
+ * -- - ------ - ------ -
+ * x x x
+ *
+ * Let w = 2n/x and h=2/x, then the above quotient
+ * is equal to the continued fraction:
+ * 1
+ * = -----------------------
+ * 1
+ * w - -----------------
+ * 1
+ * w+h - ---------
+ * w+2h - ...
+ *
+ * To determine how many terms needed, let
+ * Q(0) = w, Q(1) = w(w+h) - 1,
+ * Q(k) = (w+k*h)*Q(k-1) - Q(k-2),
+ * When Q(k) > 1e4 good for single
+ * When Q(k) > 1e9 good for double
+ * When Q(k) > 1e17 good for quadruple
+ */
+ /* determine k */
+ double t, v;
+ double q0, q1, h, tmp; int32_t k, m;
+ w = (n + n) / (double) x; h = 2.0 / (double) x;
+ q0 = w; z = w + h; q1 = w * z - 1.0; k = 1;
+ while (q1 < 1.0e9)
+ {
+ k += 1; z += h;
+ tmp = z * q1 - q0;
+ q0 = q1;
+ q1 = tmp;
+ }
+ m = n + n;
+ for (t = zero, i = 2 * (n + k); i >= m; i -= 2)
+ t = one / (i / x - t);
+ a = t;
+ b = one;
+ /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n)
+ * Hence, if n*(log(2n/x)) > ...
+ * single 8.8722839355e+01
+ * double 7.09782712893383973096e+02
+ * long double 1.1356523406294143949491931077970765006170e+04
+ * then recurrent value may overflow and the result is
+ * likely underflow to zero
+ */
+ tmp = n;
+ v = two / x;
+ tmp = tmp * __ieee754_log (fabs (v * tmp));
+ if (tmp < 7.09782712893383973096e+02)
+ {
+ for (i = n - 1, di = (double) (i + i); i > 0; i--)
+ {
+ temp = b;
+ b *= di;
+ b = b / x - a;
+ a = temp;
+ di -= two;
}
- m = n+n;
- for(t=zero, i = 2*(n+k); i>=m; i -= 2) t = one/(i/x-t);
- a = t;
- b = one;
- /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n)
- * Hence, if n*(log(2n/x)) > ...
- * single 8.8722839355e+01
- * double 7.09782712893383973096e+02
- * long double 1.1356523406294143949491931077970765006170e+04
- * then recurrent value may overflow and the result is
- * likely underflow to zero
- */
- tmp = n;
- v = two/x;
- tmp = tmp*__ieee754_log(fabs(v*tmp));
- if(tmp<7.09782712893383973096e+02) {
- for(i=n-1,di=(double)(i+i);i>0;i--){
- temp = b;
- b *= di;
- b = b/x - a;
- a = temp;
- di -= two;
- }
- } else {
- for(i=n-1,di=(double)(i+i);i>0;i--){
- temp = b;
- b *= di;
- b = b/x - a;
- a = temp;
- di -= two;
- /* scale b to avoid spurious overflow */
- if(b>1e100) {
- a /= b;
- t /= b;
- b = one;
- }
+ }
+ else
+ {
+ for (i = n - 1, di = (double) (i + i); i > 0; i--)
+ {
+ temp = b;
+ b *= di;
+ b = b / x - a;
+ a = temp;
+ di -= two;
+ /* scale b to avoid spurious overflow */
+ if (b > 1e100)
+ {
+ a /= b;
+ t /= b;
+ b = one;
}
}
- /* j0() and j1() suffer enormous loss of precision at and
- * near zero; however, we know that their zero points never
- * coincide, so just choose the one further away from zero.
- */
- z = __ieee754_j0 (x);
- w = __ieee754_j1 (x);
- if (fabs (z) >= fabs (w))
- b = (t * z / b);
- else
- b = (t * w / a);
}
+ /* j0() and j1() suffer enormous loss of precision at and
+ * near zero; however, we know that their zero points never
+ * coincide, so just choose the one further away from zero.
+ */
+ z = __ieee754_j0 (x);
+ w = __ieee754_j1 (x);
+ if (fabs (z) >= fabs (w))
+ b = (t * z / b);
+ else
+ b = (t * w / a);
}
- if(sgn==1) return -b; else return b;
+ }
+ if (sgn == 1)
+ return -b;
+ else
+ return b;
}
strong_alias (__ieee754_jn, __jn_finite)
double
-__ieee754_yn(int n, double x)
+__ieee754_yn (int n, double x)
{
- int32_t i,hx,ix,lx;
- int32_t sign;
- double a, b, temp;
+ int32_t i, hx, ix, lx;
+ int32_t sign;
+ double a, b, temp;
- EXTRACT_WORDS(hx,lx,x);
- ix = 0x7fffffff&hx;
- /* if Y(n,NaN) is NaN */
- if(__builtin_expect((ix|((u_int32_t)(lx|-lx))>>31)>0x7ff00000,0))
- return x+x;
- if(__builtin_expect((ix|lx)==0, 0))
- return -HUGE_VAL+x; /* -inf and overflow exception. */;
- if(__builtin_expect(hx<0, 0)) return zero/(zero*x);
- sign = 1;
- if(n<0){
- n = -n;
- sign = 1 - ((n&1)<<1);
+ EXTRACT_WORDS (hx, lx, x);
+ ix = 0x7fffffff & hx;
+ /* if Y(n,NaN) is NaN */
+ if (__builtin_expect ((ix | ((u_int32_t) (lx | -lx)) >> 31) > 0x7ff00000, 0))
+ return x + x;
+ if (__builtin_expect ((ix | lx) == 0, 0))
+ return -HUGE_VAL + x;
+ /* -inf and overflow exception. */;
+ if (__builtin_expect (hx < 0, 0))
+ return zero / (zero * x);
+ sign = 1;
+ if (n < 0)
+ {
+ n = -n;
+ sign = 1 - ((n & 1) << 1);
+ }
+ if (n == 0)
+ return (__ieee754_y0 (x));
+ if (n == 1)
+ return (sign * __ieee754_y1 (x));
+ if (__builtin_expect (ix == 0x7ff00000, 0))
+ return zero;
+ if (ix >= 0x52D00000) /* x > 2**302 */
+ { /* (x >> n**2)
+ * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi)
+ * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi)
+ * Let s=sin(x), c=cos(x),
+ * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then
+ *
+ * n sin(xn)*sqt2 cos(xn)*sqt2
+ * ----------------------------------
+ * 0 s-c c+s
+ * 1 -s-c -c+s
+ * 2 -s+c -c-s
+ * 3 s+c c-s
+ */
+ double c;
+ double s;
+ __sincos (x, &s, &c);
+ switch (n & 3)
+ {
+ case 0: temp = s - c; break;
+ case 1: temp = -s - c; break;
+ case 2: temp = -s + c; break;
+ case 3: temp = s + c; break;
}
- if(n==0) return(__ieee754_y0(x));
- if(n==1) return(sign*__ieee754_y1(x));
- if(__builtin_expect(ix==0x7ff00000, 0)) return zero;
- if(ix>=0x52D00000) { /* x > 2**302 */
- /* (x >> n**2)
- * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi)
- * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi)
- * Let s=sin(x), c=cos(x),
- * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then
- *
- * n sin(xn)*sqt2 cos(xn)*sqt2
- * ----------------------------------
- * 0 s-c c+s
- * 1 -s-c -c+s
- * 2 -s+c -c-s
- * 3 s+c c-s
- */
- double c;
- double s;
- __sincos (x, &s, &c);
- switch(n&3) {
- case 0: temp = s - c; break;
- case 1: temp = -s - c; break;
- case 2: temp = -s + c; break;
- case 3: temp = s + c; break;
- }
- b = invsqrtpi*temp/__ieee754_sqrt(x);
- } else {
- u_int32_t high;
- a = __ieee754_y0(x);
- b = __ieee754_y1(x);
- /* quit if b is -inf */
- GET_HIGH_WORD(high,b);
- for(i=1;i<n&&high!=0xfff00000;i++){
- temp = b;
- b = ((double)(i+i)/x)*b - a;
- GET_HIGH_WORD(high,b);
- a = temp;
- }
- /* If B is +-Inf, set up errno accordingly. */
- if (! __finite (b))
- __set_errno (ERANGE);
+ b = invsqrtpi * temp / __ieee754_sqrt (x);
+ }
+ else
+ {
+ u_int32_t high;
+ a = __ieee754_y0 (x);
+ b = __ieee754_y1 (x);
+ /* quit if b is -inf */
+ GET_HIGH_WORD (high, b);
+ for (i = 1; i < n && high != 0xfff00000; i++)
+ {
+ temp = b;
+ b = ((double) (i + i) / x) * b - a;
+ GET_HIGH_WORD (high, b);
+ a = temp;
}
- if(sign>0) return b; else return -b;
+ /* If B is +-Inf, set up errno accordingly. */
+ if (!__finite (b))
+ __set_errno (ERANGE);
+ }
+ if (sign > 0)
+ return b;
+ else
+ return -b;
}
strong_alias (__ieee754_yn, __yn_finite)
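The yn loop above is the classic three-term forward recurrence Y(i+1) = (2i/x)*Y(i) - Y(i-1), which is stable in the increasing direction for Y; the jn half of this file runs the analogous recurrence backwards, with the 1e100 rescaling guard, because the forward direction is unstable for J. A minimal sketch of the idea, using the public y0()/y1() (XSI extensions in <math.h>) rather than the internal __ieee754_* entry points:

    #include <math.h>

    /* Hedged sketch, not the glibc implementation: forward recurrence for
       Y_n(x), assuming n >= 1, x > 0 and that y0()/y1() are available.
       Link with -lm.  */
    static double
    yn_forward (int n, double x)
    {
      double a = y0 (x);
      double b = y1 (x);
      for (int i = 1; i < n; i++)
        {
          double t = b;
          b = ((double) (i + i) / x) * b - a;  /* Y(i+1) = (2i/x)*Y(i) - Y(i-1) */
          a = t;
        }
      return b;
    }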
diff --git a/libc/sysdeps/ieee754/dbl-64/e_log.c b/libc/sysdeps/ieee754/dbl-64/e_log.c
index f9300f9ce..a7ab54435 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_log.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_log.c
@@ -39,6 +39,7 @@
#include "mpa.h"
#include "MathLib.h"
#include <math_private.h>
+#include <stap-probe.h>
#ifndef SECTION
# define SECTION
@@ -55,12 +56,12 @@ SECTION
__ieee754_log (double x)
{
#define M 4
- static const int pr[M] = {8, 10, 18, 32};
+ static const int pr[M] = { 8, 10, 18, 32 };
int i, j, n, ux, dx, p;
double dbl_n, u, p0, q, r0, w, nln2a, luai, lubi, lvaj, lvbj,
- sij, ssij, ttij, A, B, B0, y, y1, y2, polI, polII, sa, sb,
- t1, t2, t7, t8, t, ra, rb, ww,
- a0, aa0, s1, s2, ss2, s3, ss3, a1, aa1, a, aa, b, bb, c;
+ sij, ssij, ttij, A, B, B0, y, y1, y2, polI, polII, sa, sb,
+ t1, t2, t7, t8, t, ra, rb, ww,
+ a0, aa0, s1, s2, ss2, s3, ss3, a1, aa1, a, aa, b, bb, c;
#ifndef DLA_FMS
double t3, t4, t5, t6;
#endif
@@ -79,15 +80,15 @@ __ieee754_log (double x)
if (__builtin_expect (ux < 0x00100000, 0))
{
if (__builtin_expect (((ux & 0x7fffffff) | dx) == 0, 0))
- return MHALF / 0.0; /* return -INF */
+ return MHALF / 0.0; /* return -INF */
if (__builtin_expect (ux < 0, 0))
- return (x - x) / 0.0; /* return NaN */
+ return (x - x) / 0.0; /* return NaN */
n -= 54;
- x *= two54.d; /* scale x */
+ x *= two54.d; /* scale x */
num.d = x;
}
if (__builtin_expect (ux >= 0x7ff00000, 0))
- return x + x; /* INF or NaN */
+ return x + x; /* INF or NaN */
/* Regular values of x */
@@ -242,8 +243,12 @@ stage_n:
__mp_dbl (&mpy1, &y1, p);
__mp_dbl (&mpy2, &y2, p);
if (y1 == y2)
- return y1;
+ {
+ LIBC_PROBE (slowlog, 3, &p, &x, &y1);
+ return y1;
+ }
}
+ LIBC_PROBE (slowlog_inexact, 3, &p, &x, &y1);
return y1;
}
diff --git a/libc/sysdeps/ieee754/dbl-64/e_log10.c b/libc/sysdeps/ieee754/dbl-64/e_log10.c
index ab5069e58..c3d465a4a 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_log10.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_log10.c
@@ -46,10 +46,10 @@
#include <math.h>
#include <math_private.h>
-static const double two54 = 1.80143985094819840000e+16; /* 0x43500000, 0x00000000 */
-static const double ivln10 = 4.34294481903251816668e-01; /* 0x3FDBCB7B, 0x1526E50E */
-static const double log10_2hi = 3.01029995663611771306e-01; /* 0x3FD34413, 0x509F6000 */
-static const double log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */
+static const double two54 = 1.80143985094819840000e+16; /* 0x43500000, 0x00000000 */
+static const double ivln10 = 4.34294481903251816668e-01; /* 0x3FDBCB7B, 0x1526E50E */
+static const double log10_2hi = 3.01029995663611771306e-01; /* 0x3FD34413, 0x509F6000 */
+static const double log10_2lo = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */
double
__ieee754_log10 (double x)
@@ -62,13 +62,13 @@ __ieee754_log10 (double x)
k = 0;
if (hx < 0x00100000)
- { /* x < 2**-1022 */
+ { /* x < 2**-1022 */
if (__builtin_expect (((hx & 0x7fffffff) | lx) == 0, 0))
- return -two54 / (x - x); /* log(+-0)=-inf */
+ return -two54 / (x - x); /* log(+-0)=-inf */
if (__builtin_expect (hx < 0, 0))
- return (x - x) / (x - x); /* log(-#) = NaN */
+ return (x - x) / (x - x); /* log(-#) = NaN */
k -= 54;
- x *= two54; /* subnormal number, scale up x */
+ x *= two54; /* subnormal number, scale up x */
GET_HIGH_WORD (hx, x);
}
if (__builtin_expect (hx >= 0x7ff00000, 0))
diff --git a/libc/sysdeps/ieee754/dbl-64/e_log2.c b/libc/sysdeps/ieee754/dbl-64/e_log2.c
index 4d5cab0ed..890a4a2bd 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_log2.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_log2.c
@@ -58,14 +58,14 @@
#include <math_private.h>
static const double ln2 = 0.69314718055994530942;
-static const double two54 = 1.80143985094819840000e+16; /* 43500000 00000000 */
-static const double Lg1 = 6.666666666666735130e-01; /* 3FE55555 55555593 */
-static const double Lg2 = 3.999999999940941908e-01; /* 3FD99999 9997FA04 */
-static const double Lg3 = 2.857142874366239149e-01; /* 3FD24924 94229359 */
-static const double Lg4 = 2.222219843214978396e-01; /* 3FCC71C5 1D8E78AF */
-static const double Lg5 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */
-static const double Lg6 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */
-static const double Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
+static const double two54 = 1.80143985094819840000e+16; /* 43500000 00000000 */
+static const double Lg1 = 6.666666666666735130e-01; /* 3FE55555 55555593 */
+static const double Lg2 = 3.999999999940941908e-01; /* 3FD99999 9997FA04 */
+static const double Lg3 = 2.857142874366239149e-01; /* 3FD24924 94229359 */
+static const double Lg4 = 2.222219843214978396e-01; /* 3FCC71C5 1D8E78AF */
+static const double Lg5 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */
+static const double Lg6 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */
+static const double Lg7 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
static const double zero = 0.0;
@@ -80,13 +80,13 @@ __ieee754_log2 (double x)
k = 0;
if (hx < 0x00100000)
- { /* x < 2**-1022 */
+ { /* x < 2**-1022 */
if (__builtin_expect (((hx & 0x7fffffff) | lx) == 0, 0))
- return -two54 / (x - x); /* log(+-0)=-inf */
+ return -two54 / (x - x); /* log(+-0)=-inf */
if (__builtin_expect (hx < 0, 0))
- return (x - x) / (x - x); /* log(-#) = NaN */
+ return (x - x) / (x - x); /* log(-#) = NaN */
k -= 54;
- x *= two54; /* subnormal number, scale up x */
+ x *= two54; /* subnormal number, scale up x */
GET_HIGH_WORD (hx, x);
}
if (__builtin_expect (hx >= 0x7ff00000, 0))
@@ -94,12 +94,12 @@ __ieee754_log2 (double x)
k += (hx >> 20) - 1023;
hx &= 0x000fffff;
i = (hx + 0x95f64) & 0x100000;
- SET_HIGH_WORD (x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */
+ SET_HIGH_WORD (x, hx | (i ^ 0x3ff00000)); /* normalize x or x/2 */
k += (i >> 20);
dk = (double) k;
f = x - 1.0;
if ((0x000fffff & (2 + hx)) < 3)
- { /* |f| < 2**-20 */
+ { /* |f| < 2**-20 */
if (f == zero)
return dk;
R = f * f * (0.5 - 0.33333333333333333 * f);
diff --git a/libc/sysdeps/ieee754/dbl-64/e_pow.c b/libc/sysdeps/ieee754/dbl-64/e_pow.c
index 9a766e722..9cf230917 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_pow.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_pow.c
@@ -49,354 +49,407 @@
static const double huge = 1.0e300, tiny = 1.0e-300;
-double __exp1(double x, double xx, double error);
-static double log1(double x, double *delta, double *error);
-static double my_log2(double x, double *delta, double *error);
-double __slowpow(double x, double y,double z);
-static double power1(double x, double y);
-static int checkint(double x);
+double __exp1 (double x, double xx, double error);
+static double log1 (double x, double *delta, double *error);
+static double my_log2 (double x, double *delta, double *error);
+double __slowpow (double x, double y, double z);
+static double power1 (double x, double y);
+static int checkint (double x);
-/***************************************************************************/
-/* An ultimate power routine. Given two IEEE double machine numbers y,x */
-/* it computes the correctly rounded (to nearest) value of X^y. */
-/***************************************************************************/
+/* An ultimate power routine. Given two IEEE double machine numbers y, x it
+ computes the correctly rounded (to nearest) value of X^y. */
double
SECTION
-__ieee754_pow(double x, double y) {
- double z,a,aa,error, t,a1,a2,y1,y2;
- mynumber u,v;
+__ieee754_pow (double x, double y)
+{
+ double z, a, aa, error, t, a1, a2, y1, y2;
+ mynumber u, v;
int k;
- int4 qx,qy;
- v.x=y;
- u.x=x;
- if (v.i[LOW_HALF] == 0) { /* of y */
- qx = u.i[HIGH_HALF]&0x7fffffff;
- /* Is x a NaN? */
- if (((qx == 0x7ff00000) && (u.i[LOW_HALF] != 0)) || (qx > 0x7ff00000))
- return x;
- if (y == 1.0) return x;
- if (y == 2.0) return x*x;
- if (y == -1.0) return 1.0/x;
- if (y == 0) return 1.0;
- }
+ int4 qx, qy;
+ v.x = y;
+ u.x = x;
+ if (v.i[LOW_HALF] == 0)
+ { /* of y */
+ qx = u.i[HIGH_HALF] & 0x7fffffff;
+ /* Is x a NaN? */
+ if (((qx == 0x7ff00000) && (u.i[LOW_HALF] != 0)) || (qx > 0x7ff00000))
+ return x;
+ if (y == 1.0)
+ return x;
+ if (y == 2.0)
+ return x * x;
+ if (y == -1.0)
+ return 1.0 / x;
+ if (y == 0)
+ return 1.0;
+ }
/* else */
- if(((u.i[HIGH_HALF]>0 && u.i[HIGH_HALF]<0x7ff00000)|| /* x>0 and not x->0 */
- (u.i[HIGH_HALF]==0 && u.i[LOW_HALF]!=0)) &&
- /* 2^-1023< x<= 2^-1023 * 0x1.0000ffffffff */
- (v.i[HIGH_HALF]&0x7fffffff) < 0x4ff00000) { /* if y<-1 or y>1 */
- double retval;
+ if (((u.i[HIGH_HALF] > 0 && u.i[HIGH_HALF] < 0x7ff00000) || /* x>0 and not x->0 */
+ (u.i[HIGH_HALF] == 0 && u.i[LOW_HALF] != 0)) &&
+ /* 2^-1023< x<= 2^-1023 * 0x1.0000ffffffff */
+ (v.i[HIGH_HALF] & 0x7fffffff) < 0x4ff00000)
+ { /* if y<-1 or y>1 */
+ double retval;
- SET_RESTORE_ROUND (FE_TONEAREST);
+ SET_RESTORE_ROUND (FE_TONEAREST);
- /* Avoid internal underflow for tiny y. The exact value of y does
- not matter if |y| <= 2**-64. */
- if (ABS (y) < 0x1p-64)
- y = y < 0 ? -0x1p-64 : 0x1p-64;
- z = log1(x,&aa,&error); /* x^y =e^(y log (X)) */
- t = y*CN;
- y1 = t - (t-y);
- y2 = y - y1;
- t = z*CN;
- a1 = t - (t-z);
- a2 = (z - a1)+aa;
- a = y1*a1;
- aa = y2*a1 + y*a2;
- a1 = a+aa;
- a2 = (a-a1)+aa;
- error = error*ABS(y);
- t = __exp1(a1,a2,1.9e16*error); /* return -10 or 0 if wasn't computed exactly */
- retval = (t>0)?t:power1(x,y);
+ /* Avoid internal underflow for tiny y. The exact value of y does
+ not matter if |y| <= 2**-64. */
+ if (ABS (y) < 0x1p-64)
+ y = y < 0 ? -0x1p-64 : 0x1p-64;
+ z = log1 (x, &aa, &error); /* x^y =e^(y log (X)) */
+ t = y * CN;
+ y1 = t - (t - y);
+ y2 = y - y1;
+ t = z * CN;
+ a1 = t - (t - z);
+ a2 = (z - a1) + aa;
+ a = y1 * a1;
+ aa = y2 * a1 + y * a2;
+ a1 = a + aa;
+ a2 = (a - a1) + aa;
+ error = error * ABS (y);
+ t = __exp1 (a1, a2, 1.9e16 * error); /* return -10 or 0 if wasn't computed exactly */
+ retval = (t > 0) ? t : power1 (x, y);
- return retval;
- }
+ return retval;
+ }
- if (x == 0) {
- if (((v.i[HIGH_HALF] & 0x7fffffff) == 0x7ff00000 && v.i[LOW_HALF] != 0)
- || (v.i[HIGH_HALF] & 0x7fffffff) > 0x7ff00000) /* NaN */
- return y;
- if (ABS(y) > 1.0e20) return (y>0)?0:1.0/0.0;
- k = checkint(y);
- if (k == -1)
- return y < 0 ? 1.0/x : x;
- else
- return y < 0 ? 1.0/0.0 : 0.0; /* return 0 */
- }
+ if (x == 0)
+ {
+ if (((v.i[HIGH_HALF] & 0x7fffffff) == 0x7ff00000 && v.i[LOW_HALF] != 0)
+ || (v.i[HIGH_HALF] & 0x7fffffff) > 0x7ff00000) /* NaN */
+ return y;
+ if (ABS (y) > 1.0e20)
+ return (y > 0) ? 0 : 1.0 / 0.0;
+ k = checkint (y);
+ if (k == -1)
+ return y < 0 ? 1.0 / x : x;
+ else
+ return y < 0 ? 1.0 / 0.0 : 0.0; /* return 0 */
+ }
- qx = u.i[HIGH_HALF]&0x7fffffff; /* no sign */
- qy = v.i[HIGH_HALF]&0x7fffffff; /* no sign */
+ qx = u.i[HIGH_HALF] & 0x7fffffff; /* no sign */
+ qy = v.i[HIGH_HALF] & 0x7fffffff; /* no sign */
- if (qx >= 0x7ff00000 && (qx > 0x7ff00000 || u.i[LOW_HALF] != 0)) /* NaN */
+ if (qx >= 0x7ff00000 && (qx > 0x7ff00000 || u.i[LOW_HALF] != 0)) /* NaN */
return x;
- if (qy >= 0x7ff00000 && (qy > 0x7ff00000 || v.i[LOW_HALF] != 0)) /* NaN */
+ if (qy >= 0x7ff00000 && (qy > 0x7ff00000 || v.i[LOW_HALF] != 0)) /* NaN */
return x == 1.0 ? 1.0 : y;
/* if x<0 */
- if (u.i[HIGH_HALF] < 0) {
- k = checkint(y);
- if (k==0) {
- if (qy == 0x7ff00000) {
- if (x == -1.0) return 1.0;
- else if (x > -1.0) return v.i[HIGH_HALF] < 0 ? INF.x : 0.0;
- else return v.i[HIGH_HALF] < 0 ? 0.0 : INF.x;
- }
+ if (u.i[HIGH_HALF] < 0)
+ {
+ k = checkint (y);
+ if (k == 0)
+ {
+ if (qy == 0x7ff00000)
+ {
+ if (x == -1.0)
+ return 1.0;
+ else if (x > -1.0)
+ return v.i[HIGH_HALF] < 0 ? INF.x : 0.0;
+ else
+ return v.i[HIGH_HALF] < 0 ? 0.0 : INF.x;
+ }
+ else if (qx == 0x7ff00000)
+ return y < 0 ? 0.0 : INF.x;
+ return (x - x) / (x - x); /* y not integer and x<0 */
+ }
else if (qx == 0x7ff00000)
- return y < 0 ? 0.0 : INF.x;
- return (x - x) / (x - x); /* y not integer and x<0 */
+ {
+ if (k < 0)
+ return y < 0 ? nZERO.x : nINF.x;
+ else
+ return y < 0 ? 0.0 : INF.x;
+ }
+ /* if y even or odd */
+ return (k == 1) ? __ieee754_pow (-x, y) : -__ieee754_pow (-x, y);
}
- else if (qx == 0x7ff00000)
- {
- if (k < 0)
- return y < 0 ? nZERO.x : nINF.x;
- else
- return y < 0 ? 0.0 : INF.x;
- }
- return (k==1)?__ieee754_pow(-x,y):-__ieee754_pow(-x,y); /* if y even or odd */
- }
/* x>0 */
- if (qx == 0x7ff00000) /* x= 2^-0x3ff */
+ if (qx == 0x7ff00000) /* x= 2^-0x3ff */
return y > 0 ? x : 0;
- if (qy > 0x45f00000 && qy < 0x7ff00000) {
- if (x == 1.0) return 1.0;
- if (y>0) return (x>1.0)?huge*huge:tiny*tiny;
- if (y<0) return (x<1.0)?huge*huge:tiny*tiny;
- }
+ if (qy > 0x45f00000 && qy < 0x7ff00000)
+ {
+ if (x == 1.0)
+ return 1.0;
+ if (y > 0)
+ return (x > 1.0) ? huge * huge : tiny * tiny;
+ if (y < 0)
+ return (x < 1.0) ? huge * huge : tiny * tiny;
+ }
- if (x == 1.0) return 1.0;
- if (y>0) return (x>1.0)?INF.x:0;
- if (y<0) return (x<1.0)?INF.x:0;
- return 0; /* unreachable, to make the compiler happy */
+ if (x == 1.0)
+ return 1.0;
+ if (y > 0)
+ return (x > 1.0) ? INF.x : 0;
+ if (y < 0)
+ return (x < 1.0) ? INF.x : 0;
+ return 0; /* unreachable, to make the compiler happy */
}
+
#ifndef __ieee754_pow
strong_alias (__ieee754_pow, __pow_finite)
#endif
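The y1/y2 and a1/a2 pairs in __ieee754_pow above come from a Veltkamp-style split: multiplying by CN and subtracting leaves a high part with a shortened significand plus an exact low tail, so the leading product y1*a1 is exact and the remaining cross terms carry the rest. A minimal sketch of that split, assuming CN plays the usual role of 2^27 + 1 (its actual definition lives in upow.h, outside this hunk):

    /* Hedged sketch; SPLIT_C = 2^27 + 1 is an assumption about what CN is.  */
    static void
    split (double x, double *hi, double *lo)
    {
      static const double SPLIT_C = 134217729.0;  /* 2^27 + 1 */
      double t = x * SPLIT_C;
      *hi = t - (t - x);  /* high part, at most 26 significand bits */
      *lo = x - *hi;      /* exact low tail */
    }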
-/**************************************************************************/
-/* Computing x^y using more accurate but more slow log routine */
-/**************************************************************************/
+/* Compute x^y using the more accurate but slower log routine.  */
static double
SECTION
-power1(double x, double y) {
- double z,a,aa,error, t,a1,a2,y1,y2;
- z = my_log2(x,&aa,&error);
- t = y*CN;
- y1 = t - (t-y);
+power1 (double x, double y)
+{
+ double z, a, aa, error, t, a1, a2, y1, y2;
+ z = my_log2 (x, &aa, &error);
+ t = y * CN;
+ y1 = t - (t - y);
y2 = y - y1;
- t = z*CN;
- a1 = t - (t-z);
+ t = z * CN;
+ a1 = t - (t - z);
a2 = z - a1;
- a = y*z;
- aa = ((y1*a1-a)+y1*a2+y2*a1)+y2*a2+aa*y;
- a1 = a+aa;
- a2 = (a-a1)+aa;
- error = error*ABS(y);
- t = __exp1(a1,a2,1.9e16*error);
- return (t >= 0)?t:__slowpow(x,y,z);
+ a = y * z;
+ aa = ((y1 * a1 - a) + y1 * a2 + y2 * a1) + y2 * a2 + aa * y;
+ a1 = a + aa;
+ a2 = (a - a1) + aa;
+ error = error * ABS (y);
+ t = __exp1 (a1, a2, 1.9e16 * error);
+ return (t >= 0) ? t : __slowpow (x, y, z);
}
-/****************************************************************************/
-/* Computing log(x) (x is left argument). The result is the returned double */
-/* + the parameter delta. */
-/* The result is bounded by error (rightmost argument) */
-/****************************************************************************/
+/* Compute log(x) (x is left argument). The result is the returned double + the
+   parameter DELTA.  The error of the result is bounded by ERROR.  */
static double
SECTION
-log1(double x, double *delta, double *error) {
- int i,j,m;
- double uu,vv,eps,nx,e,e1,e2,t,t1,t2,res,add=0;
- mynumber u,v;
+log1 (double x, double *delta, double *error)
+{
+ int i, j, m;
+ double uu, vv, eps, nx, e, e1, e2, t, t1, t2, res, add = 0;
+ mynumber u, v;
#ifdef BIG_ENDI
- mynumber
-/**/ two52 = {{0x43300000, 0x00000000}}; /* 2**52 */
+ mynumber /**/ two52 = {{0x43300000, 0x00000000}}; /* 2**52 */
#else
-#ifdef LITTLE_ENDI
- mynumber
-/**/ two52 = {{0x00000000, 0x43300000}}; /* 2**52 */
-#endif
+# ifdef LITTLE_ENDI
+ mynumber /**/ two52 = {{0x00000000, 0x43300000}}; /* 2**52 */
+# endif
#endif
u.x = x;
m = u.i[HIGH_HALF];
*error = 0;
*delta = 0;
- if (m < 0x00100000) /* 1<x<2^-1007 */
- { x = x*t52.x; add = -52.0; u.x = x; m = u.i[HIGH_HALF];}
+ if (m < 0x00100000) /* 1<x<2^-1007 */
+ {
+ x = x * t52.x;
+ add = -52.0;
+ u.x = x;
+ m = u.i[HIGH_HALF];
+ }
- if ((m&0x000fffff) < 0x0006a09e)
- {u.i[HIGH_HALF] = (m&0x000fffff)|0x3ff00000; two52.i[LOW_HALF]=(m>>20); }
+ if ((m & 0x000fffff) < 0x0006a09e)
+ {
+ u.i[HIGH_HALF] = (m & 0x000fffff) | 0x3ff00000;
+ two52.i[LOW_HALF] = (m >> 20);
+ }
else
- {u.i[HIGH_HALF] = (m&0x000fffff)|0x3fe00000; two52.i[LOW_HALF]=(m>>20)+1; }
+ {
+ u.i[HIGH_HALF] = (m & 0x000fffff) | 0x3fe00000;
+ two52.i[LOW_HALF] = (m >> 20) + 1;
+ }
v.x = u.x + bigu.x;
uu = v.x - bigu.x;
- i = (v.i[LOW_HALF]&0x000003ff)<<2;
- if (two52.i[LOW_HALF] == 1023) /* nx = 0 */
- {
- if (i > 1192 && i < 1208) /* |x-1| < 1.5*2**-10 */
- {
+ i = (v.i[LOW_HALF] & 0x000003ff) << 2;
+ if (two52.i[LOW_HALF] == 1023) /* nx = 0 */
+ {
+ if (i > 1192 && i < 1208) /* |x-1| < 1.5*2**-10 */
+ {
t = x - 1.0;
- t1 = (t+5.0e6)-5.0e6;
- t2 = t-t1;
- e1 = t - 0.5*t1*t1;
- e2 = t*t*t*(r3+t*(r4+t*(r5+t*(r6+t*(r7+t*r8)))))-0.5*t2*(t+t1);
- res = e1+e2;
- *error = 1.0e-21*ABS(t);
- *delta = (e1-res)+e2;
+ t1 = (t + 5.0e6) - 5.0e6;
+ t2 = t - t1;
+ e1 = t - 0.5 * t1 * t1;
+ e2 = (t * t * t * (r3 + t * (r4 + t * (r5 + t * (r6 + t
+ * (r7 + t * r8)))))
+ - 0.5 * t2 * (t + t1));
+ res = e1 + e2;
+ *error = 1.0e-21 * ABS (t);
+ *delta = (e1 - res) + e2;
return res;
- } /* |x-1| < 1.5*2**-10 */
+ } /* |x-1| < 1.5*2**-10 */
else
- {
- v.x = u.x*(ui.x[i]+ui.x[i+1])+bigv.x;
- vv = v.x-bigv.x;
- j = v.i[LOW_HALF]&0x0007ffff;
- j = j+j+j;
- eps = u.x - uu*vv;
- e1 = eps*ui.x[i];
- e2 = eps*(ui.x[i+1]+vj.x[j]*(ui.x[i]+ui.x[i+1]));
- e = e1+e2;
- e2 = ((e1-e)+e2);
- t=ui.x[i+2]+vj.x[j+1];
- t1 = t+e;
- t2 = (((t-t1)+e)+(ui.x[i+3]+vj.x[j+2]))+e2+e*e*(p2+e*(p3+e*p4));
- res=t1+t2;
+ {
+ v.x = u.x * (ui.x[i] + ui.x[i + 1]) + bigv.x;
+ vv = v.x - bigv.x;
+ j = v.i[LOW_HALF] & 0x0007ffff;
+ j = j + j + j;
+ eps = u.x - uu * vv;
+ e1 = eps * ui.x[i];
+ e2 = eps * (ui.x[i + 1] + vj.x[j] * (ui.x[i] + ui.x[i + 1]));
+ e = e1 + e2;
+ e2 = ((e1 - e) + e2);
+ t = ui.x[i + 2] + vj.x[j + 1];
+ t1 = t + e;
+ t2 = ((((t - t1) + e) + (ui.x[i + 3] + vj.x[j + 2])) + e2 + e * e
+ * (p2 + e * (p3 + e * p4)));
+ res = t1 + t2;
*error = 1.0e-24;
- *delta = (t1-res)+t2;
+ *delta = (t1 - res) + t2;
return res;
- }
- } /* nx = 0 */
- else /* nx != 0 */
- {
+ }
+ } /* nx = 0 */
+ else /* nx != 0 */
+ {
eps = u.x - uu;
- nx = (two52.x - two52e.x)+add;
- e1 = eps*ui.x[i];
- e2 = eps*ui.x[i+1];
- e=e1+e2;
- e2 = (e1-e)+e2;
- t=nx*ln2a.x+ui.x[i+2];
- t1=t+e;
- t2=(((t-t1)+e)+nx*ln2b.x+ui.x[i+3]+e2)+e*e*(q2+e*(q3+e*(q4+e*(q5+e*q6))));
- res = t1+t2;
+ nx = (two52.x - two52e.x) + add;
+ e1 = eps * ui.x[i];
+ e2 = eps * ui.x[i + 1];
+ e = e1 + e2;
+ e2 = (e1 - e) + e2;
+ t = nx * ln2a.x + ui.x[i + 2];
+ t1 = t + e;
+ t2 = ((((t - t1) + e) + nx * ln2b.x + ui.x[i + 3] + e2) + e * e
+ * (q2 + e * (q3 + e * (q4 + e * (q5 + e * q6)))));
+ res = t1 + t2;
*error = 1.0e-21;
- *delta = (t1-res)+t2;
+ *delta = (t1 - res) + t2;
return res;
- } /* nx != 0 */
+ } /* nx != 0 */
}
-/****************************************************************************/
-/* More slow but more accurate routine of log */
-/* Computing log(x)(x is left argument).The result is return double + delta.*/
-/* The result is bounded by error (right argument) */
-/****************************************************************************/
+/* Slower but more accurate log routine.  The returned result is the double +
+   DELTA.  The error of the result is bounded by ERROR.  */
static double
SECTION
-my_log2(double x, double *delta, double *error) {
- int i,j,m;
- double uu,vv,eps,nx,e,e1,e2,t,t1,t2,res,add=0;
- double ou1,ou2,lu1,lu2,ov,lv1,lv2,a,a1,a2;
- double y,yy,z,zz,j1,j2,j7,j8;
+my_log2 (double x, double *delta, double *error)
+{
+ int i, j, m;
+ double uu, vv, eps, nx, e, e1, e2, t, t1, t2, res, add = 0;
+ double ou1, ou2, lu1, lu2, ov, lv1, lv2, a, a1, a2;
+ double y, yy, z, zz, j1, j2, j7, j8;
#ifndef DLA_FMS
- double j3,j4,j5,j6;
+ double j3, j4, j5, j6;
#endif
- mynumber u,v;
+ mynumber u, v;
#ifdef BIG_ENDI
- mynumber
-/**/ two52 = {{0x43300000, 0x00000000}}; /* 2**52 */
+ mynumber /**/ two52 = {{0x43300000, 0x00000000}}; /* 2**52 */
#else
-#ifdef LITTLE_ENDI
- mynumber
-/**/ two52 = {{0x00000000, 0x43300000}}; /* 2**52 */
-#endif
+# ifdef LITTLE_ENDI
+ mynumber /**/ two52 = {{0x00000000, 0x43300000}}; /* 2**52 */
+# endif
#endif
u.x = x;
m = u.i[HIGH_HALF];
*error = 0;
*delta = 0;
- add=0;
- if (m<0x00100000) { /* x < 2^-1022 */
- x = x*t52.x; add = -52.0; u.x = x; m = u.i[HIGH_HALF]; }
+ add = 0;
+ if (m < 0x00100000)
+ { /* x < 2^-1022 */
+ x = x * t52.x;
+ add = -52.0;
+ u.x = x;
+ m = u.i[HIGH_HALF];
+ }
- if ((m&0x000fffff) < 0x0006a09e)
- {u.i[HIGH_HALF] = (m&0x000fffff)|0x3ff00000; two52.i[LOW_HALF]=(m>>20); }
+ if ((m & 0x000fffff) < 0x0006a09e)
+ {
+ u.i[HIGH_HALF] = (m & 0x000fffff) | 0x3ff00000;
+ two52.i[LOW_HALF] = (m >> 20);
+ }
else
- {u.i[HIGH_HALF] = (m&0x000fffff)|0x3fe00000; two52.i[LOW_HALF]=(m>>20)+1; }
+ {
+ u.i[HIGH_HALF] = (m & 0x000fffff) | 0x3fe00000;
+ two52.i[LOW_HALF] = (m >> 20) + 1;
+ }
v.x = u.x + bigu.x;
uu = v.x - bigu.x;
- i = (v.i[LOW_HALF]&0x000003ff)<<2;
+ i = (v.i[LOW_HALF] & 0x000003ff) << 2;
/*------------------------------------- |x-1| < 2**-11------------------------------- */
- if ((two52.i[LOW_HALF] == 1023) && (i == 1200))
- {
+ if ((two52.i[LOW_HALF] == 1023) && (i == 1200))
+ {
t = x - 1.0;
- EMULV(t,s3,y,yy,j1,j2,j3,j4,j5);
- ADD2(-0.5,0,y,yy,z,zz,j1,j2);
- MUL2(t,0,z,zz,y,yy,j1,j2,j3,j4,j5,j6,j7,j8);
- MUL2(t,0,y,yy,z,zz,j1,j2,j3,j4,j5,j6,j7,j8);
+ EMULV (t, s3, y, yy, j1, j2, j3, j4, j5);
+ ADD2 (-0.5, 0, y, yy, z, zz, j1, j2);
+ MUL2 (t, 0, z, zz, y, yy, j1, j2, j3, j4, j5, j6, j7, j8);
+ MUL2 (t, 0, y, yy, z, zz, j1, j2, j3, j4, j5, j6, j7, j8);
- e1 = t+z;
- e2 = (((t-e1)+z)+zz)+t*t*t*(ss3+t*(s4+t*(s5+t*(s6+t*(s7+t*s8)))));
- res = e1+e2;
- *error = 1.0e-25*ABS(t);
- *delta = (e1-res)+e2;
+ e1 = t + z;
+ e2 = ((((t - e1) + z) + zz) + t * t * t
+ * (ss3 + t * (s4 + t * (s5 + t * (s6 + t * (s7 + t * s8))))));
+ res = e1 + e2;
+ *error = 1.0e-25 * ABS (t);
+ *delta = (e1 - res) + e2;
return res;
- }
+ }
/*----------------------------- |x-1| > 2**-11 -------------------------- */
else
- { /*Computing log(x) according to log table */
- nx = (two52.x - two52e.x)+add;
+ { /*Computing log(x) according to log table */
+ nx = (two52.x - two52e.x) + add;
ou1 = ui.x[i];
- ou2 = ui.x[i+1];
- lu1 = ui.x[i+2];
- lu2 = ui.x[i+3];
- v.x = u.x*(ou1+ou2)+bigv.x;
- vv = v.x-bigv.x;
- j = v.i[LOW_HALF]&0x0007ffff;
- j = j+j+j;
- eps = u.x - uu*vv;
- ov = vj.x[j];
- lv1 = vj.x[j+1];
- lv2 = vj.x[j+2];
- a = (ou1+ou2)*(1.0+ov);
- a1 = (a+1.0e10)-1.0e10;
- a2 = a*(1.0-a1*uu*vv);
- e1 = eps*a1;
- e2 = eps*a2;
- e = e1+e2;
- e2 = (e1-e)+e2;
- t=nx*ln2a.x+lu1+lv1;
- t1 = t+e;
- t2 = (((t-t1)+e)+(lu2+lv2+nx*ln2b.x+e2))+e*e*(p2+e*(p3+e*p4));
- res=t1+t2;
+ ou2 = ui.x[i + 1];
+ lu1 = ui.x[i + 2];
+ lu2 = ui.x[i + 3];
+ v.x = u.x * (ou1 + ou2) + bigv.x;
+ vv = v.x - bigv.x;
+ j = v.i[LOW_HALF] & 0x0007ffff;
+ j = j + j + j;
+ eps = u.x - uu * vv;
+ ov = vj.x[j];
+ lv1 = vj.x[j + 1];
+ lv2 = vj.x[j + 2];
+ a = (ou1 + ou2) * (1.0 + ov);
+ a1 = (a + 1.0e10) - 1.0e10;
+ a2 = a * (1.0 - a1 * uu * vv);
+ e1 = eps * a1;
+ e2 = eps * a2;
+ e = e1 + e2;
+ e2 = (e1 - e) + e2;
+ t = nx * ln2a.x + lu1 + lv1;
+ t1 = t + e;
+ t2 = ((((t - t1) + e) + (lu2 + lv2 + nx * ln2b.x + e2)) + e * e
+ * (p2 + e * (p3 + e * p4)));
+ res = t1 + t2;
*error = 1.0e-27;
- *delta = (t1-res)+t2;
+ *delta = (t1 - res) + t2;
return res;
- }
+ }
}
-/**********************************************************************/
-/* Routine receives a double x and checks if it is an integer. If not */
-/* it returns 0, else it returns 1 if even or -1 if odd. */
-/**********************************************************************/
+/* This function receives a double x and checks if it is an integer. If not,
+ it returns 0, else it returns 1 if even or -1 if odd. */
static int
SECTION
-checkint(double x) {
- union {int4 i[2]; double x;} u;
- int k,m,n;
+checkint (double x)
+{
+ union
+ {
+ int4 i[2];
+ double x;
+ } u;
+ int k, m, n;
u.x = x;
- m = u.i[HIGH_HALF]&0x7fffffff; /* no sign */
- if (m >= 0x7ff00000) return 0; /* x is +/-inf or NaN */
- if (m >= 0x43400000) return 1; /* |x| >= 2**53 */
- if (m < 0x40000000) return 0; /* |x| < 2, can not be 0 or 1 */
+ m = u.i[HIGH_HALF] & 0x7fffffff; /* no sign */
+ if (m >= 0x7ff00000)
+ return 0; /* x is +/-inf or NaN */
+ if (m >= 0x43400000)
+ return 1; /* |x| >= 2**53 */
+ if (m < 0x40000000)
+ return 0; /* |x| < 2, can not be 0 or 1 */
n = u.i[LOW_HALF];
- k = (m>>20)-1023; /* 1 <= k <= 52 */
- if (k == 52) return (n&1)? -1:1; /* odd or even*/
- if (k>20) {
- if (n<<(k-20)) return 0; /* if not integer */
- return (n<<(k-21))?-1:1;
- }
- if (n) return 0; /*if not integer*/
- if (k == 20) return (m&1)? -1:1;
- if (m<<(k+12)) return 0;
- return (m<<(k+11))?-1:1;
+ k = (m >> 20) - 1023; /* 1 <= k <= 52 */
+ if (k == 52)
+ return (n & 1) ? -1 : 1; /* odd or even */
+ if (k > 20)
+ {
+ if (n << (k - 20))
+ return 0; /* if not integer */
+ return (n << (k - 21)) ? -1 : 1;
+ }
+ if (n)
+ return 0; /*if not integer */
+ if (k == 20)
+ return (m & 1) ? -1 : 1;
+ if (m << (k + 12))
+ return 0;
+ return (m << (k + 11)) ? -1 : 1;
}
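checkint() above answers three questions at once with word inspection: it returns 0 for anything it does not treat as an integer (NaN, +/-inf, |x| < 2, or a genuine non-integer), 1 for an even integer and -1 for an odd one, counting every |x| >= 2^53 as even since all such doubles are even integers. A hedged reference version with <math.h> calls instead of bit tests, matching those conventions:

    #include <math.h>

    /* Hedged reference, not the glibc code: same return convention as
       checkint() above, written for readability rather than speed.  */
    static int
    checkint_ref (double x)
    {
      double a = fabs (x);
      if (!isfinite (x) || a < 2.0)
        return 0;                            /* NaN, +/-inf, or |x| < 2 */
      if (a >= 0x1p53)
        return 1;                            /* every such double is an even integer */
      if (rint (x) != x)
        return 0;                            /* not an integer */
      return fmod (a, 2.0) == 0.0 ? 1 : -1;  /* even : odd */
    }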
diff --git a/libc/sysdeps/ieee754/dbl-64/e_rem_pio2.c b/libc/sysdeps/ieee754/dbl-64/e_rem_pio2.c
index 4478be0b0..2f55ca294 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_rem_pio2.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_rem_pio2.c
@@ -57,110 +57,137 @@ static const int32_t npio2_hw[] = {
*/
static const double
-zero = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
-half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
-two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
-invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
-pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
-pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
-pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
-pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
-pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
-pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
+ zero = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
+ half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
+ two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
+ invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
+ pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
+ pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
+ pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
+ pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
+ pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
+ pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
int32_t
-__ieee754_rem_pio2(double x, double *y)
+__ieee754_rem_pio2 (double x, double *y)
{
- double z,w,t,r,fn;
- double tx[3];
- int32_t e0,i,j,nx,n,ix,hx;
- u_int32_t low;
+ double z, w, t, r, fn;
+ double tx[3];
+ int32_t e0, i, j, nx, n, ix, hx;
+ u_int32_t low;
- GET_HIGH_WORD(hx,x); /* high word of x */
- ix = hx&0x7fffffff;
- if(ix<=0x3fe921fb) /* |x| ~<= pi/4 , no need for reduction */
- {y[0] = x; y[1] = 0; return 0;}
- if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */
- if(hx>0) {
- z = x - pio2_1;
- if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */
- y[0] = z - pio2_1t;
- y[1] = (z-y[0])-pio2_1t;
- } else { /* near pi/2, use 33+33+53 bit pi */
- z -= pio2_2;
- y[0] = z - pio2_2t;
- y[1] = (z-y[0])-pio2_2t;
- }
- return 1;
- } else { /* negative x */
- z = x + pio2_1;
- if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */
- y[0] = z + pio2_1t;
- y[1] = (z-y[0])+pio2_1t;
- } else { /* near pi/2, use 33+33+53 bit pi */
- z += pio2_2;
- y[0] = z + pio2_2t;
- y[1] = (z-y[0])+pio2_2t;
- }
- return -1;
+ GET_HIGH_WORD (hx, x); /* high word of x */
+ ix = hx & 0x7fffffff;
+ if (ix <= 0x3fe921fb) /* |x| ~<= pi/4 , no need for reduction */
+ {
+ y[0] = x; y[1] = 0; return 0;
+ }
+ if (ix < 0x4002d97c) /* |x| < 3pi/4, special case with n=+-1 */
+ {
+ if (hx > 0)
+ {
+ z = x - pio2_1;
+ if (ix != 0x3ff921fb) /* 33+53 bit pi is good enough */
+ {
+ y[0] = z - pio2_1t;
+ y[1] = (z - y[0]) - pio2_1t;
+ }
+ else /* near pi/2, use 33+33+53 bit pi */
+ {
+ z -= pio2_2;
+ y[0] = z - pio2_2t;
+ y[1] = (z - y[0]) - pio2_2t;
}
+ return 1;
}
- if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
- t = fabs(x);
- n = (int32_t) (t*invpio2+half);
- fn = (double)n;
- r = t-fn*pio2_1;
- w = fn*pio2_1t; /* 1st round good to 85 bit */
- if(n<32&&ix!=npio2_hw[n-1]) {
- y[0] = r-w; /* quick check no cancellation */
- } else {
- u_int32_t high;
- j = ix>>20;
- y[0] = r-w;
- GET_HIGH_WORD(high,y[0]);
- i = j-((high>>20)&0x7ff);
- if(i>16) { /* 2nd iteration needed, good to 118 */
- t = r;
- w = fn*pio2_2;
- r = t-w;
- w = fn*pio2_2t-((t-r)-w);
- y[0] = r-w;
- GET_HIGH_WORD(high,y[0]);
- i = j-((high>>20)&0x7ff);
- if(i>49) { /* 3rd iteration need, 151 bits acc */
- t = r; /* will cover all possible cases */
- w = fn*pio2_3;
- r = t-w;
- w = fn*pio2_3t-((t-r)-w);
- y[0] = r-w;
- }
- }
+ else /* negative x */
+ {
+ z = x + pio2_1;
+ if (ix != 0x3ff921fb) /* 33+53 bit pi is good enough */
+ {
+ y[0] = z + pio2_1t;
+ y[1] = (z - y[0]) + pio2_1t;
+ }
+ else /* near pi/2, use 33+33+53 bit pi */
+ {
+ z += pio2_2;
+ y[0] = z + pio2_2t;
+ y[1] = (z - y[0]) + pio2_2t;
}
- y[1] = (r-y[0])-w;
- if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
- else return n;
+ return -1;
}
- /*
- * all other (large) arguments
- */
- if(ix>=0x7ff00000) { /* x is inf or NaN */
- y[0]=y[1]=x-x; return 0;
+ }
+ if (ix <= 0x413921fb) /* |x| ~<= 2^19*(pi/2), medium size */
+ {
+ t = fabs (x);
+ n = (int32_t) (t * invpio2 + half);
+ fn = (double) n;
+ r = t - fn * pio2_1;
+ w = fn * pio2_1t; /* 1st round good to 85 bit */
+ if (n < 32 && ix != npio2_hw[n - 1])
+ {
+ y[0] = r - w; /* quick check no cancellation */
}
- /* set z = scalbn(|x|,ilogb(x)-23) */
- GET_LOW_WORD(low,x);
- SET_LOW_WORD(z,low);
- e0 = (ix>>20)-1046; /* e0 = ilogb(z)-23; */
- SET_HIGH_WORD(z, ix - ((int32_t)(e0<<20)));
- for(i=0;i<2;i++) {
- tx[i] = (double)((int32_t)(z));
- z = (z-tx[i])*two24;
+ else
+ {
+ u_int32_t high;
+ j = ix >> 20;
+ y[0] = r - w;
+ GET_HIGH_WORD (high, y[0]);
+ i = j - ((high >> 20) & 0x7ff);
+ if (i > 16) /* 2nd iteration needed, good to 118 */
+ {
+ t = r;
+ w = fn * pio2_2;
+ r = t - w;
+ w = fn * pio2_2t - ((t - r) - w);
+ y[0] = r - w;
+ GET_HIGH_WORD (high, y[0]);
+ i = j - ((high >> 20) & 0x7ff);
+	      if (i > 49)	/* 3rd iteration needed, 151 bits acc */
+ {
+ t = r; /* will cover all possible cases */
+ w = fn * pio2_3;
+ r = t - w;
+ w = fn * pio2_3t - ((t - r) - w);
+ y[0] = r - w;
+ }
+ }
}
- tx[2] = z;
- nx = 3;
- while(tx[nx-1]==zero) nx--; /* skip zero term */
- n = __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);
- if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
+ y[1] = (r - y[0]) - w;
+ if (hx < 0)
+ {
+ y[0] = -y[0]; y[1] = -y[1]; return -n;
+ }
+ else
return n;
+ }
+ /*
+ * all other (large) arguments
+ */
+ if (ix >= 0x7ff00000) /* x is inf or NaN */
+ {
+ y[0] = y[1] = x - x; return 0;
+ }
+ /* set z = scalbn(|x|,ilogb(x)-23) */
+ GET_LOW_WORD (low, x);
+ SET_LOW_WORD (z, low);
+ e0 = (ix >> 20) - 1046; /* e0 = ilogb(z)-23; */
+ SET_HIGH_WORD (z, ix - ((int32_t) (e0 << 20)));
+ for (i = 0; i < 2; i++)
+ {
+ tx[i] = (double) ((int32_t) (z));
+ z = (z - tx[i]) * two24;
+ }
+ tx[2] = z;
+ nx = 3;
+ while (tx[nx - 1] == zero)
+ nx--; /* skip zero term */
+ n = __kernel_rem_pio2 (tx, y, e0, nx, 2, two_over_pi);
+ if (hx < 0)
+ {
+ y[0] = -y[0]; y[1] = -y[1]; return -n;
+ }
+ return n;
}
-
#endif
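The medium-size branch of __ieee754_rem_pio2 above is a Cody-Waite reduction: n is the integer nearest |x|*2/pi, and n*pi/2 is subtracted in pieces (pio2_1 plus pio2_1t, escalating to pio2_2/pio2_2t and pio2_3/pio2_3t when the high words show heavy cancellation). A one-round sketch of the idea, with the constants copied from the table above:

    #include <math.h>

    /* Hedged sketch, first round only; the real code adds two more rounds
       and the __kernel_rem_pio2 fallback for huge arguments.  */
    static double
    reduce_once (double t, int *n)  /* t = |x|, assumed medium-sized */
    {
      static const double invpio2 = 6.36619772367581382433e-01;
      static const double pio2_1  = 1.57079632673412561417e+00;  /* 33 bits of pi/2 */
      static const double pio2_1t = 6.07710050650619224932e-11;  /* pi/2 - pio2_1 */
      double fn = floor (t * invpio2 + 0.5);
      *n = (int) fn;
      double r = t - fn * pio2_1;  /* exact: 33-bit pio2_1 times a ~20-bit fn */
      double w = fn * pio2_1t;
      return r - w;                /* "1st round good to 85 bit" above */
    }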
diff --git a/libc/sysdeps/ieee754/dbl-64/e_remainder.c b/libc/sysdeps/ieee754/dbl-64/e_remainder.c
index 2d20bb1df..c6a1901b1 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_remainder.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_remainder.c
@@ -39,89 +39,111 @@
/* An ultimate remainder routine. Given two IEEE double machine numbers x */
/* ,y it computes the correctly rounded (to nearest) value of remainder */
/**************************************************************************/
-double __ieee754_remainder(double x, double y)
+double
+__ieee754_remainder (double x, double y)
{
- double z,d,xx;
- int4 kx,ky,n,nn,n1,m1,l;
- mynumber u,t,w={{0,0}},v={{0,0}},ww={{0,0}},r;
- u.x=x;
- t.x=y;
- kx=u.i[HIGH_HALF]&0x7fffffff; /* no sign for x*/
- t.i[HIGH_HALF]&=0x7fffffff; /*no sign for y */
- ky=t.i[HIGH_HALF];
+ double z, d, xx;
+ int4 kx, ky, n, nn, n1, m1, l;
+ mynumber u, t, w = { { 0, 0 } }, v = { { 0, 0 } }, ww = { { 0, 0 } }, r;
+ u.x = x;
+ t.x = y;
+ kx = u.i[HIGH_HALF] & 0x7fffffff; /* no sign for x*/
+ t.i[HIGH_HALF] &= 0x7fffffff; /*no sign for y */
+ ky = t.i[HIGH_HALF];
/*------ |x| < 2^1023 and 2^-970 < |y| < 2^1024 ------------------*/
- if (kx<0x7fe00000 && ky<0x7ff00000 && ky>=0x03500000) {
- SET_RESTORE_ROUND_NOEX (FE_TONEAREST);
- if (kx+0x00100000<ky) return x;
- if ((kx-0x01500000)<ky) {
- z=x/t.x;
- v.i[HIGH_HALF]=t.i[HIGH_HALF];
- d=(z+big.x)-big.x;
- xx=(x-d*v.x)-d*(t.x-v.x);
- if (d-z!=0.5&&d-z!=-0.5) return (xx!=0)?xx:((x>0)?ZERO.x:nZERO.x);
- else {
- if (ABS(xx)>0.5*t.x) return (z>d)?xx-t.x:xx+t.x;
- else return xx;
- }
- } /* (kx<(ky+0x01500000)) */
- else {
- r.x=1.0/t.x;
- n=t.i[HIGH_HALF];
- nn=(n&0x7ff00000)+0x01400000;
- w.i[HIGH_HALF]=n;
- ww.x=t.x-w.x;
- l=(kx-nn)&0xfff00000;
- n1=ww.i[HIGH_HALF];
- m1=r.i[HIGH_HALF];
- while (l>0) {
- r.i[HIGH_HALF]=m1-l;
- z=u.x*r.x;
- w.i[HIGH_HALF]=n+l;
- ww.i[HIGH_HALF]=(n1)?n1+l:n1;
- d=(z+big.x)-big.x;
- u.x=(u.x-d*w.x)-d*ww.x;
- l=(u.i[HIGH_HALF]&0x7ff00000)-nn;
- }
- r.i[HIGH_HALF]=m1;
- w.i[HIGH_HALF]=n;
- ww.i[HIGH_HALF]=n1;
- z=u.x*r.x;
- d=(z+big.x)-big.x;
- u.x=(u.x-d*w.x)-d*ww.x;
- if (ABS(u.x)<0.5*t.x) return (u.x!=0)?u.x:((x>0)?ZERO.x:nZERO.x);
+ if (kx < 0x7fe00000 && ky < 0x7ff00000 && ky >= 0x03500000)
+ {
+ SET_RESTORE_ROUND_NOEX (FE_TONEAREST);
+ if (kx + 0x00100000 < ky)
+ return x;
+ if ((kx - 0x01500000) < ky)
+ {
+ z = x / t.x;
+ v.i[HIGH_HALF] = t.i[HIGH_HALF];
+ d = (z + big.x) - big.x;
+ xx = (x - d * v.x) - d * (t.x - v.x);
+ if (d - z != 0.5 && d - z != -0.5)
+ return (xx != 0) ? xx : ((x > 0) ? ZERO.x : nZERO.x);
+ else
+ {
+ if (ABS (xx) > 0.5 * t.x)
+ return (z > d) ? xx - t.x : xx + t.x;
+ else
+ return xx;
+ }
+ } /* (kx<(ky+0x01500000)) */
else
- if (ABS(u.x)>0.5*t.x) return (d>z)?u.x+t.x:u.x-t.x;
- else
- {z=u.x/t.x; d=(z+big.x)-big.x; return ((u.x-d*w.x)-d*ww.x);}
- }
-
- } /* (kx<0x7fe00000&&ky<0x7ff00000&&ky>=0x03500000) */
- else {
- if (kx<0x7fe00000&&ky<0x7ff00000&&(ky>0||t.i[LOW_HALF]!=0)) {
- y=ABS(y)*t128.x;
- z=__ieee754_remainder(x,y)*t128.x;
- z=__ieee754_remainder(z,y)*tm128.x;
- return z;
- }
- else {
- if ((kx&0x7ff00000)==0x7fe00000&&ky<0x7ff00000&&(ky>0||t.i[LOW_HALF]!=0)) {
- y=ABS(y);
- z=2.0*__ieee754_remainder(0.5*x,y);
- d = ABS(z);
- if (d <= ABS(d-y)) return z;
- else return (z>0)?z-y:z+y;
- }
- else { /* if x is too big */
- if (ky==0 && t.i[LOW_HALF] == 0) /* y = 0 */
- return (x * y) / (x * y);
- else if (kx >= 0x7ff00000 /* x not finite */
- || (ky>0x7ff00000 /* y is NaN */
- || (ky == 0x7ff00000 && t.i[LOW_HALF] != 0)))
- return (x * y) / (x * y);
+ {
+ r.x = 1.0 / t.x;
+ n = t.i[HIGH_HALF];
+ nn = (n & 0x7ff00000) + 0x01400000;
+ w.i[HIGH_HALF] = n;
+ ww.x = t.x - w.x;
+ l = (kx - nn) & 0xfff00000;
+ n1 = ww.i[HIGH_HALF];
+ m1 = r.i[HIGH_HALF];
+ while (l > 0)
+ {
+ r.i[HIGH_HALF] = m1 - l;
+ z = u.x * r.x;
+ w.i[HIGH_HALF] = n + l;
+ ww.i[HIGH_HALF] = (n1) ? n1 + l : n1;
+ d = (z + big.x) - big.x;
+ u.x = (u.x - d * w.x) - d * ww.x;
+ l = (u.i[HIGH_HALF] & 0x7ff00000) - nn;
+ }
+ r.i[HIGH_HALF] = m1;
+ w.i[HIGH_HALF] = n;
+ ww.i[HIGH_HALF] = n1;
+ z = u.x * r.x;
+ d = (z + big.x) - big.x;
+ u.x = (u.x - d * w.x) - d * ww.x;
+ if (ABS (u.x) < 0.5 * t.x)
+ return (u.x != 0) ? u.x : ((x > 0) ? ZERO.x : nZERO.x);
+ else
+ if (ABS (u.x) > 0.5 * t.x)
+ return (d > z) ? u.x + t.x : u.x - t.x;
+ else
+ {
+ z = u.x / t.x; d = (z + big.x) - big.x;
+ return ((u.x - d * w.x) - d * ww.x);
+ }
+ }
+ } /* (kx<0x7fe00000&&ky<0x7ff00000&&ky>=0x03500000) */
+ else
+ {
+ if (kx < 0x7fe00000 && ky < 0x7ff00000 && (ky > 0 || t.i[LOW_HALF] != 0))
+ {
+ y = ABS (y) * t128.x;
+ z = __ieee754_remainder (x, y) * t128.x;
+ z = __ieee754_remainder (z, y) * tm128.x;
+ return z;
+ }
else
- return x;
+ {
+ if ((kx & 0x7ff00000) == 0x7fe00000 && ky < 0x7ff00000 &&
+ (ky > 0 || t.i[LOW_HALF] != 0))
+ {
+ y = ABS (y);
+ z = 2.0 * __ieee754_remainder (0.5 * x, y);
+ d = ABS (z);
+ if (d <= ABS (d - y))
+ return z;
+ else
+ return (z > 0) ? z - y : z + y;
+ }
+ else /* if x is too big */
+ {
+ if (ky == 0 && t.i[LOW_HALF] == 0) /* y = 0 */
+ return (x * y) / (x * y);
+ else if (kx >= 0x7ff00000 /* x not finite */
+ || (ky > 0x7ff00000 /* y is NaN */
+ || (ky == 0x7ff00000 && t.i[LOW_HALF] != 0)))
+ return (x * y) / (x * y);
+ else
+ return x;
+ }
+ }
}
- }
- }
}
strong_alias (__ieee754_remainder, __remainder_finite)
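Several steps of __ieee754_remainder above round a quotient to the nearest integer with d = (z + big.x) - big.x: adding a constant around 1.5*2^52 pushes the fraction bits out of the significand, so the subsequent subtraction leaves the rounded integer, which is why the function pins FE_TONEAREST first. A hedged sketch, treating the value 1.5*2^52 as an assumption about big.x (its definition is in a header not shown in this hunk):

    /* Hedged sketch: round to the nearest integer via the big-constant
       trick; valid only for |z| < 2^51 and only under round-to-nearest
       (ties resolve to even).  */
    static double
    round_nearest (double z)
    {
      static const double BIG = 6755399441055744.0;  /* assumed 1.5 * 2^52 */
      return (z + BIG) - BIG;
    }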
diff --git a/libc/sysdeps/ieee754/dbl-64/e_sinh.c b/libc/sysdeps/ieee754/dbl-64/e_sinh.c
index b954100ba..851b510aa 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_sinh.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_sinh.c
@@ -38,43 +38,50 @@ static char rcsid[] = "$NetBSD: e_sinh.c,v 1.7 1995/05/10 20:46:13 jtc Exp $";
static const double one = 1.0, shuge = 1.0e307;
double
-__ieee754_sinh(double x)
+__ieee754_sinh (double x)
{
- double t,w,h;
- int32_t ix,jx;
- u_int32_t lx;
+ double t, w, h;
+ int32_t ix, jx;
+ u_int32_t lx;
- /* High word of |x|. */
- GET_HIGH_WORD(jx,x);
- ix = jx&0x7fffffff;
+ /* High word of |x|. */
+ GET_HIGH_WORD (jx, x);
+ ix = jx & 0x7fffffff;
- /* x is INF or NaN */
- if(__builtin_expect(ix>=0x7ff00000, 0)) return x+x;
+ /* x is INF or NaN */
+ if (__builtin_expect (ix >= 0x7ff00000, 0))
+ return x + x;
- h = 0.5;
- if (jx<0) h = -h;
- /* |x| in [0,22], return sign(x)*0.5*(E+E/(E+1))) */
- if (ix < 0x40360000) { /* |x|<22 */
- if (__builtin_expect(ix<0x3e300000, 0)) /* |x|<2**-28 */
- if(shuge+x>one)
- return x;/* sinh(tiny) = tiny with inexact */
- t = __expm1(fabs(x));
- if(ix<0x3ff00000) return h*(2.0*t-t*t/(t+one));
- return h*(t+t/(t+one));
- }
+ h = 0.5;
+ if (jx < 0)
+ h = -h;
+ /* |x| in [0,22], return sign(x)*0.5*(E+E/(E+1))) */
+ if (ix < 0x40360000) /* |x|<22 */
+ {
+ if (__builtin_expect (ix < 0x3e300000, 0)) /* |x|<2**-28 */
+ if (shuge + x > one)
+ return x;
+ /* sinh(tiny) = tiny with inexact */
+ t = __expm1 (fabs (x));
+ if (ix < 0x3ff00000)
+ return h * (2.0 * t - t * t / (t + one));
+ return h * (t + t / (t + one));
+ }
- /* |x| in [22, log(maxdouble)] return 0.5*exp(|x|) */
- if (ix < 0x40862e42) return h*__ieee754_exp(fabs(x));
+ /* |x| in [22, log(maxdouble)] return 0.5*exp(|x|) */
+ if (ix < 0x40862e42)
+ return h * __ieee754_exp (fabs (x));
- /* |x| in [log(maxdouble), overflowthresold] */
- GET_LOW_WORD(lx,x);
- if (ix<0x408633ce || ((ix==0x408633ce)&&(lx<=(u_int32_t)0x8fb9f87d))) {
- w = __ieee754_exp(0.5*fabs(x));
- t = h*w;
- return t*w;
- }
+ /* |x| in [log(maxdouble), overflowthresold] */
+ GET_LOW_WORD (lx, x);
+ if (ix < 0x408633ce || ((ix == 0x408633ce) && (lx <= (u_int32_t) 0x8fb9f87d)))
+ {
+ w = __ieee754_exp (0.5 * fabs (x));
+ t = h * w;
+ return t * w;
+ }
- /* |x| > overflowthresold, sinh(x) overflow */
- return x*shuge;
+ /* |x| > overflowthresold, sinh(x) overflow */
+ return x * shuge;
}
strong_alias (__ieee754_sinh, __sinh_finite)
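The |x| < 22 branch of __ieee754_sinh above rests on the identity sinh(x) = sign(x)*0.5*(E + E/(E+1)) with E = expm1(|x|), since e^|x| = E+1 and e^-|x| = 1/(E+1); the |x| < 1 variant 0.5*(2E - E*E/(E+1)) is the same quantity rearranged so nothing cancels when E is tiny. A hedged reference version of just that branch:

    #include <math.h>

    /* Hedged reference for the |x| < 22 branch only; no overflow, NaN or
       tiny-argument handling.  */
    static double
    sinh_small_ref (double x)
    {
      double h = (x < 0.0) ? -0.5 : 0.5;
      double t = expm1 (fabs (x));
      if (fabs (x) < 1.0)
        return h * (2.0 * t - t * t / (t + 1.0));
      return h * (t + t / (t + 1.0));
    }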
diff --git a/libc/sysdeps/ieee754/dbl-64/e_sqrt.c b/libc/sysdeps/ieee754/dbl-64/e_sqrt.c
index 54610eeea..854ae38c4 100644
--- a/libc/sysdeps/ieee754/dbl-64/e_sqrt.c
+++ b/libc/sysdeps/ieee754/dbl-64/e_sqrt.c
@@ -44,58 +44,67 @@
/* it computes the correctly rounded (to nearest) value of square */
/* root of x. */
/*********************************************************************/
-double __ieee754_sqrt(double x) {
+double
+__ieee754_sqrt (double x)
+{
#include "uroot.h"
static const double
rt0 = 9.99999999859990725855365213134618E-01,
rt1 = 4.99999999495955425917856814202739E-01,
rt2 = 3.75017500867345182581453026130850E-01,
rt3 = 3.12523626554518656309172508769531E-01;
- static const double big = 134217728.0;
- double y,t,del,res,res1,hy,z,zz,p,hx,tx,ty,s;
- mynumber a,c={{0,0}};
+ static const double big = 134217728.0;
+ double y, t, del, res, res1, hy, z, zz, p, hx, tx, ty, s;
+ mynumber a, c = { { 0, 0 } };
int4 k;
- a.x=x;
- k=a.i[HIGH_HALF];
- a.i[HIGH_HALF]=(k&0x001fffff)|0x3fe00000;
- t=inroot[(k&0x001fffff)>>14];
- s=a.x;
+ a.x = x;
+ k = a.i[HIGH_HALF];
+ a.i[HIGH_HALF] = (k & 0x001fffff) | 0x3fe00000;
+ t = inroot[(k & 0x001fffff) >> 14];
+ s = a.x;
/*----------------- 2^-1022 <= | x |< 2^1024 -----------------*/
- if (k>0x000fffff && k<0x7ff00000) {
- fenv_t env;
- libc_feholdexcept (&env);
- double ret;
- y=1.0-t*(t*s);
- t=t*(rt0+y*(rt1+y*(rt2+y*rt3)));
- c.i[HIGH_HALF]=0x20000000+((k&0x7fe00000)>>1);
- y=t*s;
- hy=(y+big)-big;
- del=0.5*t*((s-hy*hy)-(y-hy)*(y+hy));
- res=y+del;
- if (res == (res+1.002*((y-res)+del))) ret = res*c.x;
- else {
- res1=res+1.5*((y-res)+del);
- EMULV(res,res1,z,zz,p,hx,tx,hy,ty); /* (z+zz)=res*res1 */
- ret = ((((z-s)+zz)<0)?max(res,res1):min(res,res1))*c.x;
+ if (k > 0x000fffff && k < 0x7ff00000)
+ {
+ fenv_t env;
+ libc_feholdexcept (&env);
+ double ret;
+ y = 1.0 - t * (t * s);
+ t = t * (rt0 + y * (rt1 + y * (rt2 + y * rt3)));
+ c.i[HIGH_HALF] = 0x20000000 + ((k & 0x7fe00000) >> 1);
+ y = t * s;
+ hy = (y + big) - big;
+ del = 0.5 * t * ((s - hy * hy) - (y - hy) * (y + hy));
+ res = y + del;
+ if (res == (res + 1.002 * ((y - res) + del)))
+ ret = res * c.x;
+ else
+ {
+ res1 = res + 1.5 * ((y - res) + del);
+ EMULV (res, res1, z, zz, p, hx, tx, hy, ty); /* (z+zz)=res*res1 */
+ ret = ((((z - s) + zz) < 0) ? max (res, res1) :
+ min (res, res1)) * c.x;
+ }
+ math_force_eval (ret);
+ libc_fesetenv (&env);
+ if (x / ret != ret)
+ {
+ double force_inexact = 1.0 / 3.0;
+ math_force_eval (force_inexact);
+ }
+ /* Otherwise (x / ret == ret), either the square root was exact or
+ the division was inexact. */
+ return ret;
+ }
+ else
+ {
+ if ((k & 0x7ff00000) == 0x7ff00000)
+ return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
+ if (x == 0)
+ return x; /* sqrt(+0)=+0, sqrt(-0)=-0 */
+ if (k < 0)
+ return (x - x) / (x - x); /* sqrt(-ve)=sNaN */
+ return tm256.x * __ieee754_sqrt (x * t512.x);
}
- math_force_eval (ret);
- libc_fesetenv (&env);
- if (x / ret != ret)
- {
- double force_inexact = 1.0 / 3.0;
- math_force_eval (force_inexact);
- }
- /* Otherwise (x / ret == ret), either the square root was exact or
- the division was inexact. */
- return ret;
- }
- else {
- if ((k & 0x7ff00000) == 0x7ff00000)
- return x*x+x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
- if (x==0) return x; /* sqrt(+0)=+0, sqrt(-0)=-0 */
- if (k<0) return (x-x)/(x-x); /* sqrt(-ve)=sNaN */
- return tm256.x*__ieee754_sqrt(x*t512.x);
- }
}
strong_alias (__ieee754_sqrt, __sqrt_finite)
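The fast path of __ieee754_sqrt above starts from a table seed t ~ 1/sqrt(s), applies one polynomial correction in y = 1 - t*t*s, and takes t*s as the square-root approximation before the del/res compensation and the rounding test. The refinement step in isolation, with the coefficients copied from the function:

    /* Hedged sketch of the refinement only; the real code follows this with
       a compensation term and a correctly-rounded-or-retry check.  */
    static double
    sqrt_refine (double s, double t)  /* t: table seed for 1/sqrt(s) */
    {
      static const double rt0 = 9.99999999859990725855365213134618E-01,
                          rt1 = 4.99999999495955425917856814202739E-01,
                          rt2 = 3.75017500867345182581453026130850E-01,
                          rt3 = 3.12523626554518656309172508769531E-01;
      double y = 1.0 - t * (t * s);
      t = t * (rt0 + y * (rt1 + y * (rt2 + y * rt3)));
      return t * s;
    }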
diff --git a/libc/sysdeps/ieee754/dbl-64/halfulp.c b/libc/sysdeps/ieee754/dbl-64/halfulp.c
index 8c5a9312a..382ad7aad 100644
--- a/libc/sysdeps/ieee754/dbl-64/halfulp.c
+++ b/libc/sysdeps/ieee754/dbl-64/halfulp.c
@@ -44,86 +44,109 @@
#endif
static const int4 tab54[32] = {
- 262143, 11585, 1782, 511, 210, 107, 63, 42,
- 30, 22, 17, 14, 12, 10, 9, 7,
- 7, 6, 5, 5, 5, 4, 4, 4,
- 3, 3, 3, 3, 3, 3, 3, 3 };
+ 262143, 11585, 1782, 511, 210, 107, 63, 42,
+ 30, 22, 17, 14, 12, 10, 9, 7,
+ 7, 6, 5, 5, 5, 4, 4, 4,
+ 3, 3, 3, 3, 3, 3, 3, 3
+};
double
SECTION
-__halfulp(double x, double y)
+__halfulp (double x, double y)
{
mynumber v;
- double z,u,uu;
+ double z, u, uu;
#ifndef DLA_FMS
- double j1,j2,j3,j4,j5;
+ double j1, j2, j3, j4, j5;
#endif
- int4 k,l,m,n;
- if (y <= 0) { /*if power is negative or zero */
- v.x = y;
- if (v.i[LOW_HALF] != 0) return -10.0;
- v.x = x;
- if (v.i[LOW_HALF] != 0) return -10.0;
- if ((v.i[HIGH_HALF]&0x000fffff) != 0) return -10; /* if x =2 ^ n */
- k = ((v.i[HIGH_HALF]&0x7fffffff)>>20)-1023; /* find this n */
- z = (double) k;
- return (z*y == -1075.0)?0: -10.0;
- }
- /* if y > 0 */
+ int4 k, l, m, n;
+ if (y <= 0) /*if power is negative or zero */
+ {
+ v.x = y;
+ if (v.i[LOW_HALF] != 0)
+ return -10.0;
+ v.x = x;
+ if (v.i[LOW_HALF] != 0)
+ return -10.0;
+ if ((v.i[HIGH_HALF] & 0x000fffff) != 0)
+ return -10; /* if x =2 ^ n */
+ k = ((v.i[HIGH_HALF] & 0x7fffffff) >> 20) - 1023; /* find this n */
+ z = (double) k;
+ return (z * y == -1075.0) ? 0 : -10.0;
+ }
+ /* if y > 0 */
v.x = y;
- if (v.i[LOW_HALF] != 0) return -10.0;
+ if (v.i[LOW_HALF] != 0)
+ return -10.0;
- v.x=x;
- /* case where x = 2**n for some integer n */
- if (((v.i[HIGH_HALF]&0x000fffff)|v.i[LOW_HALF]) == 0) {
- k=(v.i[HIGH_HALF]>>20)-1023;
- return (((double) k)*y == -1075.0)?0:-10.0;
- }
+ v.x = x;
+ /* case where x = 2**n for some integer n */
+ if (((v.i[HIGH_HALF] & 0x000fffff) | v.i[LOW_HALF]) == 0)
+ {
+ k = (v.i[HIGH_HALF] >> 20) - 1023;
+ return (((double) k) * y == -1075.0) ? 0 : -10.0;
+ }
v.x = y;
k = v.i[HIGH_HALF];
- m = k<<12;
+ m = k << 12;
l = 0;
while (m)
- {m = m<<1; l++; }
- n = (k&0x000fffff)|0x00100000;
- n = n>>(20-l); /* n is the odd integer of y */
- k = ((k>>20) -1023)-l; /* y = n*2**k */
- if (k>5) return -10.0;
- if (k>0) for (;k>0;k--) n *= 2;
- if (n > 34) return -10.0;
+ {
+ m = m << 1; l++;
+ }
+ n = (k & 0x000fffff) | 0x00100000;
+ n = n >> (20 - l); /* n is the odd integer of y */
+ k = ((k >> 20) - 1023) - l; /* y = n*2**k */
+ if (k > 5)
+ return -10.0;
+ if (k > 0)
+ for (; k > 0; k--)
+ n *= 2;
+ if (n > 34)
+ return -10.0;
k = -k;
- if (k>5) return -10.0;
+ if (k > 5)
+ return -10.0;
- /* now treat x */
- while (k>0) {
- z = __ieee754_sqrt(x);
- EMULV(z,z,u,uu,j1,j2,j3,j4,j5);
- if (((u-x)+uu) != 0) break;
- x = z;
- k--;
- }
- if (k) return -10.0;
+ /* now treat x */
+ while (k > 0)
+ {
+ z = __ieee754_sqrt (x);
+ EMULV (z, z, u, uu, j1, j2, j3, j4, j5);
+ if (((u - x) + uu) != 0)
+ break;
+ x = z;
+ k--;
+ }
+ if (k)
+ return -10.0;
/* it is impossible that n == 2, so the mantissa of x must be short */
v.x = x;
- if (v.i[LOW_HALF]) return -10.0;
+ if (v.i[LOW_HALF])
+ return -10.0;
k = v.i[HIGH_HALF];
- m = k<<12;
+ m = k << 12;
l = 0;
- while (m) {m = m<<1; l++; }
- m = (k&0x000fffff)|0x00100000;
- m = m>>(20-l); /* m is the odd integer of x */
+ while (m)
+ {
+ m = m << 1; l++;
+ }
+ m = (k & 0x000fffff) | 0x00100000;
+ m = m >> (20 - l); /* m is the odd integer of x */
- /* now check whether the length of m**n is at most 54 bits */
+ /* now check whether the length of m**n is at most 54 bits */
- if (m > tab54[n-3]) return -10.0;
+ if (m > tab54[n - 3])
+ return -10.0;
- /* yes, it is - now compute x**n by simple multiplications */
+ /* yes, it is - now compute x**n by simple multiplications */
u = x;
- for (k=1;k<n;k++) u = u*x;
+ for (k = 1; k < n; k++)
+ u = u * x;
return u;
}
diff --git a/libc/sysdeps/ieee754/dbl-64/k_rem_pio2.c b/libc/sysdeps/ieee754/dbl-64/k_rem_pio2.c
index ec4b4cf60..047c6c288 100644
--- a/libc/sysdeps/ieee754/dbl-64/k_rem_pio2.c
+++ b/libc/sysdeps/ieee754/dbl-64/k_rem_pio2.c
@@ -147,166 +147,215 @@ static const double PIo2[] = {
};
static const double
-zero = 0.0,
-one = 1.0,
-two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
-twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
+ zero = 0.0,
+ one = 1.0,
+ two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
+ twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
-int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int32_t *ipio2)
+int
+__kernel_rem_pio2 (double *x, double *y, int e0, int nx, int prec,
+ const int32_t *ipio2)
{
- int32_t jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
- double z,fw,f[20],fq[20],q[20];
+ int32_t jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;
+ double z, fw, f[20], fq[20], q[20];
- /* initialize jk*/
- jk = init_jk[prec];
- jp = jk;
+ /* initialize jk*/
+ jk = init_jk[prec];
+ jp = jk;
- /* determine jx,jv,q0, note that 3>q0 */
- jx = nx-1;
- jv = (e0-3)/24; if(jv<0) jv=0;
- q0 = e0-24*(jv+1);
+ /* determine jx,jv,q0, note that 3>q0 */
+ jx = nx - 1;
+ jv = (e0 - 3) / 24; if (jv < 0)
+ jv = 0;
+ q0 = e0 - 24 * (jv + 1);
- /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
- j = jv-jx; m = jx+jk;
- for(i=0;i<=m;i++,j++) f[i] = (j<0)? zero : (double) ipio2[j];
+ /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
+ j = jv - jx; m = jx + jk;
+ for (i = 0; i <= m; i++, j++)
+ f[i] = (j < 0) ? zero : (double) ipio2[j];
- /* compute q[0],q[1],...q[jk] */
- for (i=0;i<=jk;i++) {
- for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
- }
+ /* compute q[0],q[1],...q[jk] */
+ for (i = 0; i <= jk; i++)
+ {
+ for (j = 0, fw = 0.0; j <= jx; j++)
+ fw += x[j] * f[jx + i - j];
+ q[i] = fw;
+ }
- jz = jk;
+ jz = jk;
recompute:
- /* distill q[] into iq[] reversingly */
- for(i=0,j=jz,z=q[jz];j>0;i++,j--) {
- fw = (double)((int32_t)(twon24* z));
- iq[i] = (int32_t)(z-two24*fw);
- z = q[j-1]+fw;
- }
+ /* distill q[] into iq[] reversingly */
+ for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--)
+ {
+ fw = (double) ((int32_t) (twon24 * z));
+ iq[i] = (int32_t) (z - two24 * fw);
+ z = q[j - 1] + fw;
+ }
- /* compute n */
- z = __scalbn(z,q0); /* actual value of z */
- z -= 8.0*__floor(z*0.125); /* trim off integer >= 8 */
- n = (int32_t) z;
- z -= (double)n;
- ih = 0;
- if(q0>0) { /* need iq[jz-1] to determine n */
- i = (iq[jz-1]>>(24-q0)); n += i;
- iq[jz-1] -= i<<(24-q0);
- ih = iq[jz-1]>>(23-q0);
- }
- else if(q0==0) ih = iq[jz-1]>>23;
- else if(z>=0.5) ih=2;
+ /* compute n */
+ z = __scalbn (z, q0); /* actual value of z */
+ z -= 8.0 * __floor (z * 0.125); /* trim off integer >= 8 */
+ n = (int32_t) z;
+ z -= (double) n;
+ ih = 0;
+ if (q0 > 0) /* need iq[jz-1] to determine n */
+ {
+ i = (iq[jz - 1] >> (24 - q0)); n += i;
+ iq[jz - 1] -= i << (24 - q0);
+ ih = iq[jz - 1] >> (23 - q0);
+ }
+ else if (q0 == 0)
+ ih = iq[jz - 1] >> 23;
+ else if (z >= 0.5)
+ ih = 2;
- if(ih>0) { /* q > 0.5 */
- n += 1; carry = 0;
- for(i=0;i<jz ;i++) { /* compute 1-q */
- j = iq[i];
- if(carry==0) {
- if(j!=0) {
- carry = 1; iq[i] = 0x1000000- j;
- }
- } else iq[i] = 0xffffff - j;
- }
- if(q0>0) { /* rare case: chance is 1 in 12 */
- switch(q0) {
- case 1:
- iq[jz-1] &= 0x7fffff; break;
- case 2:
- iq[jz-1] &= 0x3fffff; break;
- }
+ if (ih > 0) /* q > 0.5 */
+ {
+ n += 1; carry = 0;
+ for (i = 0; i < jz; i++) /* compute 1-q */
+ {
+ j = iq[i];
+ if (carry == 0)
+ {
+ if (j != 0)
+ {
+ carry = 1; iq[i] = 0x1000000 - j;
+ }
}
- if(ih==2) {
- z = one - z;
- if(carry!=0) z -= __scalbn(one,q0);
+ else
+ iq[i] = 0xffffff - j;
+ }
+ if (q0 > 0) /* rare case: chance is 1 in 12 */
+ {
+ switch (q0)
+ {
+ case 1:
+ iq[jz - 1] &= 0x7fffff; break;
+ case 2:
+ iq[jz - 1] &= 0x3fffff; break;
}
}
+ if (ih == 2)
+ {
+ z = one - z;
+ if (carry != 0)
+ z -= __scalbn (one, q0);
+ }
+ }
- /* check if recomputation is needed */
- if(z==zero) {
- j = 0;
- for (i=jz-1;i>=jk;i--) j |= iq[i];
- if(j==0) { /* need recomputation */
- for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */
+ /* check if recomputation is needed */
+ if (z == zero)
+ {
+ j = 0;
+ for (i = jz - 1; i >= jk; i--)
+ j |= iq[i];
+ if (j == 0) /* need recomputation */
+ {
+ for (k = 1; iq[jk - k] == 0; k++)
+ ; /* k = no. of terms needed */
- for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */
- f[jx+i] = (double) ipio2[jv+i];
- for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
- q[i] = fw;
- }
- jz += k;
- goto recompute;
+ for (i = jz + 1; i <= jz + k; i++) /* add q[jz+1] to q[jz+k] */
+ {
+ f[jx + i] = (double) ipio2[jv + i];
+ for (j = 0, fw = 0.0; j <= jx; j++)
+ fw += x[j] * f[jx + i - j];
+ q[i] = fw;
}
+ jz += k;
+ goto recompute;
}
+ }
- /* chop off zero terms */
- if(z==0.0) {
- jz -= 1; q0 -= 24;
- while(iq[jz]==0) { jz--; q0-=24;}
- } else { /* break z into 24-bit if necessary */
- z = __scalbn(z,-q0);
- if(z>=two24) {
- fw = (double)((int32_t)(twon24*z));
- iq[jz] = (int32_t)(z-two24*fw);
- jz += 1; q0 += 24;
- iq[jz] = (int32_t) fw;
- } else iq[jz] = (int32_t) z ;
+ /* chop off zero terms */
+ if (z == 0.0)
+ {
+ jz -= 1; q0 -= 24;
+ while (iq[jz] == 0)
+ {
+ jz--; q0 -= 24;
}
-
- /* convert integer "bit" chunk to floating-point value */
- fw = __scalbn(one,q0);
- for(i=jz;i>=0;i--) {
- q[i] = fw*(double)iq[i]; fw*=twon24;
+ }
+ else /* break z into 24-bit if necessary */
+ {
+ z = __scalbn (z, -q0);
+ if (z >= two24)
+ {
+ fw = (double) ((int32_t) (twon24 * z));
+ iq[jz] = (int32_t) (z - two24 * fw);
+ jz += 1; q0 += 24;
+ iq[jz] = (int32_t) fw;
}
+ else
+ iq[jz] = (int32_t) z;
+ }
- /* compute PIo2[0,...,jp]*q[jz,...,0] */
- for(i=jz;i>=0;i--) {
- for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
- fq[jz-i] = fw;
- }
+ /* convert integer "bit" chunk to floating-point value */
+ fw = __scalbn (one, q0);
+ for (i = jz; i >= 0; i--)
+ {
+ q[i] = fw * (double) iq[i]; fw *= twon24;
+ }
- /* compress fq[] into y[] */
- switch(prec) {
- case 0:
- fw = 0.0;
- for (i=jz;i>=0;i--) fw += fq[i];
- y[0] = (ih==0)? fw: -fw;
- break;
- case 1:
- case 2:;
+ /* compute PIo2[0,...,jp]*q[jz,...,0] */
+ for (i = jz; i >= 0; i--)
+ {
+ for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++)
+ fw += PIo2[k] * q[i + k];
+ fq[jz - i] = fw;
+ }
+
+ /* compress fq[] into y[] */
+ switch (prec)
+ {
+ case 0:
+ fw = 0.0;
+ for (i = jz; i >= 0; i--)
+ fw += fq[i];
+ y[0] = (ih == 0) ? fw : -fw;
+ break;
+ case 1:
+ case 2:;
#if __FLT_EVAL_METHOD__ != 0
- volatile
+ volatile
#endif
- double fv = 0.0;
- for (i=jz;i>=0;i--) fv += fq[i];
- y[0] = (ih==0)? fv: -fv;
- fv = fq[0]-fv;
- for (i=1;i<=jz;i++) fv += fq[i];
- y[1] = (ih==0)? fv: -fv;
- break;
- case 3: /* painful */
- for (i=jz;i>0;i--) {
+ double fv = 0.0;
+ for (i = jz; i >= 0; i--)
+ fv += fq[i];
+ y[0] = (ih == 0) ? fv : -fv;
+ fv = fq[0] - fv;
+ for (i = 1; i <= jz; i++)
+ fv += fq[i];
+ y[1] = (ih == 0) ? fv : -fv;
+ break;
+ case 3: /* painful */
+ for (i = jz; i > 0; i--)
+ {
#if __FLT_EVAL_METHOD__ != 0
- volatile
+ volatile
#endif
- double fv = (double)(fq[i-1]+fq[i]);
- fq[i] += fq[i-1]-fv;
- fq[i-1] = fv;
- }
- for (i=jz;i>1;i--) {
+ double fv = (double) (fq[i - 1] + fq[i]);
+ fq[i] += fq[i - 1] - fv;
+ fq[i - 1] = fv;
+ }
+ for (i = jz; i > 1; i--)
+ {
#if __FLT_EVAL_METHOD__ != 0
- volatile
+ volatile
#endif
- double fv = (double)(fq[i-1]+fq[i]);
- fq[i] += fq[i-1]-fv;
- fq[i-1] = fv;
- }
- for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
- if(ih==0) {
- y[0] = fq[0]; y[1] = fq[1]; y[2] = fw;
- } else {
- y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
- }
+ double fv = (double) (fq[i - 1] + fq[i]);
+ fq[i] += fq[i - 1] - fv;
+ fq[i - 1] = fv;
+ }
+ for (fw = 0.0, i = jz; i >= 2; i--)
+ fw += fq[i];
+ if (ih == 0)
+ {
+ y[0] = fq[0]; y[1] = fq[1]; y[2] = fw;
+ }
+ else
+ {
+ y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
}
- return n&7;
+ }
+ return n & 7;
}
diff --git a/libc/sysdeps/ieee754/dbl-64/mpa-arch.h b/libc/sysdeps/ieee754/dbl-64/mpa-arch.h
index 7de9d51ae..779a43d9b 100644
--- a/libc/sysdeps/ieee754/dbl-64/mpa-arch.h
+++ b/libc/sysdeps/ieee754/dbl-64/mpa-arch.h
@@ -22,15 +22,15 @@ typedef int64_t mantissa_store_t;
#define TWOPOW(i) (1L << i)
#define RADIX_EXP 24
-#define RADIX TWOPOW (RADIX_EXP) /* 2^24 */
+#define RADIX TWOPOW (RADIX_EXP) /* 2^24 */
/* Divide D by RADIX and put the remainder in R. D must be a non-negative
integral value. */
#define DIV_RADIX(d, r) \
- ({ \
- r = d & (RADIX - 1); \
- d >>= RADIX_EXP; \
- })
+ ({ \
+ r = d & (RADIX - 1); \
+ d >>= RADIX_EXP; \
+ })
/* Put the integer component of a double X in R and retain the fraction in
X. This is used in extracting mantissa digits for MP_NO by using the
@@ -38,10 +38,10 @@ typedef int64_t mantissa_store_t;
digit and then scaling by RADIX to get the next mantissa digit in the same
manner. */
#define INTEGER_OF(x, i) \
- ({ \
- i = (mantissa_t) x; \
- x -= i; \
- })
+ ({ \
+ i = (mantissa_t) x; \
+ x -= i; \
+ })
/* Align IN down to F. The code assumes that F is a power of two. */
-#define ALIGN_DOWN_TO(in, f) ((in) & -(f))
+#define ALIGN_DOWN_TO(in, f) ((in) & - (f))
diff --git a/libc/sysdeps/ieee754/dbl-64/mpa.c b/libc/sysdeps/ieee754/dbl-64/mpa.c
index a3feb175e..190e8a637 100644
--- a/libc/sysdeps/ieee754/dbl-64/mpa.c
+++ b/libc/sysdeps/ieee754/dbl-64/mpa.c
@@ -50,8 +50,8 @@
#endif
#ifndef NO__CONST
-const mp_no mpone = {1, {1.0, 1.0}};
-const mp_no mptwo = {1, {1.0, 2.0}};
+const mp_no mpone = { 1, { 1.0, 1.0 } };
+const mp_no mptwo = { 1, { 1.0, 2.0 } };
#endif
#ifndef NO___ACR
@@ -123,7 +123,7 @@ __cpy (const mp_no *x, mp_no *y, int p)
static void
norm (const mp_no *x, double *y, int p)
{
-#define R RADIXI
+# define R RADIXI
long i;
double c;
mantissa_t a, u, v, z[5];
@@ -140,7 +140,7 @@ norm (const mp_no *x, double *y, int p)
}
else
{
- for (a = 1, z[1] = X[1]; z[1] < TWO23;)
+ for (a = 1, z[1] = X[1]; z[1] < TWO23; )
{
a *= 2;
z[1] *= 2;
@@ -188,7 +188,7 @@ norm (const mp_no *x, double *y, int p)
c *= RADIXI;
*y = c;
-#undef R
+# undef R
}
/* Convert a multiple precision number *X into a double precision
@@ -201,7 +201,7 @@ denorm (const mp_no *x, double *y, int p)
double c;
mantissa_t u, z[5];
-#define R RADIXI
+# define R RADIXI
if (EX < -44 || (EX == -44 && X[1] < TWO5))
{
*y = 0;
@@ -298,7 +298,7 @@ denorm (const mp_no *x, double *y, int p)
c = X[0] * ((z[1] + R * (z[2] + R * z[3])) - TWO10);
*y = c * TWOM1032;
-#undef R
+# undef R
}
/* Convert multiple precision number *X into double precision number *Y. The
@@ -394,7 +394,7 @@ add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
zk = 1;
}
else
- {
+ {
Z[k--] = zk;
zk = 0;
}
@@ -409,7 +409,7 @@ add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
zk = 1;
}
else
- {
+ {
Z[k--] = zk;
zk = 0;
}
@@ -471,7 +471,7 @@ sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
zk = -1;
}
else
- {
+ {
Z[k--] = zk;
zk = 0;
}
@@ -487,18 +487,19 @@ sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
zk = -1;
}
else
- {
+ {
Z[k--] = zk;
zk = 0;
}
}
/* Normalize. */
- for (i = 1; Z[i] == 0; i++);
+ for (i = 1; Z[i] == 0; i++)
+ ;
EZ = EZ - i + 1;
- for (k = 1; i <= p2 + 1;)
+ for (k = 1; i <= p2 + 1; )
Z[k++] = Z[i++];
- for (; k <= p2;)
+ for (; k <= p2; )
Z[k++] = 0;
}
diff --git a/libc/sysdeps/ieee754/dbl-64/mpatan.c b/libc/sysdeps/ieee754/dbl-64/mpatan.c
index 807b16a9b..a6ae61195 100644
--- a/libc/sysdeps/ieee754/dbl-64/mpatan.c
+++ b/libc/sysdeps/ieee754/dbl-64/mpatan.c
@@ -43,7 +43,6 @@ void
SECTION
__mpatan (mp_no *x, mp_no *y, int p)
{
-
int i, m, n;
double dx;
mp_no mptwoim1 =
diff --git a/libc/sysdeps/ieee754/dbl-64/mpn2dbl.c b/libc/sysdeps/ieee754/dbl-64/mpn2dbl.c
index 8a2f45b9d..0fada79a0 100644
--- a/libc/sysdeps/ieee754/dbl-64/mpn2dbl.c
+++ b/libc/sysdeps/ieee754/dbl-64/mpn2dbl.c
@@ -40,7 +40,7 @@ __mpn_construct_double (mp_srcptr frac_ptr, int expt, int negative)
u.ieee.mantissa0 = (frac_ptr[0] >> 32) & (((mp_limb_t) 1
<< (DBL_MANT_DIG - 32)) - 1);
#else
- #error "mp_limb size " BITS_PER_MP_LIMB "not accounted for"
+ # error "mp_limb size " BITS_PER_MP_LIMB "not accounted for"
#endif
return u.d;
diff --git a/libc/sysdeps/ieee754/dbl-64/mptan.c b/libc/sysdeps/ieee754/dbl-64/mptan.c
index 281bfca1c..a5c3fb36c 100644
--- a/libc/sysdeps/ieee754/dbl-64/mptan.c
+++ b/libc/sysdeps/ieee754/dbl-64/mptan.c
@@ -44,7 +44,6 @@ void
SECTION
__mptan (double x, mp_no *mpy, int p)
{
-
int n;
mp_no mpw, mpc, mps;
diff --git a/libc/sysdeps/ieee754/dbl-64/mydefs.h b/libc/sysdeps/ieee754/dbl-64/mydefs.h
index 89ca965c6..a430397c6 100644
--- a/libc/sysdeps/ieee754/dbl-64/mydefs.h
+++ b/libc/sysdeps/ieee754/dbl-64/mydefs.h
@@ -28,10 +28,9 @@
#define MY_H
typedef int int4;
-typedef union {int4 i[2]; double x;} mynumber;
-
-#define ABS(x) (((x)>0)?(x):-(x))
-#define max(x,y) (((y)>(x))?(y):(x))
-#define min(x,y) (((y)<(x))?(y):(x))
+typedef union { int4 i[2]; double x; } mynumber;
+#define ABS(x) (((x) > 0) ? (x) : -(x))
+#define max(x, y) (((y) > (x)) ? (y) : (x))
+#define min(x, y) (((y) < (x)) ? (y) : (x))
#endif
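
The reformatted max and min here are ordinary function-like macros, so each argument can be evaluated more than once. A small reminder of that pitfall (an illustration of the general hazard, not a claim about the call sites in this tree):

#include <stdio.h>

#define max(x, y) (((y) > (x)) ? (y) : (x))

int
main (void)
{
  int i = 5;
  int m = max (0, i++);   /* expands to ((i++) > (0)) ? (i++) : (0) */
  printf ("m = %d, i = %d\n", m, i);   /* m = 6, i = 7: i++ ran twice */
  return 0;
}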
diff --git a/libc/sysdeps/ieee754/dbl-64/s_asinh.c b/libc/sysdeps/ieee754/dbl-64/s_asinh.c
index 68e854f5f..550074684 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_asinh.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_asinh.c
@@ -25,33 +25,43 @@
#include <math_private.h>
static const double
-one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
-ln2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */
-huge= 1.00000000000000000000e+300;
+ one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
+ ln2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */
+ huge = 1.00000000000000000000e+300;
double
-__asinh(double x)
+__asinh (double x)
{
- double w;
- int32_t hx,ix;
- GET_HIGH_WORD(hx,x);
- ix = hx&0x7fffffff;
- if(__builtin_expect(ix< 0x3e300000, 0)) { /* |x|<2**-28 */
- if(huge+x>one) return x; /* return x inexact except 0 */
+ double w;
+ int32_t hx, ix;
+ GET_HIGH_WORD (hx, x);
+ ix = hx & 0x7fffffff;
+ if (__builtin_expect (ix < 0x3e300000, 0)) /* |x|<2**-28 */
+ {
+ if (huge + x > one)
+ return x; /* return x inexact except 0 */
+ }
+ if (__builtin_expect (ix > 0x41b00000, 0)) /* |x| > 2**28 */
+ {
+ if (ix >= 0x7ff00000)
+ return x + x; /* x is inf or NaN */
+ w = __ieee754_log (fabs (x)) + ln2;
+ }
+ else
+ {
+ double xa = fabs (x);
+ if (ix > 0x40000000) /* 2**28 > |x| > 2.0 */
+ {
+ w = __ieee754_log (2.0 * xa + one / (__ieee754_sqrt (xa * xa + one) +
+ xa));
}
- if(__builtin_expect(ix>0x41b00000, 0)) { /* |x| > 2**28 */
- if(ix>=0x7ff00000) return x+x; /* x is inf or NaN */
- w = __ieee754_log(fabs(x))+ln2;
- } else {
- double xa = fabs(x);
- if (ix>0x40000000) { /* 2**28 > |x| > 2.0 */
- w = __ieee754_log(2.0*xa+one/(__ieee754_sqrt(xa*xa+one)+xa));
- } else { /* 2.0 > |x| > 2**-28 */
- double t = xa*xa;
- w =__log1p(xa+t/(one+__ieee754_sqrt(one+t)));
- }
+ else /* 2.0 > |x| > 2**-28 */
+ {
+ double t = xa * xa;
+ w = __log1p (xa + t / (one + __ieee754_sqrt (one + t)));
}
- return __copysign(w, x);
+ }
+ return __copysign (w, x);
}
weak_alias (__asinh, asinh)
#ifdef NO_LONG_DOUBLE
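
The reformatted __asinh keeps the usual three ranges: asinh(x) ~ x for tiny x, log|x| + ln2 for huge x, and a log/log1p form in between, with the sign restored at the end. A rough standalone sketch using standard libm calls instead of the internal __ieee754_* entry points (illustration only, not the glibc implementation):

#include <math.h>
#include <stdio.h>

static const double ln2 = 6.93147180559945286227e-01;

static double
asinh_sketch (double x)
{
  double ax = fabs (x), w;

  if (ax < 0x1p-28)                  /* tiny: asinh(x) ~= x */
    w = ax;
  else if (ax > 0x1p28)              /* huge: sqrt(x*x+1) ~= |x| */
    w = log (ax) + ln2;
  else if (ax > 2.0)                 /* large */
    w = log (2.0 * ax + 1.0 / (sqrt (ax * ax + 1.0) + ax));
  else                               /* moderate: go through log1p */
    {
      double t = ax * ax;
      w = log1p (ax + t / (1.0 + sqrt (1.0 + t)));
    }
  return copysign (w, x);
}

int
main (void)
{
  printf ("%.17g %.17g\n", asinh_sketch (0.5), asinh (0.5));
  printf ("%.17g %.17g\n", asinh_sketch (-10.0), asinh (-10.0));
  return 0;
}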
diff --git a/libc/sysdeps/ieee754/dbl-64/s_atan.c b/libc/sysdeps/ieee754/dbl-64/s_atan.c
index 7b6c83ffb..744e96146 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_atan.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_atan.c
@@ -42,6 +42,7 @@
#include "uatan.tbl"
#include "atnat.h"
#include <math.h>
+#include <stap-probe.h>
void __mpatan (mp_no *, mp_no *, int); /* see definition in mpatan.c */
static double atanMp (double, const int[]);
@@ -60,7 +61,7 @@ double
atan (double x)
{
double cor, s1, ss1, s2, ss2, t1, t2, t3, t7, t8, t9, t10, u, u2, u3,
- v, vv, w, ww, y, yy, z, zz;
+ v, vv, w, ww, y, yy, z, zz;
#ifndef DLA_FMS
double t4, t5, t6;
#endif
@@ -190,17 +191,17 @@ atan (double x)
yy = cij[i][4].d + z * yy;
yy = cij[i][3].d + z * yy;
yy = cij[i][2].d + z * yy;
- yy = HPI1 - z * yy;
+ yy = HPI1 - z * yy;
t1 = HPI - cij[i][1].d;
if (i < 112)
- u3 = U31; /* w < 1/2 */
+ u3 = U31; /* w < 1/2 */
else
- u3 = U32; /* w >= 1/2 */
+ u3 = U32; /* w >= 1/2 */
if ((y = t1 + (yy - u3)) == t1 + (yy + u3))
return __signArctan (x, y);
- DIV2 (1 , 0, u, 0, w, ww, t1, t2, t3, t4, t5, t6, t7, t8, t9,
+ DIV2 (1, 0, u, 0, w, ww, t1, t2, t3, t4, t5, t6, t7, t8, t9,
t10);
t1 = w - hij[i][0].d;
EADD (t1, ww, z, zz);
@@ -229,7 +230,7 @@ atan (double x)
else
{
if (u < E)
- { /* D <= u < E */
+ { /* D <= u < E */
w = 1 / u;
v = w * w;
EMULV (w, u, t1, t2, t3, t4, t5, t6, t7);
@@ -247,7 +248,7 @@ atan (double x)
if ((y = t3 + (yy - U4)) == t3 + (yy + U4))
return __signArctan (x, y);
- DIV2 (1 , 0, u, 0, w, ww, t1, t2, t3, t4, t5, t6, t7, t8,
+ DIV2 (1, 0, u, 0, w, ww, t1, t2, t3, t4, t5, t6, t7, t8,
t9, t10);
MUL2 (w, ww, w, ww, v, vv, t1, t2, t3, t4, t5, t6, t7, t8);
@@ -306,8 +307,12 @@ atanMp (double x, const int pr[])
__mp_dbl (&mpy1, &y1, p);
__mp_dbl (&mpy2, &y2, p);
if (y1 == y2)
- return y1;
+ {
+ LIBC_PROBE (slowatan, 3, &p, &x, &y1);
+ return y1;
+ }
}
+ LIBC_PROBE (slowatan_inexact, 3, &p, &x, &y1);
return y1; /*if impossible to do exact computing */
}
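
The repeated pattern (y = t1 + (yy - u3)) == t1 + (yy + u3) in atan accepts the fast result only when shifting the low part by the error bound in both directions still rounds to the same double; otherwise control falls through to the multiple-precision atanMp path. A sketch of that test in isolation (names are my own, illustration only):

#include <stdbool.h>
#include <stdio.h>

static bool
rounds_identically (double hi, double lo, double err, double *result)
{
  double down = hi + (lo - err);
  double up = hi + (lo + err);
  *result = down;
  return down == up;       /* both ends of the error interval round alike */
}

int
main (void)
{
  double y;
  if (rounds_identically (1.0, 0x1p-30, 0x1p-60, &y))
    printf ("fast path accepted: %a\n", y);
  else
    printf ("would take the multiple-precision fallback\n");
  return 0;
}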
diff --git a/libc/sysdeps/ieee754/dbl-64/s_cbrt.c b/libc/sysdeps/ieee754/dbl-64/s_cbrt.c
index a7120e1e1..208a369a6 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_cbrt.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_cbrt.c
@@ -51,16 +51,17 @@ __cbrt (double x)
if (xe == 0 && fpclassify (x) <= FP_ZERO)
return x + x;
- u = (0.354895765043919860
- + ((1.50819193781584896
- + ((-2.11499494167371287
- + ((2.44693122563534430
- + ((-1.83469277483613086
- + (0.784932344976639262 - 0.145263899385486377 * xm) * xm)
+ u = (0.354895765043919860
+ + ((1.50819193781584896
+ + ((-2.11499494167371287
+ + ((2.44693122563534430
+ + ((-1.83469277483613086
+ + (0.784932344976639262 - 0.145263899385486377 * xm)
+ * xm)
+ * xm))
* xm))
- * xm))
- * xm))
- * xm));
+ * xm))
+ * xm));
t2 = u * u * u;
diff --git a/libc/sysdeps/ieee754/dbl-64/s_ceil.c b/libc/sysdeps/ieee754/dbl-64/s_ceil.c
index e048c81c2..b2154b407 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_ceil.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_ceil.c
@@ -25,44 +25,67 @@
static const double huge = 1.0e300;
double
-__ceil(double x)
+__ceil (double x)
{
- int32_t i0,i1,j0;
- u_int32_t i,j;
- EXTRACT_WORDS(i0,i1,x);
- j0 = ((i0>>20)&0x7ff)-0x3ff;
- if(j0<20) {
- if(j0<0) { /* raise inexact if x != 0 */
- math_force_eval(huge+x);
- /* return 0*sign(x) if |x|<1 */
- if(i0<0) {i0=0x80000000;i1=0;}
- else if((i0|i1)!=0) { i0=0x3ff00000;i1=0;}
- } else {
- i = (0x000fffff)>>j0;
- if(((i0&i)|i1)==0) return x; /* x is integral */
- math_force_eval(huge+x); /* raise inexact flag */
- if(i0>0) i0 += (0x00100000)>>j0;
- i0 &= (~i); i1=0;
+ int32_t i0, i1, j0;
+ u_int32_t i, j;
+ EXTRACT_WORDS (i0, i1, x);
+ j0 = ((i0 >> 20) & 0x7ff) - 0x3ff;
+ if (j0 < 20)
+ {
+ if (j0 < 0) /* raise inexact if x != 0 */
+ {
+ math_force_eval (huge + x);
+ /* return 0*sign(x) if |x|<1 */
+ if (i0 < 0)
+ {
+ i0 = 0x80000000; i1 = 0;
}
- } else if (j0>51) {
- if(j0==0x400) return x+x; /* inf or NaN */
- else return x; /* x is integral */
- } else {
- i = ((u_int32_t)(0xffffffff))>>(j0-20);
- if((i1&i)==0) return x; /* x is integral */
- math_force_eval(huge+x); /* raise inexact flag */
- if(i0>0) {
- if(j0==20) i0+=1;
- else {
- j = i1 + (1<<(52-j0));
- if(j<i1) i0+=1; /* got a carry */
- i1 = j;
- }
+ else if ((i0 | i1) != 0)
+ {
+ i0 = 0x3ff00000; i1 = 0;
}
- i1 &= (~i);
}
- INSERT_WORDS(x,i0,i1);
- return x;
+ else
+ {
+ i = (0x000fffff) >> j0;
+ if (((i0 & i) | i1) == 0)
+ return x; /* x is integral */
+ math_force_eval (huge + x); /* raise inexact flag */
+ if (i0 > 0)
+ i0 += (0x00100000) >> j0;
+ i0 &= (~i); i1 = 0;
+ }
+ }
+ else if (j0 > 51)
+ {
+ if (j0 == 0x400)
+ return x + x; /* inf or NaN */
+ else
+ return x; /* x is integral */
+ }
+ else
+ {
+ i = ((u_int32_t) (0xffffffff)) >> (j0 - 20);
+ if ((i1 & i) == 0)
+ return x; /* x is integral */
+ math_force_eval (huge + x); /* raise inexact flag */
+ if (i0 > 0)
+ {
+ if (j0 == 20)
+ i0 += 1;
+ else
+ {
+ j = i1 + (1 << (52 - j0));
+ if (j < i1)
+ i0 += 1; /* got a carry */
+ i1 = j;
+ }
+ }
+ i1 &= (~i);
+ }
+ INSERT_WORDS (x, i0, i1);
+ return x;
}
#ifndef __ceil
weak_alias (__ceil, ceil)
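
__ceil, like most files in this directory, works directly on the two 32-bit halves of the double via EXTRACT_WORDS and rebuilds the result with INSERT_WORDS. A freestanding sketch of that access pattern and of the unbiased exponent j0 it derives (illustration only, assuming IEEE 754 binary64 with matching integer endianness):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

static void
extract_words (int32_t *hi, uint32_t *lo, double x)
{
  uint64_t bits;
  memcpy (&bits, &x, sizeof bits);
  *hi = (int32_t) (bits >> 32);   /* sign, exponent, top mantissa bits */
  *lo = (uint32_t) bits;          /* low 32 mantissa bits */
}

int
main (void)
{
  int32_t i0;
  uint32_t i1;
  extract_words (&i0, &i1, 6.5);

  /* Unbiased exponent, as in j0 = ((i0 >> 20) & 0x7ff) - 0x3ff above.  */
  int j0 = ((i0 >> 20) & 0x7ff) - 0x3ff;
  printf ("high=%08x low=%08x exponent=%d\n", (unsigned) i0, i1, j0);
  return 0;                       /* 6.5 = 1.625 * 2^2, so exponent = 2 */
}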
diff --git a/libc/sysdeps/ieee754/dbl-64/s_copysign.c b/libc/sysdeps/ieee754/dbl-64/s_copysign.c
index a541ceb05..9caf24e8f 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_copysign.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_copysign.c
@@ -23,13 +23,14 @@ static char rcsid[] = "$NetBSD: s_copysign.c,v 1.8 1995/05/10 20:46:57 jtc Exp $
#include <math.h>
#include <math_private.h>
-double __copysign(double x, double y)
+double
+__copysign (double x, double y)
{
- u_int32_t hx,hy;
- GET_HIGH_WORD(hx,x);
- GET_HIGH_WORD(hy,y);
- SET_HIGH_WORD(x,(hx&0x7fffffff)|(hy&0x80000000));
- return x;
+ u_int32_t hx, hy;
+ GET_HIGH_WORD (hx, x);
+ GET_HIGH_WORD (hy, y);
+ SET_HIGH_WORD (x, (hx & 0x7fffffff) | (hy & 0x80000000));
+ return x;
}
weak_alias (__copysign, copysign)
#ifdef NO_LONG_DOUBLE
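
The same word-level idea, applied to the sign bit alone, is what __copysign does. A self-contained equivalent on the full 64-bit pattern (illustration only):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

static double
copysign_sketch (double x, double y)
{
  uint64_t ux, uy;
  memcpy (&ux, &x, sizeof ux);
  memcpy (&uy, &y, sizeof uy);
  ux = (ux & UINT64_C (0x7fffffffffffffff))    /* magnitude of x */
       | (uy & UINT64_C (0x8000000000000000)); /* sign of y */
  memcpy (&x, &ux, sizeof x);
  return x;
}

int
main (void)
{
  printf ("%g %g\n", copysign_sketch (3.0, -0.0),
          copysign_sketch (-2.5, 1.0));        /* -3 2.5 */
  return 0;
}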
diff --git a/libc/sysdeps/ieee754/dbl-64/s_erf.c b/libc/sysdeps/ieee754/dbl-64/s_erf.c
index e25e28d9d..aab4ed593 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_erf.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_erf.c
@@ -116,181 +116,176 @@ static char rcsid[] = "$NetBSD: s_erf.c,v 1.8 1995/05/10 20:47:05 jtc Exp $";
#include <math_private.h>
static const double
-tiny = 1e-300,
-half= 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
-one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
-two = 2.00000000000000000000e+00, /* 0x40000000, 0x00000000 */
- /* c = (float)0.84506291151 */
-erx = 8.45062911510467529297e-01, /* 0x3FEB0AC1, 0x60000000 */
+ tiny = 1e-300,
+ half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
+ one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
+ two = 2.00000000000000000000e+00, /* 0x40000000, 0x00000000 */
+/* c = (float)0.84506291151 */
+ erx = 8.45062911510467529297e-01, /* 0x3FEB0AC1, 0x60000000 */
/*
* Coefficients for approximation to erf on [0,0.84375]
*/
-efx = 1.28379167095512586316e-01, /* 0x3FC06EBA, 0x8214DB69 */
-efx8= 1.02703333676410069053e+00, /* 0x3FF06EBA, 0x8214DB69 */
-pp[] = {1.28379167095512558561e-01, /* 0x3FC06EBA, 0x8214DB68 */
- -3.25042107247001499370e-01, /* 0xBFD4CD7D, 0x691CB913 */
- -2.84817495755985104766e-02, /* 0xBF9D2A51, 0xDBD7194F */
- -5.77027029648944159157e-03, /* 0xBF77A291, 0x236668E4 */
- -2.37630166566501626084e-05}, /* 0xBEF8EAD6, 0x120016AC */
-qq[] = {0.0, 3.97917223959155352819e-01, /* 0x3FD97779, 0xCDDADC09 */
- 6.50222499887672944485e-02, /* 0x3FB0A54C, 0x5536CEBA */
- 5.08130628187576562776e-03, /* 0x3F74D022, 0xC4D36B0F */
- 1.32494738004321644526e-04, /* 0x3F215DC9, 0x221C1A10 */
- -3.96022827877536812320e-06}, /* 0xBED09C43, 0x42A26120 */
+ efx = 1.28379167095512586316e-01, /* 0x3FC06EBA, 0x8214DB69 */
+ efx8 = 1.02703333676410069053e+00, /* 0x3FF06EBA, 0x8214DB69 */
+ pp[] = { 1.28379167095512558561e-01, /* 0x3FC06EBA, 0x8214DB68 */
+ -3.25042107247001499370e-01, /* 0xBFD4CD7D, 0x691CB913 */
+ -2.84817495755985104766e-02, /* 0xBF9D2A51, 0xDBD7194F */
+ -5.77027029648944159157e-03, /* 0xBF77A291, 0x236668E4 */
+ -2.37630166566501626084e-05 }, /* 0xBEF8EAD6, 0x120016AC */
+ qq[] = { 0.0, 3.97917223959155352819e-01, /* 0x3FD97779, 0xCDDADC09 */
+ 6.50222499887672944485e-02, /* 0x3FB0A54C, 0x5536CEBA */
+ 5.08130628187576562776e-03, /* 0x3F74D022, 0xC4D36B0F */
+ 1.32494738004321644526e-04, /* 0x3F215DC9, 0x221C1A10 */
+ -3.96022827877536812320e-06 }, /* 0xBED09C43, 0x42A26120 */
/*
* Coefficients for approximation to erf in [0.84375,1.25]
*/
-pa[] = {-2.36211856075265944077e-03, /* 0xBF6359B8, 0xBEF77538 */
- 4.14856118683748331666e-01, /* 0x3FDA8D00, 0xAD92B34D */
- -3.72207876035701323847e-01, /* 0xBFD7D240, 0xFBB8C3F1 */
- 3.18346619901161753674e-01, /* 0x3FD45FCA, 0x805120E4 */
- -1.10894694282396677476e-01, /* 0xBFBC6398, 0x3D3E28EC */
- 3.54783043256182359371e-02, /* 0x3FA22A36, 0x599795EB */
- -2.16637559486879084300e-03}, /* 0xBF61BF38, 0x0A96073F */
-qa[] = {0.0, 1.06420880400844228286e-01, /* 0x3FBB3E66, 0x18EEE323 */
- 5.40397917702171048937e-01, /* 0x3FE14AF0, 0x92EB6F33 */
- 7.18286544141962662868e-02, /* 0x3FB2635C, 0xD99FE9A7 */
- 1.26171219808761642112e-01, /* 0x3FC02660, 0xE763351F */
- 1.36370839120290507362e-02, /* 0x3F8BEDC2, 0x6B51DD1C */
- 1.19844998467991074170e-02}, /* 0x3F888B54, 0x5735151D */
+ pa[] = { -2.36211856075265944077e-03, /* 0xBF6359B8, 0xBEF77538 */
+ 4.14856118683748331666e-01, /* 0x3FDA8D00, 0xAD92B34D */
+ -3.72207876035701323847e-01, /* 0xBFD7D240, 0xFBB8C3F1 */
+ 3.18346619901161753674e-01, /* 0x3FD45FCA, 0x805120E4 */
+ -1.10894694282396677476e-01, /* 0xBFBC6398, 0x3D3E28EC */
+ 3.54783043256182359371e-02, /* 0x3FA22A36, 0x599795EB */
+ -2.16637559486879084300e-03 }, /* 0xBF61BF38, 0x0A96073F */
+ qa[] = { 0.0, 1.06420880400844228286e-01, /* 0x3FBB3E66, 0x18EEE323 */
+ 5.40397917702171048937e-01, /* 0x3FE14AF0, 0x92EB6F33 */
+ 7.18286544141962662868e-02, /* 0x3FB2635C, 0xD99FE9A7 */
+ 1.26171219808761642112e-01, /* 0x3FC02660, 0xE763351F */
+ 1.36370839120290507362e-02, /* 0x3F8BEDC2, 0x6B51DD1C */
+ 1.19844998467991074170e-02 }, /* 0x3F888B54, 0x5735151D */
/*
* Coefficients for approximation to erfc in [1.25,1/0.35]
*/
-ra[] = {-9.86494403484714822705e-03, /* 0xBF843412, 0x600D6435 */
- -6.93858572707181764372e-01, /* 0xBFE63416, 0xE4BA7360 */
- -1.05586262253232909814e+01, /* 0xC0251E04, 0x41B0E726 */
- -6.23753324503260060396e+01, /* 0xC04F300A, 0xE4CBA38D */
- -1.62396669462573470355e+02, /* 0xC0644CB1, 0x84282266 */
- -1.84605092906711035994e+02, /* 0xC067135C, 0xEBCCABB2 */
- -8.12874355063065934246e+01, /* 0xC0545265, 0x57E4D2F2 */
- -9.81432934416914548592e+00}, /* 0xC023A0EF, 0xC69AC25C */
-sa[] = {0.0,1.96512716674392571292e+01, /* 0x4033A6B9, 0xBD707687 */
- 1.37657754143519042600e+02, /* 0x4061350C, 0x526AE721 */
- 4.34565877475229228821e+02, /* 0x407B290D, 0xD58A1A71 */
- 6.45387271733267880336e+02, /* 0x40842B19, 0x21EC2868 */
- 4.29008140027567833386e+02, /* 0x407AD021, 0x57700314 */
- 1.08635005541779435134e+02, /* 0x405B28A3, 0xEE48AE2C */
- 6.57024977031928170135e+00, /* 0x401A47EF, 0x8E484A93 */
- -6.04244152148580987438e-02}, /* 0xBFAEEFF2, 0xEE749A62 */
+ ra[] = { -9.86494403484714822705e-03, /* 0xBF843412, 0x600D6435 */
+ -6.93858572707181764372e-01, /* 0xBFE63416, 0xE4BA7360 */
+ -1.05586262253232909814e+01, /* 0xC0251E04, 0x41B0E726 */
+ -6.23753324503260060396e+01, /* 0xC04F300A, 0xE4CBA38D */
+ -1.62396669462573470355e+02, /* 0xC0644CB1, 0x84282266 */
+ -1.84605092906711035994e+02, /* 0xC067135C, 0xEBCCABB2 */
+ -8.12874355063065934246e+01, /* 0xC0545265, 0x57E4D2F2 */
+ -9.81432934416914548592e+00 }, /* 0xC023A0EF, 0xC69AC25C */
+ sa[] = { 0.0, 1.96512716674392571292e+01, /* 0x4033A6B9, 0xBD707687 */
+ 1.37657754143519042600e+02, /* 0x4061350C, 0x526AE721 */
+ 4.34565877475229228821e+02, /* 0x407B290D, 0xD58A1A71 */
+ 6.45387271733267880336e+02, /* 0x40842B19, 0x21EC2868 */
+ 4.29008140027567833386e+02, /* 0x407AD021, 0x57700314 */
+ 1.08635005541779435134e+02, /* 0x405B28A3, 0xEE48AE2C */
+ 6.57024977031928170135e+00, /* 0x401A47EF, 0x8E484A93 */
+ -6.04244152148580987438e-02 }, /* 0xBFAEEFF2, 0xEE749A62 */
/*
* Coefficients for approximation to erfc in [1/.35,28]
*/
-rb[] = {-9.86494292470009928597e-03, /* 0xBF843412, 0x39E86F4A */
- -7.99283237680523006574e-01, /* 0xBFE993BA, 0x70C285DE */
- -1.77579549177547519889e+01, /* 0xC031C209, 0x555F995A */
- -1.60636384855821916062e+02, /* 0xC064145D, 0x43C5ED98 */
- -6.37566443368389627722e+02, /* 0xC083EC88, 0x1375F228 */
- -1.02509513161107724954e+03, /* 0xC0900461, 0x6A2E5992 */
- -4.83519191608651397019e+02}, /* 0xC07E384E, 0x9BDC383F */
-sb[] = {0.0,3.03380607434824582924e+01, /* 0x403E568B, 0x261D5190 */
- 3.25792512996573918826e+02, /* 0x40745CAE, 0x221B9F0A */
- 1.53672958608443695994e+03, /* 0x409802EB, 0x189D5118 */
- 3.19985821950859553908e+03, /* 0x40A8FFB7, 0x688C246A */
- 2.55305040643316442583e+03, /* 0x40A3F219, 0xCEDF3BE6 */
- 4.74528541206955367215e+02, /* 0x407DA874, 0xE79FE763 */
- -2.24409524465858183362e+01}; /* 0xC03670E2, 0x42712D62 */
+ rb[] = { -9.86494292470009928597e-03, /* 0xBF843412, 0x39E86F4A */
+ -7.99283237680523006574e-01, /* 0xBFE993BA, 0x70C285DE */
+ -1.77579549177547519889e+01, /* 0xC031C209, 0x555F995A */
+ -1.60636384855821916062e+02, /* 0xC064145D, 0x43C5ED98 */
+ -6.37566443368389627722e+02, /* 0xC083EC88, 0x1375F228 */
+ -1.02509513161107724954e+03, /* 0xC0900461, 0x6A2E5992 */
+ -4.83519191608651397019e+02 }, /* 0xC07E384E, 0x9BDC383F */
+ sb[] = { 0.0, 3.03380607434824582924e+01, /* 0x403E568B, 0x261D5190 */
+ 3.25792512996573918826e+02, /* 0x40745CAE, 0x221B9F0A */
+ 1.53672958608443695994e+03, /* 0x409802EB, 0x189D5118 */
+ 3.19985821950859553908e+03, /* 0x40A8FFB7, 0x688C246A */
+ 2.55305040643316442583e+03, /* 0x40A3F219, 0xCEDF3BE6 */
+ 4.74528541206955367215e+02, /* 0x407DA874, 0xE79FE763 */
+ -2.24409524465858183362e+01 }; /* 0xC03670E2, 0x42712D62 */
-double __erf(double x)
+double
+__erf (double x)
{
- int32_t hx,ix,i;
- double R,S,P,Q,s,y,z,r;
- GET_HIGH_WORD(hx,x);
- ix = hx&0x7fffffff;
- if(ix>=0x7ff00000) { /* erf(nan)=nan */
- i = ((u_int32_t)hx>>31)<<1;
- return (double)(1-i)+one/x; /* erf(+-inf)=+-1 */
- }
+ int32_t hx, ix, i;
+ double R, S, P, Q, s, y, z, r;
+ GET_HIGH_WORD (hx, x);
+ ix = hx & 0x7fffffff;
+ if (ix >= 0x7ff00000) /* erf(nan)=nan */
+ {
+ i = ((u_int32_t) hx >> 31) << 1;
+ return (double) (1 - i) + one / x; /* erf(+-inf)=+-1 */
+ }
- if(ix < 0x3feb0000) { /* |x|<0.84375 */
- double r1,r2,s1,s2,s3,z2,z4;
- if(ix < 0x3e300000) { /* |x|<2**-28 */
- if (ix < 0x00800000)
- return 0.125*(8.0*x+efx8*x); /*avoid underflow */
- return x + efx*x;
- }
- z = x*x;
-#ifdef DO_NOT_USE_THIS
- r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
- s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
-#else
- r1 = pp[0]+z*pp[1]; z2=z*z;
- r2 = pp[2]+z*pp[3]; z4=z2*z2;
- s1 = one+z*qq[1];
- s2 = qq[2]+z*qq[3];
- s3 = qq[4]+z*qq[5];
- r = r1 + z2*r2 + z4*pp[4];
- s = s1 + z2*s2 + z4*s3;
-#endif
- y = r/s;
- return x + x*y;
- }
- if(ix < 0x3ff40000) { /* 0.84375 <= |x| < 1.25 */
- double s2,s4,s6,P1,P2,P3,P4,Q1,Q2,Q3,Q4;
- s = fabs(x)-one;
-#ifdef DO_NOT_USE_THIS
- P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
- Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
-#else
- P1 = pa[0]+s*pa[1]; s2=s*s;
- Q1 = one+s*qa[1]; s4=s2*s2;
- P2 = pa[2]+s*pa[3]; s6=s4*s2;
- Q2 = qa[2]+s*qa[3];
- P3 = pa[4]+s*pa[5];
- Q3 = qa[4]+s*qa[5];
- P4 = pa[6];
- Q4 = qa[6];
- P = P1 + s2*P2 + s4*P3 + s6*P4;
- Q = Q1 + s2*Q2 + s4*Q3 + s6*Q4;
-#endif
- if(hx>=0) return erx + P/Q; else return -erx - P/Q;
- }
- if (ix >= 0x40180000) { /* inf>|x|>=6 */
- if(hx>=0) return one-tiny; else return tiny-one;
- }
- x = fabs(x);
- s = one/(x*x);
- if(ix< 0x4006DB6E) { /* |x| < 1/0.35 */
-#ifdef DO_NOT_USE_THIS
- R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
- ra5+s*(ra6+s*ra7))))));
- S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
- sa5+s*(sa6+s*(sa7+s*sa8)))))));
-#else
- double R1,R2,R3,R4,S1,S2,S3,S4,s2,s4,s6,s8;
- R1 = ra[0]+s*ra[1];s2 = s*s;
- S1 = one+s*sa[1]; s4 = s2*s2;
- R2 = ra[2]+s*ra[3];s6 = s4*s2;
- S2 = sa[2]+s*sa[3];s8 = s4*s4;
- R3 = ra[4]+s*ra[5];
- S3 = sa[4]+s*sa[5];
- R4 = ra[6]+s*ra[7];
- S4 = sa[6]+s*sa[7];
- R = R1 + s2*R2 + s4*R3 + s6*R4;
- S = S1 + s2*S2 + s4*S3 + s6*S4 + s8*sa[8];
-#endif
- } else { /* |x| >= 1/0.35 */
-#ifdef DO_NOT_USE_THIS
- R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
- rb5+s*rb6)))));
- S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
- sb5+s*(sb6+s*sb7))))));
-#else
- double R1,R2,R3,S1,S2,S3,S4,s2,s4,s6;
- R1 = rb[0]+s*rb[1];s2 = s*s;
- S1 = one+s*sb[1]; s4 = s2*s2;
- R2 = rb[2]+s*rb[3];s6 = s4*s2;
- S2 = sb[2]+s*sb[3];
- R3 = rb[4]+s*rb[5];
- S3 = sb[4]+s*sb[5];
- S4 = sb[6]+s*sb[7];
- R = R1 + s2*R2 + s4*R3 + s6*rb[6];
- S = S1 + s2*S2 + s4*S3 + s6*S4;
-#endif
+ if (ix < 0x3feb0000) /* |x|<0.84375 */
+ {
+ double r1, r2, s1, s2, s3, z2, z4;
+ if (ix < 0x3e300000) /* |x|<2**-28 */
+ {
+ if (ix < 0x00800000)
+ return 0.125 * (8.0 * x + efx8 * x); /*avoid underflow */
+ return x + efx * x;
}
- z = x;
- SET_LOW_WORD(z,0);
- r = __ieee754_exp(-z*z-0.5625)*__ieee754_exp((z-x)*(z+x)+R/S);
- if(hx>=0) return one-r/x; else return r/x-one;
+ z = x * x;
+ r1 = pp[0] + z * pp[1]; z2 = z * z;
+ r2 = pp[2] + z * pp[3]; z4 = z2 * z2;
+ s1 = one + z * qq[1];
+ s2 = qq[2] + z * qq[3];
+ s3 = qq[4] + z * qq[5];
+ r = r1 + z2 * r2 + z4 * pp[4];
+ s = s1 + z2 * s2 + z4 * s3;
+ y = r / s;
+ return x + x * y;
+ }
+ if (ix < 0x3ff40000) /* 0.84375 <= |x| < 1.25 */
+ {
+ double s2, s4, s6, P1, P2, P3, P4, Q1, Q2, Q3, Q4;
+ s = fabs (x) - one;
+ P1 = pa[0] + s * pa[1]; s2 = s * s;
+ Q1 = one + s * qa[1]; s4 = s2 * s2;
+ P2 = pa[2] + s * pa[3]; s6 = s4 * s2;
+ Q2 = qa[2] + s * qa[3];
+ P3 = pa[4] + s * pa[5];
+ Q3 = qa[4] + s * qa[5];
+ P4 = pa[6];
+ Q4 = qa[6];
+ P = P1 + s2 * P2 + s4 * P3 + s6 * P4;
+ Q = Q1 + s2 * Q2 + s4 * Q3 + s6 * Q4;
+ if (hx >= 0)
+ return erx + P / Q;
+ else
+ return -erx - P / Q;
+ }
+ if (ix >= 0x40180000) /* inf>|x|>=6 */
+ {
+ if (hx >= 0)
+ return one - tiny;
+ else
+ return tiny - one;
+ }
+ x = fabs (x);
+ s = one / (x * x);
+ if (ix < 0x4006DB6E) /* |x| < 1/0.35 */
+ {
+ double R1, R2, R3, R4, S1, S2, S3, S4, s2, s4, s6, s8;
+ R1 = ra[0] + s * ra[1]; s2 = s * s;
+ S1 = one + s * sa[1]; s4 = s2 * s2;
+ R2 = ra[2] + s * ra[3]; s6 = s4 * s2;
+ S2 = sa[2] + s * sa[3]; s8 = s4 * s4;
+ R3 = ra[4] + s * ra[5];
+ S3 = sa[4] + s * sa[5];
+ R4 = ra[6] + s * ra[7];
+ S4 = sa[6] + s * sa[7];
+ R = R1 + s2 * R2 + s4 * R3 + s6 * R4;
+ S = S1 + s2 * S2 + s4 * S3 + s6 * S4 + s8 * sa[8];
+ }
+ else /* |x| >= 1/0.35 */
+ {
+ double R1, R2, R3, S1, S2, S3, S4, s2, s4, s6;
+ R1 = rb[0] + s * rb[1]; s2 = s * s;
+ S1 = one + s * sb[1]; s4 = s2 * s2;
+ R2 = rb[2] + s * rb[3]; s6 = s4 * s2;
+ S2 = sb[2] + s * sb[3];
+ R3 = rb[4] + s * rb[5];
+ S3 = sb[4] + s * sb[5];
+ S4 = sb[6] + s * sb[7];
+ R = R1 + s2 * R2 + s4 * R3 + s6 * rb[6];
+ S = S1 + s2 * S2 + s4 * S3 + s6 * S4;
+ }
+ z = x;
+ SET_LOW_WORD (z, 0);
+ r = __ieee754_exp (-z * z - 0.5625) *
+ __ieee754_exp ((z - x) * (z + x) + R / S);
+ if (hx >= 0)
+ return one - r / x;
+ else
+ return r / x - one;
}
weak_alias (__erf, erf)
#ifdef NO_LONG_DOUBLE
@@ -298,117 +293,115 @@ strong_alias (__erf, __erfl)
weak_alias (__erf, erfl)
#endif
-double __erfc(double x)
+double
+__erfc (double x)
{
- int32_t hx,ix;
- double R,S,P,Q,s,y,z,r;
- GET_HIGH_WORD(hx,x);
- ix = hx&0x7fffffff;
- if(ix>=0x7ff00000) { /* erfc(nan)=nan */
- /* erfc(+-inf)=0,2 */
- return (double)(((u_int32_t)hx>>31)<<1)+one/x;
- }
+ int32_t hx, ix;
+ double R, S, P, Q, s, y, z, r;
+ GET_HIGH_WORD (hx, x);
+ ix = hx & 0x7fffffff;
+ if (ix >= 0x7ff00000) /* erfc(nan)=nan */
+ { /* erfc(+-inf)=0,2 */
+ return (double) (((u_int32_t) hx >> 31) << 1) + one / x;
+ }
- if(ix < 0x3feb0000) { /* |x|<0.84375 */
- double r1,r2,s1,s2,s3,z2,z4;
- if(ix < 0x3c700000) /* |x|<2**-56 */
- return one-x;
- z = x*x;
-#ifdef DO_NOT_USE_THIS
- r = pp0+z*(pp1+z*(pp2+z*(pp3+z*pp4)));
- s = one+z*(qq1+z*(qq2+z*(qq3+z*(qq4+z*qq5))));
-#else
- r1 = pp[0]+z*pp[1]; z2=z*z;
- r2 = pp[2]+z*pp[3]; z4=z2*z2;
- s1 = one+z*qq[1];
- s2 = qq[2]+z*qq[3];
- s3 = qq[4]+z*qq[5];
- r = r1 + z2*r2 + z4*pp[4];
- s = s1 + z2*s2 + z4*s3;
-#endif
- y = r/s;
- if(hx < 0x3fd00000) { /* x<1/4 */
- return one-(x+x*y);
- } else {
- r = x*y;
- r += (x-half);
- return half - r ;
- }
+ if (ix < 0x3feb0000) /* |x|<0.84375 */
+ {
+ double r1, r2, s1, s2, s3, z2, z4;
+ if (ix < 0x3c700000) /* |x|<2**-56 */
+ return one - x;
+ z = x * x;
+ r1 = pp[0] + z * pp[1]; z2 = z * z;
+ r2 = pp[2] + z * pp[3]; z4 = z2 * z2;
+ s1 = one + z * qq[1];
+ s2 = qq[2] + z * qq[3];
+ s3 = qq[4] + z * qq[5];
+ r = r1 + z2 * r2 + z4 * pp[4];
+ s = s1 + z2 * s2 + z4 * s3;
+ y = r / s;
+ if (hx < 0x3fd00000) /* x<1/4 */
+ {
+ return one - (x + x * y);
}
- if(ix < 0x3ff40000) { /* 0.84375 <= |x| < 1.25 */
- double s2,s4,s6,P1,P2,P3,P4,Q1,Q2,Q3,Q4;
- s = fabs(x)-one;
-#ifdef DO_NOT_USE_THIS
- P = pa0+s*(pa1+s*(pa2+s*(pa3+s*(pa4+s*(pa5+s*pa6)))));
- Q = one+s*(qa1+s*(qa2+s*(qa3+s*(qa4+s*(qa5+s*qa6)))));
-#else
- P1 = pa[0]+s*pa[1]; s2=s*s;
- Q1 = one+s*qa[1]; s4=s2*s2;
- P2 = pa[2]+s*pa[3]; s6=s4*s2;
- Q2 = qa[2]+s*qa[3];
- P3 = pa[4]+s*pa[5];
- Q3 = qa[4]+s*qa[5];
- P4 = pa[6];
- Q4 = qa[6];
- P = P1 + s2*P2 + s4*P3 + s6*P4;
- Q = Q1 + s2*Q2 + s4*Q3 + s6*Q4;
-#endif
- if(hx>=0) {
- z = one-erx; return z - P/Q;
- } else {
- z = erx+P/Q; return one+z;
- }
+ else
+ {
+ r = x * y;
+ r += (x - half);
+ return half - r;
}
- if (ix < 0x403c0000) { /* |x|<28 */
- x = fabs(x);
- s = one/(x*x);
- if(ix< 0x4006DB6D) { /* |x| < 1/.35 ~ 2.857143*/
-#ifdef DO_NOT_USE_THIS
- R=ra0+s*(ra1+s*(ra2+s*(ra3+s*(ra4+s*(
- ra5+s*(ra6+s*ra7))))));
- S=one+s*(sa1+s*(sa2+s*(sa3+s*(sa4+s*(
- sa5+s*(sa6+s*(sa7+s*sa8)))))));
-#else
- double R1,R2,R3,R4,S1,S2,S3,S4,s2,s4,s6,s8;
- R1 = ra[0]+s*ra[1];s2 = s*s;
- S1 = one+s*sa[1]; s4 = s2*s2;
- R2 = ra[2]+s*ra[3];s6 = s4*s2;
- S2 = sa[2]+s*sa[3];s8 = s4*s4;
- R3 = ra[4]+s*ra[5];
- S3 = sa[4]+s*sa[5];
- R4 = ra[6]+s*ra[7];
- S4 = sa[6]+s*sa[7];
- R = R1 + s2*R2 + s4*R3 + s6*R4;
- S = S1 + s2*S2 + s4*S3 + s6*S4 + s8*sa[8];
-#endif
- } else { /* |x| >= 1/.35 ~ 2.857143 */
- double R1,R2,R3,S1,S2,S3,S4,s2,s4,s6;
- if(hx<0&&ix>=0x40180000) return two-tiny;/* x < -6 */
-#ifdef DO_NOT_USE_THIS
- R=rb0+s*(rb1+s*(rb2+s*(rb3+s*(rb4+s*(
- rb5+s*rb6)))));
- S=one+s*(sb1+s*(sb2+s*(sb3+s*(sb4+s*(
- sb5+s*(sb6+s*sb7))))));
-#else
- R1 = rb[0]+s*rb[1];s2 = s*s;
- S1 = one+s*sb[1]; s4 = s2*s2;
- R2 = rb[2]+s*rb[3];s6 = s4*s2;
- S2 = sb[2]+s*sb[3];
- R3 = rb[4]+s*rb[5];
- S3 = sb[4]+s*sb[5];
- S4 = sb[6]+s*sb[7];
- R = R1 + s2*R2 + s4*R3 + s6*rb[6];
- S = S1 + s2*S2 + s4*S3 + s6*S4;
-#endif
- }
- z = x;
- SET_LOW_WORD(z,0);
- r = __ieee754_exp(-z*z-0.5625)*
- __ieee754_exp((z-x)*(z+x)+R/S);
- if(hx>0) return r/x; else return two-r/x;
- } else {
- if(hx>0) return tiny*tiny; else return two-tiny;
+ }
+ if (ix < 0x3ff40000) /* 0.84375 <= |x| < 1.25 */
+ {
+ double s2, s4, s6, P1, P2, P3, P4, Q1, Q2, Q3, Q4;
+ s = fabs (x) - one;
+ P1 = pa[0] + s * pa[1]; s2 = s * s;
+ Q1 = one + s * qa[1]; s4 = s2 * s2;
+ P2 = pa[2] + s * pa[3]; s6 = s4 * s2;
+ Q2 = qa[2] + s * qa[3];
+ P3 = pa[4] + s * pa[5];
+ Q3 = qa[4] + s * qa[5];
+ P4 = pa[6];
+ Q4 = qa[6];
+ P = P1 + s2 * P2 + s4 * P3 + s6 * P4;
+ Q = Q1 + s2 * Q2 + s4 * Q3 + s6 * Q4;
+ if (hx >= 0)
+ {
+ z = one - erx; return z - P / Q;
+ }
+ else
+ {
+ z = erx + P / Q; return one + z;
+ }
+ }
+ if (ix < 0x403c0000) /* |x|<28 */
+ {
+ x = fabs (x);
+ s = one / (x * x);
+ if (ix < 0x4006DB6D) /* |x| < 1/.35 ~ 2.857143*/
+ {
+ double R1, R2, R3, R4, S1, S2, S3, S4, s2, s4, s6, s8;
+ R1 = ra[0] + s * ra[1]; s2 = s * s;
+ S1 = one + s * sa[1]; s4 = s2 * s2;
+ R2 = ra[2] + s * ra[3]; s6 = s4 * s2;
+ S2 = sa[2] + s * sa[3]; s8 = s4 * s4;
+ R3 = ra[4] + s * ra[5];
+ S3 = sa[4] + s * sa[5];
+ R4 = ra[6] + s * ra[7];
+ S4 = sa[6] + s * sa[7];
+ R = R1 + s2 * R2 + s4 * R3 + s6 * R4;
+ S = S1 + s2 * S2 + s4 * S3 + s6 * S4 + s8 * sa[8];
+ }
+ else /* |x| >= 1/.35 ~ 2.857143 */
+ {
+ double R1, R2, R3, S1, S2, S3, S4, s2, s4, s6;
+ if (hx < 0 && ix >= 0x40180000)
+ return two - tiny; /* x < -6 */
+ R1 = rb[0] + s * rb[1]; s2 = s * s;
+ S1 = one + s * sb[1]; s4 = s2 * s2;
+ R2 = rb[2] + s * rb[3]; s6 = s4 * s2;
+ S2 = sb[2] + s * sb[3];
+ R3 = rb[4] + s * rb[5];
+ S3 = sb[4] + s * sb[5];
+ S4 = sb[6] + s * sb[7];
+ R = R1 + s2 * R2 + s4 * R3 + s6 * rb[6];
+ S = S1 + s2 * S2 + s4 * S3 + s6 * S4;
}
+ z = x;
+ SET_LOW_WORD (z, 0);
+ r = __ieee754_exp (-z * z - 0.5625) *
+ __ieee754_exp ((z - x) * (z + x) + R / S);
+ if (hx > 0)
+ return r / x;
+ else
+ return two - r / x;
+ }
+ else
+ {
+ if (hx > 0)
+ return tiny * tiny;
+ else
+ return two - tiny;
+ }
}
weak_alias (__erfc, erfc)
#ifdef NO_LONG_DOUBLE
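
The erf and erfc rational approximations are evaluated in paired terms (R1 + s2*R2 + s4*R3 + ...) rather than as one long nested Horner chain, which shortens the dependency chain on pipelined processors. Both forms compute the same polynomial up to rounding; a sketch using the pp[] numerator coefficients above (illustration only):

#include <stdio.h>

static const double pp[] = {
  1.28379167095512558561e-01,
  -3.25042107247001499370e-01,
  -2.84817495755985104766e-02,
  -5.77027029648944159157e-03,
  -2.37630166566501626084e-05
};

static double
poly_horner (double z)
{
  return pp[0] + z * (pp[1] + z * (pp[2] + z * (pp[3] + z * pp[4])));
}

static double
poly_split (double z)
{
  double z2 = z * z, z4 = z2 * z2;
  double r1 = pp[0] + z * pp[1];    /* independent partial sums */
  double r2 = pp[2] + z * pp[3];
  return r1 + z2 * r2 + z4 * pp[4];
}

int
main (void)
{
  double z = 0.25;
  printf ("horner=%.17g split=%.17g\n", poly_horner (z), poly_split (z));
  return 0;
}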
diff --git a/libc/sysdeps/ieee754/dbl-64/s_expm1.c b/libc/sysdeps/ieee754/dbl-64/s_expm1.c
index 1a4bcd979..9c9bb3455 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_expm1.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_expm1.c
@@ -11,7 +11,7 @@
*/
/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
for performance improvement on pipelined processors.
-*/
+ */
/* expm1(x)
* Returns exp(x)-1, the exponential of x minus 1.
@@ -113,120 +113,145 @@
#include <math_private.h>
#define one Q[0]
static const double
-huge = 1.0e+300,
-tiny = 1.0e-300,
-o_threshold = 7.09782712893383973096e+02,/* 0x40862E42, 0xFEFA39EF */
-ln2_hi = 6.93147180369123816490e-01,/* 0x3fe62e42, 0xfee00000 */
-ln2_lo = 1.90821492927058770002e-10,/* 0x3dea39ef, 0x35793c76 */
-invln2 = 1.44269504088896338700e+00,/* 0x3ff71547, 0x652b82fe */
- /* scaled coefficients related to expm1 */
-Q[] = {1.0, -3.33333333333331316428e-02, /* BFA11111 111110F4 */
- 1.58730158725481460165e-03, /* 3F5A01A0 19FE5585 */
- -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */
- 4.00821782732936239552e-06, /* 3ED0CFCA 86E65239 */
- -2.01099218183624371326e-07}; /* BE8AFDB7 6E09C32D */
+ huge = 1.0e+300,
+ tiny = 1.0e-300,
+ o_threshold = 7.09782712893383973096e+02, /* 0x40862E42, 0xFEFA39EF */
+ ln2_hi = 6.93147180369123816490e-01, /* 0x3fe62e42, 0xfee00000 */
+ ln2_lo = 1.90821492927058770002e-10, /* 0x3dea39ef, 0x35793c76 */
+ invln2 = 1.44269504088896338700e+00, /* 0x3ff71547, 0x652b82fe */
+/* scaled coefficients related to expm1 */
+ Q[] = { 1.0, -3.33333333333331316428e-02, /* BFA11111 111110F4 */
+ 1.58730158725481460165e-03, /* 3F5A01A0 19FE5585 */
+ -7.93650757867487942473e-05, /* BF14CE19 9EAADBB7 */
+ 4.00821782732936239552e-06, /* 3ED0CFCA 86E65239 */
+ -2.01099218183624371326e-07 }; /* BE8AFDB7 6E09C32D */
double
-__expm1(double x)
+__expm1 (double x)
{
- double y,hi,lo,c,t,e,hxs,hfx,r1,h2,h4,R1,R2,R3;
- int32_t k,xsb;
- u_int32_t hx;
+ double y, hi, lo, c, t, e, hxs, hfx, r1, h2, h4, R1, R2, R3;
+ int32_t k, xsb;
+ u_int32_t hx;
- GET_HIGH_WORD(hx,x);
- xsb = hx&0x80000000; /* sign bit of x */
- if(xsb==0) y=x; else y= -x; /* y = |x| */
- hx &= 0x7fffffff; /* high word of |x| */
+ GET_HIGH_WORD (hx, x);
+ xsb = hx & 0x80000000; /* sign bit of x */
+ if (xsb == 0)
+ y = x;
+ else
+ y = -x; /* y = |x| */
+ hx &= 0x7fffffff; /* high word of |x| */
- /* filter out huge and non-finite argument */
- if(hx >= 0x4043687A) { /* if |x|>=56*ln2 */
- if(hx >= 0x40862E42) { /* if |x|>=709.78... */
- if(hx>=0x7ff00000) {
- u_int32_t low;
- GET_LOW_WORD(low,x);
- if(((hx&0xfffff)|low)!=0)
- return x+x; /* NaN */
- else return (xsb==0)? x:-1.0;/* exp(+-inf)={inf,-1} */
- }
- if(x > o_threshold) {
- __set_errno (ERANGE);
- return huge*huge; /* overflow */
- }
+ /* filter out huge and non-finite argument */
+ if (hx >= 0x4043687A) /* if |x|>=56*ln2 */
+ {
+ if (hx >= 0x40862E42) /* if |x|>=709.78... */
+ {
+ if (hx >= 0x7ff00000)
+ {
+ u_int32_t low;
+ GET_LOW_WORD (low, x);
+ if (((hx & 0xfffff) | low) != 0)
+ return x + x; /* NaN */
+ else
+ return (xsb == 0) ? x : -1.0; /* exp(+-inf)={inf,-1} */
}
- if(xsb!=0) { /* x < -56*ln2, return -1.0 with inexact */
- math_force_eval(x+tiny); /* raise inexact */
- return tiny-one; /* return -1 */
+ if (x > o_threshold)
+ {
+ __set_errno (ERANGE);
+ return huge * huge; /* overflow */
}
}
+ if (xsb != 0) /* x < -56*ln2, return -1.0 with inexact */
+ {
+ math_force_eval (x + tiny); /* raise inexact */
+ return tiny - one; /* return -1 */
+ }
+ }
- /* argument reduction */
- if(hx > 0x3fd62e42) { /* if |x| > 0.5 ln2 */
- if(hx < 0x3FF0A2B2) { /* and |x| < 1.5 ln2 */
- if(xsb==0)
- {hi = x - ln2_hi; lo = ln2_lo; k = 1;}
- else
- {hi = x + ln2_hi; lo = -ln2_lo; k = -1;}
- } else {
- k = invln2*x+((xsb==0)?0.5:-0.5);
- t = k;
- hi = x - t*ln2_hi; /* t*ln2_hi is exact here */
- lo = t*ln2_lo;
+ /* argument reduction */
+ if (hx > 0x3fd62e42) /* if |x| > 0.5 ln2 */
+ {
+ if (hx < 0x3FF0A2B2) /* and |x| < 1.5 ln2 */
+ {
+ if (xsb == 0)
+ {
+ hi = x - ln2_hi; lo = ln2_lo; k = 1;
+ }
+ else
+ {
+ hi = x + ln2_hi; lo = -ln2_lo; k = -1;
}
- x = hi - lo;
- c = (hi-x)-lo;
}
- else if(hx < 0x3c900000) { /* when |x|<2**-54, return x */
- t = huge+x; /* return x with inexact flags when x!=0 */
- return x - (t-(huge+x));
+ else
+ {
+ k = invln2 * x + ((xsb == 0) ? 0.5 : -0.5);
+ t = k;
+ hi = x - t * ln2_hi; /* t*ln2_hi is exact here */
+ lo = t * ln2_lo;
}
- else k = 0;
+ x = hi - lo;
+ c = (hi - x) - lo;
+ }
+ else if (hx < 0x3c900000) /* when |x|<2**-54, return x */
+ {
+ t = huge + x; /* return x with inexact flags when x!=0 */
+ return x - (t - (huge + x));
+ }
+ else
+ k = 0;
- /* x is now in primary range */
- hfx = 0.5*x;
- hxs = x*hfx;
-#ifdef DO_NOT_USE_THIS
- r1 = one+hxs*(Q1+hxs*(Q2+hxs*(Q3+hxs*(Q4+hxs*Q5))));
-#else
- R1 = one+hxs*Q[1]; h2 = hxs*hxs;
- R2 = Q[2]+hxs*Q[3]; h4 = h2*h2;
- R3 = Q[4]+hxs*Q[5];
- r1 = R1 + h2*R2 + h4*R3;
-#endif
- t = 3.0-r1*hfx;
- e = hxs*((r1-t)/(6.0 - x*t));
- if(k==0) return x - (x*e-hxs); /* c is 0 */
- else {
- e = (x*(e-c)-c);
- e -= hxs;
- if(k== -1) return 0.5*(x-e)-0.5;
- if(k==1) {
- if(x < -0.25) return -2.0*(e-(x+0.5));
- else return one+2.0*(x-e);
- }
- if (k <= -2 || k>56) { /* suffice to return exp(x)-1 */
- u_int32_t high;
- y = one-(e-x);
- GET_HIGH_WORD(high,y);
- SET_HIGH_WORD(y,high+(k<<20)); /* add k to y's exponent */
- return y-one;
- }
- t = one;
- if(k<20) {
- u_int32_t high;
- SET_HIGH_WORD(t,0x3ff00000 - (0x200000>>k)); /* t=1-2^-k */
- y = t-(e-x);
- GET_HIGH_WORD(high,y);
- SET_HIGH_WORD(y,high+(k<<20)); /* add k to y's exponent */
- } else {
- u_int32_t high;
- SET_HIGH_WORD(t,((0x3ff-k)<<20)); /* 2^-k */
- y = x-(e+t);
- y += one;
- GET_HIGH_WORD(high,y);
- SET_HIGH_WORD(y,high+(k<<20)); /* add k to y's exponent */
- }
+ /* x is now in primary range */
+ hfx = 0.5 * x;
+ hxs = x * hfx;
+ R1 = one + hxs * Q[1]; h2 = hxs * hxs;
+ R2 = Q[2] + hxs * Q[3]; h4 = h2 * h2;
+ R3 = Q[4] + hxs * Q[5];
+ r1 = R1 + h2 * R2 + h4 * R3;
+ t = 3.0 - r1 * hfx;
+ e = hxs * ((r1 - t) / (6.0 - x * t));
+ if (k == 0)
+ return x - (x * e - hxs); /* c is 0 */
+ else
+ {
+ e = (x * (e - c) - c);
+ e -= hxs;
+ if (k == -1)
+ return 0.5 * (x - e) - 0.5;
+ if (k == 1)
+ {
+ if (x < -0.25)
+ return -2.0 * (e - (x + 0.5));
+ else
+ return one + 2.0 * (x - e);
+ }
+ if (k <= -2 || k > 56) /* suffice to return exp(x)-1 */
+ {
+ u_int32_t high;
+ y = one - (e - x);
+ GET_HIGH_WORD (high, y);
+ SET_HIGH_WORD (y, high + (k << 20)); /* add k to y's exponent */
+ return y - one;
+ }
+ t = one;
+ if (k < 20)
+ {
+ u_int32_t high;
+ SET_HIGH_WORD (t, 0x3ff00000 - (0x200000 >> k)); /* t=1-2^-k */
+ y = t - (e - x);
+ GET_HIGH_WORD (high, y);
+ SET_HIGH_WORD (y, high + (k << 20)); /* add k to y's exponent */
+ }
+ else
+ {
+ u_int32_t high;
+ SET_HIGH_WORD (t, ((0x3ff - k) << 20)); /* 2^-k */
+ y = x - (e + t);
+ y += one;
+ GET_HIGH_WORD (high, y);
+ SET_HIGH_WORD (y, high + (k << 20)); /* add k to y's exponent */
}
- return y;
+ }
+ return y;
}
weak_alias (__expm1, expm1)
#ifdef NO_LONG_DOUBLE
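
The argument reduction in __expm1 relies on ln2 being split into ln2_hi, whose low bits are zero, and a tiny ln2_lo, so that k * ln2_hi is exact for the small integer k chosen. A standalone sketch of that reduction (illustration only; the real code picks k with invln2 and handles signs, overflow, and the tiny-argument cases):

#include <math.h>
#include <stdio.h>

static const double ln2_hi = 6.93147180369123816490e-01; /* 0x3fe62e42 fee00000 */
static const double ln2_lo = 1.90821492927058770002e-10; /* 0x3dea39ef 35793c76 */

int
main (void)
{
  double x = 10.0;
  int k = (int) (x / (ln2_hi + ln2_lo) + 0.5);  /* nearest multiple of ln 2 */
  double hi = x - k * ln2_hi;                   /* k * ln2_hi is exact here */
  double lo = k * ln2_lo;
  double r = hi - lo;                           /* reduced argument */

  /* exp(x) = 2^k * exp(r); compare against the library value.  */
  printf ("k=%d r=%.17g check=%.17g exp=%.17g\n",
          k, r, ldexp (exp (r), k), exp (x));
  return 0;
}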
diff --git a/libc/sysdeps/ieee754/dbl-64/s_fabs.c b/libc/sysdeps/ieee754/dbl-64/s_fabs.c
index 86f1d52be..c82c4210e 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_fabs.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_fabs.c
@@ -21,12 +21,13 @@ static char rcsid[] = "$NetBSD: s_fabs.c,v 1.7 1995/05/10 20:47:13 jtc Exp $";
#include <math.h>
#include <math_private.h>
-double __fabs(double x)
+double
+__fabs (double x)
{
- u_int32_t high;
- GET_HIGH_WORD(high,x);
- SET_HIGH_WORD(x,high&0x7fffffff);
- return x;
+ u_int32_t high;
+ GET_HIGH_WORD (high, x);
+ SET_HIGH_WORD (x, high & 0x7fffffff);
+ return x;
}
weak_alias (__fabs, fabs)
#ifdef NO_LONG_DOUBLE
diff --git a/libc/sysdeps/ieee754/dbl-64/s_finite.c b/libc/sysdeps/ieee754/dbl-64/s_finite.c
index 47dad5df2..5dc398efc 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_finite.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_finite.c
@@ -23,11 +23,12 @@ static char rcsid[] = "$NetBSD: s_finite.c,v 1.8 1995/05/10 20:47:17 jtc Exp $";
#include <math_private.h>
#undef __finite
-int __finite(double x)
+int
+__finite (double x)
{
- int32_t hx;
- GET_HIGH_WORD(hx,x);
- return (int)((u_int32_t)((hx&0x7fffffff)-0x7ff00000)>>31);
+ int32_t hx;
+ GET_HIGH_WORD (hx, x);
+ return (int) ((u_int32_t) ((hx & 0x7fffffff) - 0x7ff00000) >> 31);
}
hidden_def (__finite)
weak_alias (__finite, finite)
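
The return expression in __finite is branch-free: for finite x the masked high word is below 0x7ff00000, so the unsigned subtraction wraps around and sets bit 31; for Inf and NaN it does not. A sketch with the same expression written out portably (illustration only):

#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <math.h>

static int
finite_sketch (double x)
{
  uint64_t bits;
  memcpy (&bits, &x, sizeof bits);
  uint32_t hx = (uint32_t) (bits >> 32);
  return (int) (((hx & 0x7fffffffu) - 0x7ff00000u) >> 31);
}

int
main (void)
{
  printf ("%d %d %d %d\n",
          finite_sketch (1.0), finite_sketch (-0.0),
          finite_sketch (INFINITY), finite_sketch (NAN));
  /* expected: 1 1 0 0 */
  return 0;
}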
diff --git a/libc/sysdeps/ieee754/dbl-64/s_floor.c b/libc/sysdeps/ieee754/dbl-64/s_floor.c
index 5c7297842..bd6afa72e 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_floor.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_floor.c
@@ -24,44 +24,67 @@
static const double huge = 1.0e300;
-double __floor(double x)
+double
+__floor (double x)
{
- int32_t i0,i1,j0;
- u_int32_t i,j;
- EXTRACT_WORDS(i0,i1,x);
- j0 = ((i0>>20)&0x7ff)-0x3ff;
- if(j0<20) {
- if(j0<0) { /* raise inexact if x != 0 */
- math_force_eval(huge+x);/* return 0*sign(x) if |x|<1 */
- if(i0>=0) {i0=i1=0;}
- else if(((i0&0x7fffffff)|i1)!=0)
- { i0=0xbff00000;i1=0;}
- } else {
- i = (0x000fffff)>>j0;
- if(((i0&i)|i1)==0) return x; /* x is integral */
- math_force_eval(huge+x); /* raise inexact flag */
- if(i0<0) i0 += (0x00100000)>>j0;
- i0 &= (~i); i1=0;
+ int32_t i0, i1, j0;
+ u_int32_t i, j;
+ EXTRACT_WORDS (i0, i1, x);
+ j0 = ((i0 >> 20) & 0x7ff) - 0x3ff;
+ if (j0 < 20)
+ {
+ if (j0 < 0) /* raise inexact if x != 0 */
+ {
+ math_force_eval (huge + x); /* return 0*sign(x) if |x|<1 */
+ if (i0 >= 0)
+ {
+ i0 = i1 = 0;
}
- } else if (j0>51) {
- if(j0==0x400) return x+x; /* inf or NaN */
- else return x; /* x is integral */
- } else {
- i = ((u_int32_t)(0xffffffff))>>(j0-20);
- if((i1&i)==0) return x; /* x is integral */
- math_force_eval(huge+x); /* raise inexact flag */
- if(i0<0) {
- if(j0==20) i0+=1;
- else {
- j = i1+(1<<(52-j0));
- if(j<i1) i0 +=1 ; /* got a carry */
- i1=j;
- }
+ else if (((i0 & 0x7fffffff) | i1) != 0)
+ {
+ i0 = 0xbff00000; i1 = 0;
}
- i1 &= (~i);
}
- INSERT_WORDS(x,i0,i1);
- return x;
+ else
+ {
+ i = (0x000fffff) >> j0;
+ if (((i0 & i) | i1) == 0)
+ return x; /* x is integral */
+ math_force_eval (huge + x); /* raise inexact flag */
+ if (i0 < 0)
+ i0 += (0x00100000) >> j0;
+ i0 &= (~i); i1 = 0;
+ }
+ }
+ else if (j0 > 51)
+ {
+ if (j0 == 0x400)
+ return x + x; /* inf or NaN */
+ else
+ return x; /* x is integral */
+ }
+ else
+ {
+ i = ((u_int32_t) (0xffffffff)) >> (j0 - 20);
+ if ((i1 & i) == 0)
+ return x; /* x is integral */
+ math_force_eval (huge + x); /* raise inexact flag */
+ if (i0 < 0)
+ {
+ if (j0 == 20)
+ i0 += 1;
+ else
+ {
+ j = i1 + (1 << (52 - j0));
+ if (j < i1)
+ i0 += 1; /* got a carry */
+ i1 = j;
+ }
+ }
+ i1 &= (~i);
+ }
+ INSERT_WORDS (x, i0, i1);
+ return x;
}
#ifndef __floor
weak_alias (__floor, floor)
diff --git a/libc/sysdeps/ieee754/dbl-64/s_frexp.c b/libc/sysdeps/ieee754/dbl-64/s_frexp.c
index 516f561a1..1b8d8500b 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_frexp.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_frexp.c
@@ -28,25 +28,28 @@ static char rcsid[] = "$NetBSD: s_frexp.c,v 1.9 1995/05/10 20:47:24 jtc Exp $";
#include <math_private.h>
static const double
-two54 = 1.80143985094819840000e+16; /* 0x43500000, 0x00000000 */
+ two54 = 1.80143985094819840000e+16; /* 0x43500000, 0x00000000 */
-double __frexp(double x, int *eptr)
+double
+__frexp (double x, int *eptr)
{
- int32_t hx, ix, lx;
- EXTRACT_WORDS(hx,lx,x);
- ix = 0x7fffffff&hx;
- *eptr = 0;
- if(ix>=0x7ff00000||((ix|lx)==0)) return x; /* 0,inf,nan */
- if (ix<0x00100000) { /* subnormal */
- x *= two54;
- GET_HIGH_WORD(hx,x);
- ix = hx&0x7fffffff;
- *eptr = -54;
- }
- *eptr += (ix>>20)-1022;
- hx = (hx&0x800fffff)|0x3fe00000;
- SET_HIGH_WORD(x,hx);
- return x;
+ int32_t hx, ix, lx;
+ EXTRACT_WORDS (hx, lx, x);
+ ix = 0x7fffffff & hx;
+ *eptr = 0;
+ if (ix >= 0x7ff00000 || ((ix | lx) == 0))
+ return x; /* 0,inf,nan */
+ if (ix < 0x00100000) /* subnormal */
+ {
+ x *= two54;
+ GET_HIGH_WORD (hx, x);
+ ix = hx & 0x7fffffff;
+ *eptr = -54;
+ }
+ *eptr += (ix >> 20) - 1022;
+ hx = (hx & 0x800fffff) | 0x3fe00000;
+ SET_HIGH_WORD (x, hx);
+ return x;
}
weak_alias (__frexp, frexp)
#ifdef NO_LONG_DOUBLE
diff --git a/libc/sysdeps/ieee754/dbl-64/s_isinf.c b/libc/sysdeps/ieee754/dbl-64/s_isinf.c
index 886b346f5..46a7266e7 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_isinf.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_isinf.c
@@ -19,11 +19,11 @@ static char rcsid[] = "$NetBSD: s_isinf.c,v 1.3 1995/05/11 23:20:14 jtc Exp $";
int
__isinf (double x)
{
- int32_t hx,lx;
- EXTRACT_WORDS(hx,lx,x);
- lx |= (hx & 0x7fffffff) ^ 0x7ff00000;
- lx |= -lx;
- return ~(lx >> 31) & (hx >> 30);
+ int32_t hx, lx;
+ EXTRACT_WORDS (hx, lx, x);
+ lx |= (hx & 0x7fffffff) ^ 0x7ff00000;
+ lx |= -lx;
+ return ~(lx >> 31) & (hx >> 30);
}
hidden_def (__isinf)
weak_alias (__isinf, isinf)
diff --git a/libc/sysdeps/ieee754/dbl-64/s_isinf_ns.c b/libc/sysdeps/ieee754/dbl-64/s_isinf_ns.c
index 0ce50352c..d8142bcf6 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_isinf_ns.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_isinf_ns.c
@@ -14,7 +14,7 @@
int
__isinf_ns (double x)
{
- int32_t hx,lx;
- EXTRACT_WORDS(hx,lx,x);
- return !(lx | ((hx & 0x7fffffff) ^ 0x7ff00000));
+ int32_t hx, lx;
+ EXTRACT_WORDS (hx, lx, x);
+ return !(lx | ((hx & 0x7fffffff) ^ 0x7ff00000));
}
diff --git a/libc/sysdeps/ieee754/dbl-64/s_isnan.c b/libc/sysdeps/ieee754/dbl-64/s_isnan.c
index f8958dcbb..5d9f31be2 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_isnan.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_isnan.c
@@ -23,14 +23,15 @@ static char rcsid[] = "$NetBSD: s_isnan.c,v 1.8 1995/05/10 20:47:36 jtc Exp $";
#include <math_private.h>
#undef __isnan
-int __isnan(double x)
+int
+__isnan (double x)
{
- int32_t hx,lx;
- EXTRACT_WORDS(hx,lx,x);
- hx &= 0x7fffffff;
- hx |= (u_int32_t)(lx|(-lx))>>31;
- hx = 0x7ff00000 - hx;
- return (int)(((u_int32_t)hx)>>31);
+ int32_t hx, lx;
+ EXTRACT_WORDS (hx, lx, x);
+ hx &= 0x7fffffff;
+ hx |= (u_int32_t) (lx | (-lx)) >> 31;
+ hx = 0x7ff00000 - hx;
+ return (int) (((u_int32_t) hx) >> 31);
}
hidden_def (__isnan)
weak_alias (__isnan, isnan)
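
__isnan folds the low word into bit 0 of the masked high word, so the combined value exceeds 0x7ff00000 exactly when the exponent field is all ones and the mantissa is nonzero. A standalone sketch of the same branch-free test (illustration only):

#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <math.h>

static int
isnan_sketch (double x)
{
  uint64_t bits;
  memcpy (&bits, &x, sizeof bits);
  uint32_t hx = (uint32_t) (bits >> 32) & 0x7fffffffu;
  uint32_t lx = (uint32_t) bits;
  hx |= (lx | (0u - lx)) >> 31;     /* 1 if any low mantissa bit is set */
  return (int) ((0x7ff00000u - hx) >> 31);
}

int
main (void)
{
  printf ("%d %d %d\n", isnan_sketch (NAN), isnan_sketch (INFINITY),
          isnan_sketch (42.0));
  /* expected: 1 0 0 */
  return 0;
}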
diff --git a/libc/sysdeps/ieee754/dbl-64/s_llround.c b/libc/sysdeps/ieee754/dbl-64/s_llround.c
index e8c2232e9..7d50a57dd 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_llround.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_llround.c
@@ -66,7 +66,7 @@ __llround (double x)
else
{
/* The number is too large. It is left implementation defined
- what happens. */
+ what happens. */
return (long long int) x;
}
diff --git a/libc/sysdeps/ieee754/dbl-64/s_log1p.c b/libc/sysdeps/ieee754/dbl-64/s_log1p.c
index e3e686096..ea1dc6ca7 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_log1p.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_log1p.c
@@ -11,7 +11,7 @@
*/
/* Modified by Naohiko Shimizu/Tokai University, Japan 1997/08/25,
for performance improvement on pipelined processors.
-*/
+ */
/* double log1p(double x)
*
@@ -82,91 +82,112 @@
#include <math_private.h>
static const double
-ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
-ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
-two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */
-Lp[] = {0.0, 6.666666666666735130e-01, /* 3FE55555 55555593 */
- 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
- 2.857142874366239149e-01, /* 3FD24924 94229359 */
- 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
- 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
- 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
- 1.479819860511658591e-01}; /* 3FC2F112 DF3E5244 */
+ ln2_hi = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */
+ ln2_lo = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */
+ two54 = 1.80143985094819840000e+16, /* 43500000 00000000 */
+ Lp[] = { 0.0, 6.666666666666735130e-01, /* 3FE55555 55555593 */
+ 3.999999999940941908e-01, /* 3FD99999 9997FA04 */
+ 2.857142874366239149e-01, /* 3FD24924 94229359 */
+ 2.222219843214978396e-01, /* 3FCC71C5 1D8E78AF */
+ 1.818357216161805012e-01, /* 3FC74664 96CB03DE */
+ 1.531383769920937332e-01, /* 3FC39A09 D078C69F */
+ 1.479819860511658591e-01 }; /* 3FC2F112 DF3E5244 */
static const double zero = 0.0;
double
-__log1p(double x)
+__log1p (double x)
{
- double hfsq,f,c,s,z,R,u,z2,z4,z6,R1,R2,R3,R4;
- int32_t k,hx,hu,ax;
+ double hfsq, f, c, s, z, R, u, z2, z4, z6, R1, R2, R3, R4;
+ int32_t k, hx, hu, ax;
- GET_HIGH_WORD(hx,x);
- ax = hx&0x7fffffff;
+ GET_HIGH_WORD (hx, x);
+ ax = hx & 0x7fffffff;
- k = 1;
- if (hx < 0x3FDA827A) { /* x < 0.41422 */
- if(__builtin_expect(ax>=0x3ff00000, 0)) { /* x <= -1.0 */
- if(x==-1.0) return -two54/(x-x);/* log1p(-1)=+inf */
- else return (x-x)/(x-x); /* log1p(x<-1)=NaN */
- }
- if(__builtin_expect(ax<0x3e200000, 0)) { /* |x| < 2**-29 */
- math_force_eval(two54+x); /* raise inexact */
- if (ax<0x3c900000) /* |x| < 2**-54 */
- return x;
- else
- return x - x*x*0.5;
- }
- if(hx>0||hx<=((int32_t)0xbfd2bec3)) {
- k=0;f=x;hu=1;} /* -0.2929<x<0.41422 */
+ k = 1;
+ if (hx < 0x3FDA827A) /* x < 0.41422 */
+ {
+ if (__builtin_expect (ax >= 0x3ff00000, 0)) /* x <= -1.0 */
+ {
+ if (x == -1.0)
+ return -two54 / (x - x); /* log1p(-1)=+inf */
+ else
+ return (x - x) / (x - x); /* log1p(x<-1)=NaN */
}
- else if (__builtin_expect(hx >= 0x7ff00000, 0)) return x+x;
- if(k!=0) {
- if(hx<0x43400000) {
- u = 1.0+x;
- GET_HIGH_WORD(hu,u);
- k = (hu>>20)-1023;
- c = (k>0)? 1.0-(u-x):x-(u-1.0);/* correction term */
- c /= u;
- } else {
- u = x;
- GET_HIGH_WORD(hu,u);
- k = (hu>>20)-1023;
- c = 0;
- }
- hu &= 0x000fffff;
- if(hu<0x6a09e) {
- SET_HIGH_WORD(u,hu|0x3ff00000); /* normalize u */
- } else {
- k += 1;
- SET_HIGH_WORD(u,hu|0x3fe00000); /* normalize u/2 */
- hu = (0x00100000-hu)>>2;
- }
- f = u-1.0;
+ if (__builtin_expect (ax < 0x3e200000, 0)) /* |x| < 2**-29 */
+ {
+ math_force_eval (two54 + x); /* raise inexact */
+ if (ax < 0x3c900000) /* |x| < 2**-54 */
+ return x;
+ else
+ return x - x * x * 0.5;
}
- hfsq=0.5*f*f;
- if(hu==0) { /* |f| < 2**-20 */
- if(f==zero) {
- if(k==0) return zero;
- else {c += k*ln2_lo; return k*ln2_hi+c;}
+ if (hx > 0 || hx <= ((int32_t) 0xbfd2bec3))
+ {
+ k = 0; f = x; hu = 1;
+ } /* -0.2929<x<0.41422 */
+ }
+ else if (__builtin_expect (hx >= 0x7ff00000, 0))
+ return x + x;
+ if (k != 0)
+ {
+ if (hx < 0x43400000)
+ {
+ u = 1.0 + x;
+ GET_HIGH_WORD (hu, u);
+ k = (hu >> 20) - 1023;
+ c = (k > 0) ? 1.0 - (u - x) : x - (u - 1.0); /* correction term */
+ c /= u;
+ }
+ else
+ {
+ u = x;
+ GET_HIGH_WORD (hu, u);
+ k = (hu >> 20) - 1023;
+ c = 0;
+ }
+ hu &= 0x000fffff;
+ if (hu < 0x6a09e)
+ {
+ SET_HIGH_WORD (u, hu | 0x3ff00000); /* normalize u */
+ }
+ else
+ {
+ k += 1;
+ SET_HIGH_WORD (u, hu | 0x3fe00000); /* normalize u/2 */
+ hu = (0x00100000 - hu) >> 2;
+ }
+ f = u - 1.0;
+ }
+ hfsq = 0.5 * f * f;
+ if (hu == 0) /* |f| < 2**-20 */
+ {
+ if (f == zero)
+ {
+ if (k == 0)
+ return zero;
+ else
+ {
+ c += k * ln2_lo; return k * ln2_hi + c;
}
- R = hfsq*(1.0-0.66666666666666666*f);
- if(k==0) return f-R; else
- return k*ln2_hi-((R-(k*ln2_lo+c))-f);
}
- s = f/(2.0+f);
- z = s*s;
-#ifdef DO_NOT_USE_THIS
- R = z*(Lp1+z*(Lp2+z*(Lp3+z*(Lp4+z*(Lp5+z*(Lp6+z*Lp7))))));
-#else
- R1 = z*Lp[1]; z2=z*z;
- R2 = Lp[2]+z*Lp[3]; z4=z2*z2;
- R3 = Lp[4]+z*Lp[5]; z6=z4*z2;
- R4 = Lp[6]+z*Lp[7];
- R = R1 + z2*R2 + z4*R3 + z6*R4;
-#endif
- if(k==0) return f-(hfsq-s*(hfsq+R)); else
- return k*ln2_hi-((hfsq-(s*(hfsq+R)+(k*ln2_lo+c)))-f);
+ R = hfsq * (1.0 - 0.66666666666666666 * f);
+ if (k == 0)
+ return f - R;
+ else
+ return k * ln2_hi - ((R - (k * ln2_lo + c)) - f);
+ }
+ s = f / (2.0 + f);
+ z = s * s;
+ R1 = z * Lp[1]; z2 = z * z;
+ R2 = Lp[2] + z * Lp[3]; z4 = z2 * z2;
+ R3 = Lp[4] + z * Lp[5]; z6 = z4 * z2;
+ R4 = Lp[6] + z * Lp[7];
+ R = R1 + z2 * R2 + z4 * R3 + z6 * R4;
+ if (k == 0)
+ return f - (hfsq - s * (hfsq + R));
+ else
+ return k * ln2_hi - ((hfsq - (s * (hfsq + R) + (k * ln2_lo + c))) - f);
}
weak_alias (__log1p, log1p)
#ifdef NO_LONG_DOUBLE
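
The correction term in __log1p, c = 1.0 - (u - x) for k > 0, recovers exactly what rounding discarded when forming u = 1 + x; it is folded back in later together with k * ln2_lo. A tiny demonstration of that identity (illustration only):

#include <stdio.h>

int
main (void)
{
  double x = 3.0 + 0x1p-51;       /* the low bit is lost when 1 is added */
  double u = 1.0 + x;             /* rounds to 4.0 */
  double c = 1.0 - (u - x);       /* recovers the lost 2^-51 exactly */
  printf ("u=%g c=%a\n", u, c);   /* u=4 c=0x1p-51 */
  return 0;
}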
diff --git a/libc/sysdeps/ieee754/dbl-64/s_logb.c b/libc/sysdeps/ieee754/dbl-64/s_logb.c
index 17aa94b75..c065826dd 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_logb.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_logb.c
@@ -25,7 +25,7 @@ __logb (double x)
int32_t lx, ix, rix;
EXTRACT_WORDS (ix, lx, x);
- ix &= 0x7fffffff; /* high |x| */
+ ix &= 0x7fffffff; /* high |x| */
if ((ix | lx) == 0)
return -1.0 / fabs (x);
if (ix >= 0x7ff00000)
diff --git a/libc/sysdeps/ieee754/dbl-64/s_lrint.c b/libc/sysdeps/ieee754/dbl-64/s_lrint.c
index a68c97e59..1c3037364 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_lrint.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_lrint.c
@@ -33,7 +33,7 @@ long int
__lrint (double x)
{
int32_t j0;
- u_int32_t i0,i1;
+ u_int32_t i0, i1;
volatile double w;
double t;
long int result;
diff --git a/libc/sysdeps/ieee754/dbl-64/s_modf.c b/libc/sysdeps/ieee754/dbl-64/s_modf.c
index b9911c1af..1dce6381a 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_modf.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_modf.c
@@ -25,45 +25,59 @@
static const double one = 1.0;
double
-__modf(double x, double *iptr)
+__modf (double x, double *iptr)
{
- int32_t i0,i1,j0;
- u_int32_t i;
- EXTRACT_WORDS(i0,i1,x);
- j0 = ((i0>>20)&0x7ff)-0x3ff; /* exponent of x */
- if(j0<20) { /* integer part in high x */
- if(j0<0) { /* |x|<1 */
- INSERT_WORDS(*iptr,i0&0x80000000,0); /* *iptr = +-0 */
- return x;
- } else {
- i = (0x000fffff)>>j0;
- if(((i0&i)|i1)==0) { /* x is integral */
- *iptr = x;
- INSERT_WORDS(x,i0&0x80000000,0); /* return +-0 */
- return x;
- } else {
- INSERT_WORDS(*iptr,i0&(~i),0);
- return x - *iptr;
- }
+ int32_t i0, i1, j0;
+ u_int32_t i;
+ EXTRACT_WORDS (i0, i1, x);
+ j0 = ((i0 >> 20) & 0x7ff) - 0x3ff; /* exponent of x */
+ if (j0 < 20) /* integer part in high x */
+ {
+ if (j0 < 0) /* |x|<1 */
+ {
+ INSERT_WORDS (*iptr, i0 & 0x80000000, 0); /* *iptr = +-0 */
+ return x;
+ }
+ else
+ {
+ i = (0x000fffff) >> j0;
+ if (((i0 & i) | i1) == 0) /* x is integral */
+ {
+ *iptr = x;
+ INSERT_WORDS (x, i0 & 0x80000000, 0); /* return +-0 */
+ return x;
}
- } else if (__builtin_expect(j0>51, 0)) { /* no fraction part */
- *iptr = x*one;
- /* We must handle NaNs separately. */
- if (j0 == 0x400 && ((i0 & 0xfffff) | i1))
- return x*one;
- INSERT_WORDS(x,i0&0x80000000,0); /* return +-0 */
- return x;
- } else { /* fraction part in low x */
- i = ((u_int32_t)(0xffffffff))>>(j0-20);
- if((i1&i)==0) { /* x is integral */
- *iptr = x;
- INSERT_WORDS(x,i0&0x80000000,0); /* return +-0 */
- return x;
- } else {
- INSERT_WORDS(*iptr,i0,i1&(~i));
- return x - *iptr;
+ else
+ {
+ INSERT_WORDS (*iptr, i0 & (~i), 0);
+ return x - *iptr;
}
}
+ }
+ else if (__builtin_expect (j0 > 51, 0)) /* no fraction part */
+ {
+ *iptr = x * one;
+ /* We must handle NaNs separately. */
+ if (j0 == 0x400 && ((i0 & 0xfffff) | i1))
+ return x * one;
+ INSERT_WORDS (x, i0 & 0x80000000, 0); /* return +-0 */
+ return x;
+ }
+ else /* fraction part in low x */
+ {
+ i = ((u_int32_t) (0xffffffff)) >> (j0 - 20);
+ if ((i1 & i) == 0) /* x is integral */
+ {
+ *iptr = x;
+ INSERT_WORDS (x, i0 & 0x80000000, 0); /* return +-0 */
+ return x;
+ }
+ else
+ {
+ INSERT_WORDS (*iptr, i0, i1 & (~i));
+ return x - *iptr;
+ }
+ }
}
weak_alias (__modf, modf)
#ifdef NO_LONG_DOUBLE
diff --git a/libc/sysdeps/ieee754/dbl-64/s_nearbyint.c b/libc/sysdeps/ieee754/dbl-64/s_nearbyint.c
index 5017f471d..dec0c5d6e 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_nearbyint.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_nearbyint.c
@@ -29,40 +29,47 @@ static char rcsid[] = "$NetBSD: s_rint.c,v 1.8 1995/05/10 20:48:04 jtc Exp $";
#include <math_private.h>
static const double
-TWO52[2]={
+ TWO52[2] = {
4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */
-4.50359962737049600000e+15, /* 0xC3300000, 0x00000000 */
};
-double __nearbyint(double x)
+double
+__nearbyint (double x)
{
- fenv_t env;
- int32_t i0,j0,sx;
- double w,t;
- GET_HIGH_WORD(i0,x);
- sx = (i0>>31)&1;
- j0 = ((i0>>20)&0x7ff)-0x3ff;
- if(j0<52) {
- if(j0<0) {
- libc_feholdexcept (&env);
- w = TWO52[sx]+x;
- t = w-TWO52[sx];
- math_force_eval (t);
- libc_fesetenv (&env);
- GET_HIGH_WORD(i0,t);
- SET_HIGH_WORD(t,(i0&0x7fffffff)|(sx<<31));
- return t;
- }
- } else {
- if(j0==0x400) return x+x; /* inf or NaN */
- else return x; /* x is integral */
+ fenv_t env;
+ int32_t i0, j0, sx;
+ double w, t;
+ GET_HIGH_WORD (i0, x);
+ sx = (i0 >> 31) & 1;
+ j0 = ((i0 >> 20) & 0x7ff) - 0x3ff;
+ if (j0 < 52)
+ {
+ if (j0 < 0)
+ {
+ libc_feholdexcept (&env);
+ w = TWO52[sx] + x;
+ t = w - TWO52[sx];
+ math_force_eval (t);
+ libc_fesetenv (&env);
+ GET_HIGH_WORD (i0, t);
+ SET_HIGH_WORD (t, (i0 & 0x7fffffff) | (sx << 31));
+ return t;
}
- libc_feholdexcept (&env);
- w = TWO52[sx]+x;
- t = w-TWO52[sx];
- math_force_eval (t);
- libc_fesetenv (&env);
- return t;
+ }
+ else
+ {
+ if (j0 == 0x400)
+ return x + x; /* inf or NaN */
+ else
+ return x; /* x is integral */
+ }
+ libc_feholdexcept (&env);
+ w = TWO52[sx] + x;
+ t = w - TWO52[sx];
+ math_force_eval (t);
+ libc_fesetenv (&env);
+ return t;
}
weak_alias (__nearbyint, nearbyint)
#ifdef NO_LONG_DOUBLE
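
__nearbyint and __rint round by adding and then subtracting TWO52: once the value sits next to 2^52, the fraction bits fall off the significand and the hardware rounding mode does the work with no bit fiddling; math_force_eval and the fenv save/restore in __nearbyint keep the compiler honest and suppress the inexact flag. A sketch of the core trick for small non-negative x (illustration only; the real code also handles the sign via TWO52[sx] and large exponents):

#include <stdio.h>

static const double TWO52 = 4503599627370496.0;   /* 2^52 */

static double
rint_sketch (double x)          /* valid for 0 <= x < 2^52 */
{
  volatile double w = TWO52 + x;   /* fraction bits are rounded away here */
  return w - TWO52;
}

int
main (void)
{
  printf ("%g %g %g\n", rint_sketch (2.5), rint_sketch (3.5),
          rint_sketch (0.49999999999999994));
  /* round-to-nearest-even: 2 4 0 */
  return 0;
}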
diff --git a/libc/sysdeps/ieee754/dbl-64/s_remquo.c b/libc/sysdeps/ieee754/dbl-64/s_remquo.c
index 642581ede..c40615004 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_remquo.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_remquo.c
@@ -28,8 +28,8 @@ static const double zero = 0.0;
double
__remquo (double x, double y, int *quo)
{
- int32_t hx,hy;
- u_int32_t sx,lx,ly;
+ int32_t hx, hy;
+ u_int32_t sx, lx, ly;
int cquo, qs;
EXTRACT_WORDS (hx, lx, x);
@@ -41,14 +41,14 @@ __remquo (double x, double y, int *quo)
/* Purge off exception values. */
if ((hy | ly) == 0)
- return (x * y) / (x * y); /* y = 0 */
- if ((hx >= 0x7ff00000) /* x not finite */
- || ((hy >= 0x7ff00000) /* p is NaN */
+ return (x * y) / (x * y); /* y = 0 */
+ if ((hx >= 0x7ff00000) /* x not finite */
+ || ((hy >= 0x7ff00000) /* p is NaN */
&& (((hy - 0x7ff00000) | ly) != 0)))
return (x * y) / (x * y);
if (hy <= 0x7fbfffff)
- x = __ieee754_fmod (x, 8 * y); /* now x < 8y */
+ x = __ieee754_fmod (x, 8 * y); /* now x < 8y */
if (((hx - hy) | (lx - ly)) == 0)
{
@@ -56,8 +56,8 @@ __remquo (double x, double y, int *quo)
return zero * x;
}
- x = fabs (x);
- y = fabs (y);
+ x = fabs (x);
+ y = fabs (y);
cquo = 0;
if (x >= 4 * y)
diff --git a/libc/sysdeps/ieee754/dbl-64/s_rint.c b/libc/sysdeps/ieee754/dbl-64/s_rint.c
index 845890916..a9c0d2784 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_rint.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_rint.c
@@ -24,33 +24,39 @@
#include <math_private.h>
static const double
-TWO52[2]={
+ TWO52[2] = {
4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */
-4.50359962737049600000e+15, /* 0xC3300000, 0x00000000 */
};
double
-__rint(double x)
+__rint (double x)
{
- int32_t i0,j0,sx;
- double w,t;
- GET_HIGH_WORD(i0,x);
- sx = (i0>>31)&1;
- j0 = ((i0>>20)&0x7ff)-0x3ff;
- if(j0<52) {
- if(j0<0) {
- w = TWO52[sx]+x;
- t = w-TWO52[sx];
- GET_HIGH_WORD(i0,t);
- SET_HIGH_WORD(t,(i0&0x7fffffff)|(sx<<31));
- return t;
- }
- } else {
- if(j0==0x400) return x+x; /* inf or NaN */
- else return x; /* x is integral */
+ int32_t i0, j0, sx;
+ double w, t;
+ GET_HIGH_WORD (i0, x);
+ sx = (i0 >> 31) & 1;
+ j0 = ((i0 >> 20) & 0x7ff) - 0x3ff;
+ if (j0 < 52)
+ {
+ if (j0 < 0)
+ {
+ w = TWO52[sx] + x;
+ t = w - TWO52[sx];
+ GET_HIGH_WORD (i0, t);
+ SET_HIGH_WORD (t, (i0 & 0x7fffffff) | (sx << 31));
+ return t;
}
- w = TWO52[sx]+x;
- return w-TWO52[sx];
+ }
+ else
+ {
+ if (j0 == 0x400)
+ return x + x; /* inf or NaN */
+ else
+ return x; /* x is integral */
+ }
+ w = TWO52[sx] + x;
+ return w - TWO52[sx];
}
#ifndef __rint
weak_alias (__rint, rint)
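__rint above uses the same TWO52 trick but deliberately skips the fenv save/restore: rint is supposed to raise FE_INEXACT when it changes the value, nearbyint is not. A quick check of that difference through the standard <fenv.h> interface; illustrative only, with the volatile there to keep the compiler from folding the calls at compile time.

#include <fenv.h>
#include <math.h>
#include <stdio.h>

int
main (void)
{
  volatile double x = 1.5;          /* non-integral, so rounding is inexact */

  feclearexcept (FE_INEXACT);
  (void) nearbyint (x);
  printf ("nearbyint raised FE_INEXACT: %d\n", fetestexcept (FE_INEXACT) != 0); /* 0 */

  feclearexcept (FE_INEXACT);
  (void) rint (x);
  printf ("rint raised FE_INEXACT:      %d\n", fetestexcept (FE_INEXACT) != 0); /* non-zero */
  return 0;
}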
diff --git a/libc/sysdeps/ieee754/dbl-64/s_scalbln.c b/libc/sysdeps/ieee754/dbl-64/s_scalbln.c
index 271a24c3e..6402927d2 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_scalbln.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_scalbln.c
@@ -20,38 +20,43 @@
#include <math_private.h>
static const double
-two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
-twom54 = 5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */
-huge = 1.0e+300,
-tiny = 1.0e-300;
+ two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
+ twom54 = 5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */
+ huge = 1.0e+300,
+ tiny = 1.0e-300;
double
__scalbln (double x, long int n)
{
- int32_t k,hx,lx;
- EXTRACT_WORDS(hx,lx,x);
- k = (hx&0x7ff00000)>>20; /* extract exponent */
- if (__builtin_expect(k==0, 0)) { /* 0 or subnormal x */
- if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */
- x *= two54;
- GET_HIGH_WORD(hx,x);
- k = ((hx&0x7ff00000)>>20) - 54;
- }
- if (__builtin_expect(k==0x7ff, 0)) return x+x; /* NaN or Inf */
- if (__builtin_expect(n< -50000, 0))
- return tiny*__copysign(tiny,x); /*underflow*/
- if (__builtin_expect(n> 50000 || k+n > 0x7fe, 0))
- return huge*__copysign(huge,x); /* overflow */
- /* Now k and n are bounded we know that k = k+n does not
- overflow. */
- k = k+n;
- if (__builtin_expect(k > 0, 1)) /* normal result */
- {SET_HIGH_WORD(x,(hx&0x800fffff)|(k<<20)); return x;}
- if (k <= -54)
- return tiny*__copysign(tiny,x); /*underflow*/
- k += 54; /* subnormal result */
- SET_HIGH_WORD(x,(hx&0x800fffff)|(k<<20));
- return x*twom54;
+ int32_t k, hx, lx;
+ EXTRACT_WORDS (hx, lx, x);
+ k = (hx & 0x7ff00000) >> 20; /* extract exponent */
+ if (__builtin_expect (k == 0, 0)) /* 0 or subnormal x */
+ {
+ if ((lx | (hx & 0x7fffffff)) == 0)
+ return x; /* +-0 */
+ x *= two54;
+ GET_HIGH_WORD (hx, x);
+ k = ((hx & 0x7ff00000) >> 20) - 54;
+ }
+ if (__builtin_expect (k == 0x7ff, 0))
+ return x + x; /* NaN or Inf */
+ if (__builtin_expect (n < -50000, 0))
+ return tiny * __copysign (tiny, x); /*underflow*/
+ if (__builtin_expect (n > 50000 || k + n > 0x7fe, 0))
+ return huge * __copysign (huge, x); /* overflow */
+ /* Now k and n are bounded we know that k = k+n does not
+ overflow. */
+ k = k + n;
+ if (__builtin_expect (k > 0, 1)) /* normal result */
+ {
+ SET_HIGH_WORD (x, (hx & 0x800fffff) | (k << 20)); return x;
+ }
+ if (k <= -54)
+ return tiny * __copysign (tiny, x); /*underflow*/
+ k += 54; /* subnormal result */
+ SET_HIGH_WORD (x, (hx & 0x800fffff) | (k << 20));
+ return x * twom54;
}
weak_alias (__scalbln, scalbln)
#ifdef NO_LONG_DOUBLE
diff --git a/libc/sysdeps/ieee754/dbl-64/s_scalbn.c b/libc/sysdeps/ieee754/dbl-64/s_scalbn.c
index 1f302557e..6e7d5ad21 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_scalbn.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_scalbn.c
@@ -20,38 +20,43 @@
#include <math_private.h>
static const double
-two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
-twom54 = 5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */
-huge = 1.0e+300,
-tiny = 1.0e-300;
+ two54 = 1.80143985094819840000e+16, /* 0x43500000, 0x00000000 */
+ twom54 = 5.55111512312578270212e-17, /* 0x3C900000, 0x00000000 */
+ huge = 1.0e+300,
+ tiny = 1.0e-300;
double
__scalbn (double x, int n)
{
- int32_t k,hx,lx;
- EXTRACT_WORDS(hx,lx,x);
- k = (hx&0x7ff00000)>>20; /* extract exponent */
- if (__builtin_expect(k==0, 0)) { /* 0 or subnormal x */
- if ((lx|(hx&0x7fffffff))==0) return x; /* +-0 */
- x *= two54;
- GET_HIGH_WORD(hx,x);
- k = ((hx&0x7ff00000)>>20) - 54;
- }
- if (__builtin_expect(k==0x7ff, 0)) return x+x; /* NaN or Inf */
- if (__builtin_expect(n< -50000, 0))
- return tiny*__copysign(tiny,x); /*underflow*/
- if (__builtin_expect(n> 50000 || k+n > 0x7fe, 0))
- return huge*__copysign(huge,x); /* overflow */
- /* Now k and n are bounded we know that k = k+n does not
- overflow. */
- k = k+n;
- if (__builtin_expect(k > 0, 1)) /* normal result */
- {SET_HIGH_WORD(x,(hx&0x800fffff)|(k<<20)); return x;}
- if (k <= -54)
- return tiny*__copysign(tiny,x); /*underflow*/
- k += 54; /* subnormal result */
- SET_HIGH_WORD(x,(hx&0x800fffff)|(k<<20));
- return x*twom54;
+ int32_t k, hx, lx;
+ EXTRACT_WORDS (hx, lx, x);
+ k = (hx & 0x7ff00000) >> 20; /* extract exponent */
+ if (__builtin_expect (k == 0, 0)) /* 0 or subnormal x */
+ {
+ if ((lx | (hx & 0x7fffffff)) == 0)
+ return x; /* +-0 */
+ x *= two54;
+ GET_HIGH_WORD (hx, x);
+ k = ((hx & 0x7ff00000) >> 20) - 54;
+ }
+ if (__builtin_expect (k == 0x7ff, 0))
+ return x + x; /* NaN or Inf */
+ if (__builtin_expect (n < -50000, 0))
+ return tiny * __copysign (tiny, x); /*underflow*/
+ if (__builtin_expect (n > 50000 || k + n > 0x7fe, 0))
+ return huge * __copysign (huge, x); /* overflow */
+ /* Now k and n are bounded we know that k = k+n does not
+ overflow. */
+ k = k + n;
+ if (__builtin_expect (k > 0, 1)) /* normal result */
+ {
+ SET_HIGH_WORD (x, (hx & 0x800fffff) | (k << 20)); return x;
+ }
+ if (k <= -54)
+ return tiny * __copysign (tiny, x); /*underflow*/
+ k += 54; /* subnormal result */
+ SET_HIGH_WORD (x, (hx & 0x800fffff) | (k << 20));
+ return x * twom54;
}
weak_alias (__scalbn, scalbn)
#ifdef NO_LONG_DOUBLE
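After the special cases, both __scalbln and __scalbn above reduce to adding n to the 11-bit biased exponent stored in bits 62..52 of the double. A stripped-down sketch of just that fast path; the helper name is hypothetical and it handles none of the cases the real code deals with first (zeros, subnormals, NaN/Inf, overflow, underflow).

#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Simplified sketch: scale a normal double with a normal result by 2^n
   by rewriting the biased exponent field.  */
static double
scalbn_normal_sketch (double x, int n)
{
  uint64_t bits;
  memcpy (&bits, &x, sizeof bits);                 /* like EXTRACT_WORDS */
  int k = (int) ((bits >> 52) & 0x7ff);            /* biased exponent    */
  k += n;                                          /* assume 0 < k < 0x7ff stays true */
  bits = (bits & ~(0x7ffULL << 52)) | ((uint64_t) k << 52);
  memcpy (&x, &bits, sizeof x);                    /* like SET_HIGH_WORD */
  return x;
}

int
main (void)
{
  printf ("%g %g\n", scalbn_normal_sketch (1.5, 10), scalbn (1.5, 10)); /* 1536 1536 */
  return 0;
}

The real routine pre-scales subnormal inputs by 2^54 (and post-scales by 2^-54) so that the same exponent arithmetic still applies.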
diff --git a/libc/sysdeps/ieee754/dbl-64/s_sin.c b/libc/sysdeps/ieee754/dbl-64/s_sin.c
index 5c388c8b9..53eef6002 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_sin.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_sin.c
@@ -55,6 +55,59 @@
#include <math_private.h>
#include <fenv.h>
+/* Helper macros to compute sin of the input values. */
+#define POLYNOMIAL2(xx) ((((s5.x * (xx) + s4.x) * (xx) + s3.x) * (xx) + s2.x) \
+ * (xx))
+
+#define POLYNOMIAL(xx) (POLYNOMIAL2 (xx) + s1.x)
+
+/* The computed polynomial is a variation of the Taylor series expansion for
+ sin(a):
+
+ a - a^3/3! + a^5/5! - a^7/7! + a^9/9! + (1 - a^2) * da / 2
+
+ The constants s1, s2, s3, etc. are pre-computed values of 1/3!, 1/5! and so
+ on. The result is returned to LHS and correction in COR. */
+#define TAYLOR_SINCOS(xx, a, da, cor) \
+({ \
+ double t = ((POLYNOMIAL (xx) * (a) - 0.5 * (da)) * (xx) + (da)); \
+ double res = (a) + t; \
+ (cor) = ((a) - res) + t; \
+ res; \
+})
+
+/* This is again a variation of the Taylor series expansion with the term
+ x^3/3! expanded into the following for better accuracy:
+
+ bb * x ^ 3 + 3 * aa * x * x1 * x2 + aa * x1 ^ 3 + aa * x2 ^ 3
+
+ The correction term is dx and bb + aa = -1/3!
+ */
+#define TAYLOR_SLOW(x0, dx, cor) \
+({ \
+ static const double th2_36 = 206158430208.0; /* 1.5*2**37 */ \
+ double xx = (x0) * (x0); \
+ double x1 = ((x0) + th2_36) - th2_36; \
+ double y = aa.x * x1 * x1 * x1; \
+ double r = (x0) + y; \
+ double x2 = ((x0) - x1) + (dx); \
+ double t = (((POLYNOMIAL2 (xx) + bb.x) * xx + 3.0 * aa.x * x1 * x2) \
+ * (x0) + aa.x * x2 * x2 * x2 + (dx)); \
+ t = (((x0) - r) + y) + t; \
+ double res = r + t; \
+ (cor) = (r - res) + t; \
+ res; \
+})
+
+#define SINCOS_TABLE_LOOKUP(u, sn, ssn, cs, ccs) \
+({ \
+ int4 k = u.i[LOW_HALF] << 2; \
+ sn = __sincostab.x[k]; \
+ ssn = __sincostab.x[k + 1]; \
+ cs = __sincostab.x[k + 2]; \
+ ccs = __sincostab.x[k + 3]; \
+})
+
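The (res, cor) pairs produced by TAYLOR_SINCOS and TAYLOR_SLOW follow the usual fast-two-sum idiom: res is the rounded sum, cor the exact rounding error, and the later tests of the form res == res + 1.02 * cor ask whether that error could possibly move the correctly rounded result, falling back to a slower path when it could. A tiny self-contained sketch of the idiom, with illustrative names only:

#include <stdio.h>

/* Fast two-sum: if |a| >= |t|, then res + cor equals a + t exactly,
   so cor is the rounding error of the fast-path result.  */
static double
fast_two_sum (double a, double t, double *cor)
{
  double res = a + t;
  *cor = (a - res) + t;
  return res;
}

int
main (void)
{
  double cor;
  double res = fast_two_sum (1.0, 1e-17, &cor);
  printf ("res = %.17g, cor = %.17g\n", res, cor);   /* res = 1, cor = 1e-17 */
  printf ("fast path ok: %d\n", res == res + 1.02 * cor);
  return 0;
}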
#ifndef SECTION
# define SECTION
#endif
@@ -74,10 +127,8 @@ static const double
void __dubsin (double x, double dx, double w[]);
void __docos (double x, double dx, double w[]);
-double __mpsin (double x, double dx);
-double __mpcos (double x, double dx);
-double __mpsin1 (double x);
-double __mpcos1 (double x);
+double __mpsin (double x, double dx, bool reduce_range);
+double __mpcos (double x, double dx, bool reduce_range);
static double slow (double x);
static double slow1 (double x);
static double slow2 (double x);
@@ -93,6 +144,39 @@ static double csloww (double x, double dx, double orig);
static double csloww1 (double x, double dx, double orig);
static double csloww2 (double x, double dx, double orig, int n);
+/* Reduce range of X and compute sin of a + da. K is the amount by which to
+ rotate the quadrants. This allows us to use the same routine to compute cos
+ by simply rotating the quadrants by 1. */
+static inline double
+__always_inline
+reduce_and_compute (double x, double a, double da, unsigned int k)
+{
+ double retval = 0;
+ unsigned int n = __branred (x, &a, &da);
+ k = (n + k) % 4;
+ switch (k)
+ {
+ case 0:
+ if (a * a < 0.01588)
+ retval = bsloww (a, da, x, n);
+ else
+ retval = bsloww1 (a, da, x, n);
+ break;
+ case 2:
+ if (a * a < 0.01588)
+ retval = bsloww (-a, -da, x, n);
+ else
+ retval = bsloww1 (-a, -da, x, n);
+ break;
+
+ case 1:
+ case 3:
+ retval = bsloww2 (a, da, x, n);
+ break;
+ }
+ return retval;
+}
+
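reduce_and_compute works because writing x = n * pi/2 + a with |a| <= pi/4 turns sin(x) into one of +-sin(a) or +-cos(a) selected by n mod 4, and cos(x) is obtained from the same machinery by rotating the quadrant index by one. A naive double-precision illustration of that convention; it uses plain libm calls and a crude reduction, nothing like __branred's precision, and every name is hypothetical.

#include <math.h>
#include <stdio.h>

static double
sin_by_quadrant (double x, unsigned int k)
{
  static const double hp = 1.5707963267948966;   /* pi/2                     */
  double n = nearbyint (x / hp);                 /* nearest multiple of pi/2 */
  double a = x - n * hp;                         /* crude reduction          */
  switch (((unsigned int) (long) n + k) & 3)
    {
    case 0:  return  sin (a);
    case 1:  return  cos (a);
    case 2:  return -sin (a);
    default: return -cos (a);
    }
}

int
main (void)
{
  double x = 2.0;
  printf ("%.15g %.15g\n", sin_by_quadrant (x, 0), sin (x));
  printf ("%.15g %.15g\n", sin_by_quadrant (x, 1), cos (x));
  return 0;
}

Passing k = 1 here reproduces cos (x), which is exactly how the __cos path below reuses the routine.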
/*******************************************************************/
/* An ultimate sin routine. Given an IEEE double machine number x */
/* it computes the correctly rounded (to nearest) value of sin(x) */
@@ -113,21 +197,16 @@ __sin (double x)
m = u.i[HIGH_HALF];
k = 0x7fffffff & m; /* no sign */
if (k < 0x3e500000) /* if x->0 =>sin(x)=x */
- {
- retval = x;
- goto ret;
- }
+ retval = x;
/*---------------------------- 2^-26 < |x|< 0.25 ----------------------*/
else if (k < 0x3fd00000)
{
xx = x * x;
- /*Taylor series. */
- t = (((((s5.x * xx + s4.x) * xx + s3.x) * xx + s2.x) * xx + s1.x)
- * (xx * x));
+ /* Taylor series. */
+ t = POLYNOMIAL (xx) * (xx * x);
res = x + t;
cor = (x - res) + t;
retval = (res == res + 1.07 * cor) ? res : slow (x);
- goto ret;
} /* else if (k < 0x3fd00000) */
/*---------------------------- 0.25<|x|< 0.855469---------------------- */
else if (k < 0x3feb6000)
@@ -137,16 +216,16 @@ __sin (double x)
xx = y * y;
s = y + y * xx * (sn3 + xx * sn5);
c = xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = (m > 0) ? __sincostab.x[k] : -__sincostab.x[k];
- ssn = (m > 0) ? __sincostab.x[k + 1] : -__sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
+ if (m <= 0)
+ {
+ sn = -sn;
+ ssn = -ssn;
+ }
cor = (ssn + s * ccs - sn * c) + cs * s;
res = sn + cor;
cor = (sn - res) + cor;
retval = (res == res + 1.096 * cor) ? res : slow1 (x);
- goto ret;
} /* else if (k < 0x3feb6000) */
/*----------------------- 0.855469 <|x|<2.426265 ----------------------*/
@@ -167,16 +246,11 @@ __sin (double x)
xx = y * y;
s = y + y * xx * (sn3 + xx * sn5);
c = xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
cor = (ccs - s * ssn - cs * c) - sn * s;
res = cs + cor;
cor = (cs - res) + cor;
retval = (res == res + 1.020 * cor) ? ((m > 0) ? res : -res) : slow2 (x);
- goto ret;
} /* else if (k < 0x400368fd) */
/*-------------------------- 2.426265<|x|< 105414350 ----------------------*/
@@ -204,14 +278,10 @@ __sin (double x)
}
if (xx < 0.01588)
{
- /*Taylor series */
- t = (((((s5.x * xx + s4.x) * xx + s3.x) * xx + s2.x) * xx
- + s1.x) * a - 0.5 * da) * xx + da;
- res = a + t;
- cor = (a - res) + t;
+ /* Taylor series. */
+ res = TAYLOR_SINCOS (xx, a, da, cor);
cor = (cor > 0) ? 1.02 * cor + eps : 1.02 * cor - eps;
retval = (res == res + cor) ? res : sloww (a, da, x);
- goto ret;
}
else
{
@@ -232,18 +302,13 @@ __sin (double x)
xx = y * y;
s = y + (db + y * xx * (sn3 + xx * sn5));
c = y * db + xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
cor = (ssn + s * ccs - sn * c) + cs * s;
res = sn + cor;
cor = (sn - res) + cor;
cor = (cor > 0) ? 1.035 * cor + eps : 1.035 * cor - eps;
retval = ((res == res + cor) ? ((m) ? res : -res)
: sloww1 (a, da, x));
- goto ret;
}
break;
@@ -257,11 +322,7 @@ __sin (double x)
u.x = big.x + a;
y = a - (u.x - big.x) + da;
xx = y * y;
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
s = y + y * xx * (sn3 + xx * sn5);
c = xx * (cs2 + xx * (cs4 + xx * cs6));
cor = (ccs - s * ssn - cs * c) - sn * s;
@@ -270,11 +331,8 @@ __sin (double x)
cor = (cor > 0) ? 1.025 * cor + eps : 1.025 * cor - eps;
retval = ((res == res + cor) ? ((n & 2) ? -res : res)
: sloww2 (a, da, x, n));
- goto ret;
-
break;
}
-
} /* else if (k < 0x419921FB ) */
/*---------------------105414350 <|x|< 281474976710656 --------------------*/
@@ -307,14 +365,10 @@ __sin (double x)
}
if (xx < 0.01588)
{
- /* Taylor series */
- t = (((((s5.x * xx + s4.x) * xx + s3.x) * xx + s2.x) * xx
- + s1.x) * a - 0.5 * da) * xx + da;
- res = a + t;
- cor = (a - res) + t;
+ /* Taylor series. */
+ res = TAYLOR_SINCOS (xx, a, da, cor);
cor = (cor > 0) ? 1.02 * cor + eps : 1.02 * cor - eps;
retval = (res == res + cor) ? res : bsloww (a, da, x, n);
- goto ret;
}
else
{
@@ -335,18 +389,13 @@ __sin (double x)
xx = y * y;
s = y + (db + y * xx * (sn3 + xx * sn5));
c = y * db + xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
cor = (ssn + s * ccs - sn * c) + cs * s;
res = sn + cor;
cor = (sn - res) + cor;
cor = (cor > 0) ? 1.035 * cor + eps : 1.035 * cor - eps;
retval = ((res == res + cor) ? ((m) ? res : -res)
: bsloww1 (a, da, x, n));
- goto ret;
}
break;
@@ -360,11 +409,7 @@ __sin (double x)
u.x = big.x + a;
y = a - (u.x - big.x) + da;
xx = y * y;
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
s = y + y * xx * (sn3 + xx * sn5);
c = xx * (cs2 + xx * (cs4 + xx * cs6));
cor = (ccs - s * ssn - cs * c) - sn * s;
@@ -373,40 +418,13 @@ __sin (double x)
cor = (cor > 0) ? 1.025 * cor + eps : 1.025 * cor - eps;
retval = ((res == res + cor) ? ((n & 2) ? -res : res)
: bsloww2 (a, da, x, n));
- goto ret;
-
break;
}
} /* else if (k < 0x42F00000 ) */
/* -----------------281474976710656 <|x| <2^1024----------------------------*/
else if (k < 0x7ff00000)
- {
- n = __branred (x, &a, &da);
- switch (n)
- {
- case 0:
- if (a * a < 0.01588)
- retval = bsloww (a, da, x, n);
- else
- retval = bsloww1 (a, da, x, n);
- goto ret;
- break;
- case 2:
- if (a * a < 0.01588)
- retval = bsloww (-a, -da, x, n);
- else
- retval = bsloww1 (-a, -da, x, n);
- goto ret;
- break;
-
- case 1:
- case 3:
- retval = bsloww2 (a, da, x, n);
- goto ret;
- break;
- }
- } /* else if (k < 0x7ff00000 ) */
+ retval = reduce_and_compute (x, a, da, 0);
/*--------------------- |x| > 2^1024 ----------------------------------*/
else
@@ -414,10 +432,8 @@ __sin (double x)
if (k == 0x7ff00000 && u.i[LOW_HALF] == 0)
__set_errno (EDOM);
retval = x / x;
- goto ret;
}
-ret:
return retval;
}
@@ -444,11 +460,9 @@ __cos (double x)
m = u.i[HIGH_HALF];
k = 0x7fffffff & m;
+ /* |x|<2^-27 => cos(x)=1 */
if (k < 0x3e400000)
- {
- retval = 1.0;
- goto ret;
- } /* |x|<2^-27 => cos(x)=1 */
+ retval = 1.0;
else if (k < 0x3feb6000)
{ /* 2^-27 < |x| < 0.855469 */
@@ -458,16 +472,11 @@ __cos (double x)
xx = y * y;
s = y + y * xx * (sn3 + xx * sn5);
c = xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
cor = (ccs - s * ssn - cs * c) - sn * s;
res = cs + cor;
cor = (cs - res) + cor;
retval = (res == res + 1.020 * cor) ? res : cslow2 (x);
- goto ret;
} /* else if (k < 0x3feb6000) */
else if (k < 0x400368fd)
@@ -478,13 +487,9 @@ __cos (double x)
xx = a * a;
if (xx < 0.01588)
{
- t = (((((s5.x * xx + s4.x) * xx + s3.x) * xx + s2.x) * xx + s1.x)
- * a - 0.5 * da) * xx + da;
- res = a + t;
- cor = (a - res) + t;
+ res = TAYLOR_SINCOS (xx, a, da, cor);
cor = (cor > 0) ? 1.02 * cor + 1.0e-31 : 1.02 * cor - 1.0e-31;
retval = (res == res + cor) ? res : csloww (a, da, x);
- goto ret;
}
else
{
@@ -505,18 +510,13 @@ __cos (double x)
xx = y * y;
s = y + (db + y * xx * (sn3 + xx * sn5));
c = y * db + xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
cor = (ssn + s * ccs - sn * c) + cs * s;
res = sn + cor;
cor = (sn - res) + cor;
cor = (cor > 0) ? 1.035 * cor + 1.0e-31 : 1.035 * cor - 1.0e-31;
retval = ((res == res + cor) ? ((m) ? res : -res)
: csloww1 (a, da, x));
- goto ret;
}
} /* else if (k < 0x400368fd) */
@@ -546,13 +546,9 @@ __cos (double x)
}
if (xx < 0.01588)
{
- t = (((((s5.x * xx + s4.x) * xx + s3.x) * xx + s2.x) * xx
- + s1.x) * a - 0.5 * da) * xx + da;
- res = a + t;
- cor = (a - res) + t;
+ res = TAYLOR_SINCOS (xx, a, da, cor);
cor = (cor > 0) ? 1.02 * cor + eps : 1.02 * cor - eps;
retval = (res == res + cor) ? res : csloww (a, da, x);
- goto ret;
}
else
{
@@ -573,18 +569,13 @@ __cos (double x)
xx = y * y;
s = y + (db + y * xx * (sn3 + xx * sn5));
c = y * db + xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
cor = (ssn + s * ccs - sn * c) + cs * s;
res = sn + cor;
cor = (sn - res) + cor;
cor = (cor > 0) ? 1.035 * cor + eps : 1.035 * cor - eps;
retval = ((res == res + cor) ? ((m) ? res : -res)
: csloww1 (a, da, x));
- goto ret;
}
break;
@@ -598,11 +589,7 @@ __cos (double x)
u.x = big.x + a;
y = a - (u.x - big.x) + da;
xx = y * y;
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
s = y + y * xx * (sn3 + xx * sn5);
c = xx * (cs2 + xx * (cs4 + xx * cs6));
cor = (ccs - s * ssn - cs * c) - sn * s;
@@ -611,8 +598,6 @@ __cos (double x)
cor = (cor > 0) ? 1.025 * cor + eps : 1.025 * cor - eps;
retval = ((res == res + cor) ? ((n) ? -res : res)
: csloww2 (a, da, x, n));
- goto ret;
-
break;
}
} /* else if (k < 0x419921FB ) */
@@ -646,13 +631,9 @@ __cos (double x)
}
if (xx < 0.01588)
{
- t = (((((s5.x * xx + s4.x) * xx + s3.x) * xx + s2.x) * xx
- + s1.x) * a - 0.5 * da) * xx + da;
- res = a + t;
- cor = (a - res) + t;
+ res = TAYLOR_SINCOS (xx, a, da, cor);
cor = (cor > 0) ? 1.02 * cor + eps : 1.02 * cor - eps;
retval = (res == res + cor) ? res : bsloww (a, da, x, n);
- goto ret;
}
else
{
@@ -673,18 +654,13 @@ __cos (double x)
xx = y * y;
s = y + (db + y * xx * (sn3 + xx * sn5));
c = y * db + xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
cor = (ssn + s * ccs - sn * c) + cs * s;
res = sn + cor;
cor = (sn - res) + cor;
cor = (cor > 0) ? 1.035 * cor + eps : 1.035 * cor - eps;
retval = ((res == res + cor) ? ((m) ? res : -res)
: bsloww1 (a, da, x, n));
- goto ret;
}
break;
@@ -698,11 +674,7 @@ __cos (double x)
u.x = big.x + a;
y = a - (u.x - big.x) + da;
xx = y * y;
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
s = y + y * xx * (sn3 + xx * sn5);
c = xx * (cs2 + xx * (cs4 + xx * cs6));
cor = (ccs - s * ssn - cs * c) - sn * s;
@@ -711,49 +683,21 @@ __cos (double x)
cor = (cor > 0) ? 1.025 * cor + eps : 1.025 * cor - eps;
retval = ((res == res + cor) ? ((n) ? -res : res)
: bsloww2 (a, da, x, n));
- goto ret;
break;
}
} /* else if (k < 0x42F00000 ) */
+ /* 281474976710656 <|x| <2^1024 */
else if (k < 0x7ff00000)
- { /* 281474976710656 <|x| <2^1024 */
-
- n = __branred (x, &a, &da);
- switch (n)
- {
- case 1:
- if (a * a < 0.01588)
- retval = bsloww (-a, -da, x, n);
- else
- retval = bsloww1 (-a, -da, x, n);
- goto ret;
- break;
- case 3:
- if (a * a < 0.01588)
- retval = bsloww (a, da, x, n);
- else
- retval = bsloww1 (a, da, x, n);
- goto ret;
- break;
-
- case 0:
- case 2:
- retval = bsloww2 (a, da, x, n);
- goto ret;
- break;
- }
- } /* else if (k < 0x7ff00000 ) */
+ retval = reduce_and_compute (x, a, da, 1);
else
{
if (k == 0x7ff00000 && u.i[LOW_HALF] == 0)
__set_errno (EDOM);
retval = x / x; /* |x| > 2^1024 */
- goto ret;
}
-ret:
return retval;
}
@@ -766,18 +710,8 @@ static double
SECTION
slow (double x)
{
- static const double th2_36 = 206158430208.0; /* 1.5*2**37 */
- double y, x1, x2, xx, r, t, res, cor, w[2];
- x1 = (x + th2_36) - th2_36;
- y = aa.x * x1 * x1 * x1;
- r = x + y;
- x2 = x - x1;
- xx = x * x;
- t = (((((s5.x * xx + s4.x) * xx + s3.x) * xx + s2.x) * xx + bb.x) * xx
- + 3.0 * aa.x * x1 * x2) * x + aa.x * x2 * x2 * x2;
- t = ((x - r) + y) + t;
- res = r + t;
- cor = (r - res) + t;
+ double res, cor, w[2];
+ res = TAYLOR_SLOW (x, 0, cor);
if (res == res + 1.0007 * cor)
return res;
else
@@ -786,12 +720,12 @@ slow (double x)
if (w[0] == w[0] + 1.000000001 * w[1])
return (x > 0) ? w[0] : -w[0];
else
- return (x > 0) ? __mpsin (x, 0) : -__mpsin (-x, 0);
+ return (x > 0) ? __mpsin (x, 0, false) : -__mpsin (-x, 0, false);
}
}
/*******************************************************************************/
-/* Routine compute sin(x) for 0.25<|x|< 0.855469 by __sincostab.tbl and Taylor */
+/* Routine compute sin(x) for 0.25<|x|< 0.855469 by __sincostab.tbl and Taylor */
/* and if result still doesn't accurate enough by mpsin or dubsin */
/*******************************************************************************/
@@ -802,18 +736,13 @@ slow1 (double x)
mynumber u;
double sn, ssn, cs, ccs, s, c, w[2], y, y1, y2, c1, c2, xx, cor, res;
static const double t22 = 6291456.0;
- int4 k;
y = ABS (x);
u.x = big.x + y;
y = y - (u.x - big.x);
xx = y * y;
s = y * xx * (sn3 + xx * sn5);
c = xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k]; /* Data */
- ssn = __sincostab.x[k + 1]; /* from */
- cs = __sincostab.x[k + 2]; /* tables */
- ccs = __sincostab.x[k + 3]; /* __sincostab.tbl */
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
y1 = (y + t22) - t22;
y2 = y - y1;
c1 = (cs + t22) - t22;
@@ -831,7 +760,7 @@ slow1 (double x)
if (w[0] == w[0] + 1.000000005 * w[1])
return (x > 0) ? w[0] : -w[0];
else
- return (x > 0) ? __mpsin (x, 0) : -__mpsin (-x, 0);
+ return (x > 0) ? __mpsin (x, 0, false) : -__mpsin (-x, 0, false);
}
}
@@ -846,7 +775,6 @@ slow2 (double x)
mynumber u;
double sn, ssn, cs, ccs, s, c, w[2], y, y1, y2, e1, e2, xx, cor, res, del;
static const double t22 = 6291456.0;
- int4 k;
y = ABS (x);
y = hp0.x - y;
if (y >= 0)
@@ -864,11 +792,7 @@ slow2 (double x)
xx = y * y;
s = y * xx * (sn3 + xx * sn5);
c = y * del + xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
y1 = (y + t22) - t22;
y2 = (y - y1) + del;
e1 = (sn + t22) - t22;
@@ -889,7 +813,7 @@ slow2 (double x)
if (w[0] == w[0] + 1.000000005 * w[1])
return (x > 0) ? w[0] : -w[0];
else
- return (x > 0) ? __mpsin (x, 0) : -__mpsin (-x, 0);
+ return (x > 0) ? __mpsin (x, 0, false) : -__mpsin (-x, 0, false);
}
}
@@ -905,24 +829,14 @@ static double
SECTION
sloww (double x, double dx, double orig)
{
- static const double th2_36 = 206158430208.0; /* 1.5*2**37 */
- double y, x1, x2, xx, r, t, res, cor, w[2], a, da, xn;
+ double y, t, res, cor, w[2], a, da, xn;
union
{
int4 i[2];
double x;
} v;
int4 n;
- x1 = (x + th2_36) - th2_36;
- y = aa.x * x1 * x1 * x1;
- r = x + y;
- x2 = (x - x1) + dx;
- xx = x * x;
- t = (((((s5.x * xx + s4.x) * xx + s3.x) * xx + s2.x) * xx + bb.x) * xx
- + 3.0 * aa.x * x1 * x2) * x + aa.x * x2 * x2 * x2 + dx;
- t = ((x - r) + y) + t;
- res = r + t;
- cor = (r - res) + t;
+ res = TAYLOR_SLOW (x, dx, cor);
cor =
(cor >
0) ? 1.0005 * cor + ABS (orig) * 3.1e-30 : 1.0005 * cor -
@@ -966,7 +880,7 @@ sloww (double x, double dx, double orig)
if (w[0] == w[0] + cor)
return (a > 0) ? w[0] : -w[0];
else
- return __mpsin1 (orig);
+ return __mpsin (orig, 0, true);
}
}
}
@@ -985,7 +899,6 @@ sloww1 (double x, double dx, double orig)
mynumber u;
double sn, ssn, cs, ccs, s, c, w[2], y, y1, y2, c1, c2, xx, cor, res;
static const double t22 = 6291456.0;
- int4 k;
y = ABS (x);
u.x = big.x + y;
@@ -994,11 +907,7 @@ sloww1 (double x, double dx, double orig)
xx = y * y;
s = y * xx * (sn3 + xx * sn5);
c = xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
y1 = (y + t22) - t22;
y2 = (y - y1) + dx;
c1 = (cs + t22) - t22;
@@ -1028,7 +937,7 @@ sloww1 (double x, double dx, double orig)
if (w[0] == w[0] + cor)
return (x > 0) ? w[0] : -w[0];
else
- return __mpsin1 (orig);
+ return __mpsin (orig, 0, true);
}
}
@@ -1046,7 +955,6 @@ sloww2 (double x, double dx, double orig, int n)
mynumber u;
double sn, ssn, cs, ccs, s, c, w[2], y, y1, y2, e1, e2, xx, cor, res;
static const double t22 = 6291456.0;
- int4 k;
y = ABS (x);
u.x = big.x + y;
@@ -1055,11 +963,7 @@ sloww2 (double x, double dx, double orig, int n)
xx = y * y;
s = y * xx * (sn3 + xx * sn5);
c = y * dx + xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
y1 = (y + t22) - t22;
y2 = (y - y1) + dx;
@@ -1090,7 +994,7 @@ sloww2 (double x, double dx, double orig, int n)
if (w[0] == w[0] + cor)
return (n & 2) ? -w[0] : w[0];
else
- return __mpsin1 (orig);
+ return __mpsin (orig, 0, true);
}
}
@@ -1106,19 +1010,9 @@ static double
SECTION
bsloww (double x, double dx, double orig, int n)
{
- static const double th2_36 = 206158430208.0; /* 1.5*2**37 */
- double y, x1, x2, xx, r, t, res, cor, w[2];
-
- x1 = (x + th2_36) - th2_36;
- y = aa.x * x1 * x1 * x1;
- r = x + y;
- x2 = (x - x1) + dx;
- xx = x * x;
- t = (((((s5.x * xx + s4.x) * xx + s3.x) * xx + s2.x) * xx + bb.x) * xx
- + 3.0 * aa.x * x1 * x2) * x + aa.x * x2 * x2 * x2 + dx;
- t = ((x - r) + y) + t;
- res = r + t;
- cor = (r - res) + t;
+ double res, cor, w[2];
+
+ res = TAYLOR_SLOW (x, dx, cor);
cor = (cor > 0) ? 1.0005 * cor + 1.1e-24 : 1.0005 * cor - 1.1e-24;
if (res == res + cor)
return res;
@@ -1132,7 +1026,7 @@ bsloww (double x, double dx, double orig, int n)
if (w[0] == w[0] + cor)
return (x > 0) ? w[0] : -w[0];
else
- return (n & 1) ? __mpcos1 (orig) : __mpsin1 (orig);
+ return (n & 1) ? __mpcos (orig, 0, true) : __mpsin (orig, 0, true);
}
}
@@ -1150,7 +1044,6 @@ bsloww1 (double x, double dx, double orig, int n)
mynumber u;
double sn, ssn, cs, ccs, s, c, w[2], y, y1, y2, c1, c2, xx, cor, res;
static const double t22 = 6291456.0;
- int4 k;
y = ABS (x);
u.x = big.x + y;
@@ -1159,11 +1052,7 @@ bsloww1 (double x, double dx, double orig, int n)
xx = y * y;
s = y * xx * (sn3 + xx * sn5);
c = xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
y1 = (y + t22) - t22;
y2 = (y - y1) + dx;
c1 = (cs + t22) - t22;
@@ -1188,7 +1077,7 @@ bsloww1 (double x, double dx, double orig, int n)
if (w[0] == w[0] + cor)
return (x > 0) ? w[0] : -w[0];
else
- return (n & 1) ? __mpcos1 (orig) : __mpsin1 (orig);
+ return (n & 1) ? __mpcos (orig, 0, true) : __mpsin (orig, 0, true);
}
}
@@ -1206,7 +1095,6 @@ bsloww2 (double x, double dx, double orig, int n)
mynumber u;
double sn, ssn, cs, ccs, s, c, w[2], y, y1, y2, e1, e2, xx, cor, res;
static const double t22 = 6291456.0;
- int4 k;
y = ABS (x);
u.x = big.x + y;
@@ -1215,11 +1103,7 @@ bsloww2 (double x, double dx, double orig, int n)
xx = y * y;
s = y * xx * (sn3 + xx * sn5);
c = y * dx + xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
y1 = (y + t22) - t22;
y2 = (y - y1) + dx;
@@ -1245,7 +1129,7 @@ bsloww2 (double x, double dx, double orig, int n)
if (w[0] == w[0] + cor)
return (n & 2) ? -w[0] : w[0];
else
- return (n & 1) ? __mpsin1 (orig) : __mpcos1 (orig);
+ return (n & 1) ? __mpsin (orig, 0, true) : __mpcos (orig, 0, true);
}
}
@@ -1261,7 +1145,6 @@ cslow2 (double x)
mynumber u;
double sn, ssn, cs, ccs, s, c, w[2], y, y1, y2, e1, e2, xx, cor, res;
static const double t22 = 6291456.0;
- int4 k;
y = ABS (x);
u.x = big.x + y;
@@ -1269,11 +1152,7 @@ cslow2 (double x)
xx = y * y;
s = y * xx * (sn3 + xx * sn5);
c = xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
y1 = (y + t22) - t22;
y2 = y - y1;
e1 = (sn + t22) - t22;
@@ -1292,7 +1171,7 @@ cslow2 (double x)
if (w[0] == w[0] + 1.000000005 * w[1])
return w[0];
else
- return __mpcos (x, 0);
+ return __mpcos (x, 0, false);
}
}
@@ -1308,8 +1187,7 @@ static double
SECTION
csloww (double x, double dx, double orig)
{
- static const double th2_36 = 206158430208.0; /* 1.5*2**37 */
- double y, x1, x2, xx, r, t, res, cor, w[2], a, da, xn;
+ double y, t, res, cor, w[2], a, da, xn;
union
{
int4 i[2];
@@ -1317,17 +1195,8 @@ csloww (double x, double dx, double orig)
} v;
int4 n;
- x1 = (x + th2_36) - th2_36;
- y = aa.x * x1 * x1 * x1;
- r = x + y;
- x2 = (x - x1) + dx;
- xx = x * x;
/* Taylor series */
- t = (((((s5.x * xx + s4.x) * xx + s3.x) * xx + s2.x) * xx + bb.x) * xx
- + 3.0 * aa.x * x1 * x2) * x + aa.x * x2 * x2 * x2 + dx;
- t = ((x - r) + y) + t;
- res = r + t;
- cor = (r - res) + t;
+ t = TAYLOR_SLOW (x, dx, cor);
if (cor > 0)
cor = 1.0005 * cor + ABS (orig) * 3.1e-30;
@@ -1375,7 +1244,7 @@ csloww (double x, double dx, double orig)
if (w[0] == w[0] + cor)
return (a > 0) ? w[0] : -w[0];
else
- return __mpcos1 (orig);
+ return __mpcos (orig, 0, true);
}
}
}
@@ -1394,7 +1263,6 @@ csloww1 (double x, double dx, double orig)
mynumber u;
double sn, ssn, cs, ccs, s, c, w[2], y, y1, y2, c1, c2, xx, cor, res;
static const double t22 = 6291456.0;
- int4 k;
y = ABS (x);
u.x = big.x + y;
@@ -1403,11 +1271,7 @@ csloww1 (double x, double dx, double orig)
xx = y * y;
s = y * xx * (sn3 + xx * sn5);
c = xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
y1 = (y + t22) - t22;
y2 = (y - y1) + dx;
c1 = (cs + t22) - t22;
@@ -1435,7 +1299,7 @@ csloww1 (double x, double dx, double orig)
if (w[0] == w[0] + cor)
return (x > 0) ? w[0] : -w[0];
else
- return __mpcos1 (orig);
+ return __mpcos (orig, 0, true);
}
}
@@ -1454,7 +1318,6 @@ csloww2 (double x, double dx, double orig, int n)
mynumber u;
double sn, ssn, cs, ccs, s, c, w[2], y, y1, y2, e1, e2, xx, cor, res;
static const double t22 = 6291456.0;
- int4 k;
y = ABS (x);
u.x = big.x + y;
@@ -1463,11 +1326,7 @@ csloww2 (double x, double dx, double orig, int n)
xx = y * y;
s = y * xx * (sn3 + xx * sn5);
c = y * dx + xx * (cs2 + xx * (cs4 + xx * cs6));
- k = u.i[LOW_HALF] << 2;
- sn = __sincostab.x[k];
- ssn = __sincostab.x[k + 1];
- cs = __sincostab.x[k + 2];
- ccs = __sincostab.x[k + 3];
+ SINCOS_TABLE_LOOKUP (u, sn, ssn, cs, ccs);
y1 = (y + t22) - t22;
y2 = (y - y1) + dx;
@@ -1496,7 +1355,7 @@ csloww2 (double x, double dx, double orig, int n)
if (w[0] == w[0] + cor)
return (n) ? -w[0] : w[0];
else
- return __mpcos1 (orig);
+ return __mpcos (orig, 0, true);
}
}
diff --git a/libc/sysdeps/ieee754/dbl-64/s_sincos.c b/libc/sysdeps/ieee754/dbl-64/s_sincos.c
index b6d5432f4..01d1bdf97 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_sincos.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_sincos.c
@@ -32,7 +32,7 @@ __sincos (double x, double *sinx, double *cosx)
/* |x| ~< pi/4 */
ix &= 0x7fffffff;
- if (ix>=0x7ff00000)
+ if (ix >= 0x7ff00000)
{
/* sin(Inf or NaN) is NaN */
*sinx = *cosx = x - x;
diff --git a/libc/sysdeps/ieee754/dbl-64/s_tan.c b/libc/sysdeps/ieee754/dbl-64/s_tan.c
index 54f863e54..09db096d0 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_tan.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_tan.c
@@ -41,6 +41,7 @@
#include <math.h>
#include <math_private.h>
#include <fenv.h>
+#include <stap-probe.h>
#ifndef SECTION
# define SECTION
@@ -58,8 +59,8 @@ tan (double x)
int ux, i, n;
double a, da, a2, b, db, c, dc, c1, cc1, c2, cc2, c3, cc3, fi, ffi, gi, pz,
- s, sy, t, t1, t2, t3, t4, t7, t8, t9, t10, w, x2, xn, xx2, y, ya, yya, z0,
- z, zz, z2, zz2;
+ s, sy, t, t1, t2, t3, t4, t7, t8, t9, t10, w, x2, xn, xx2, y, ya,
+ yya, z0, z, zz, z2, zz2;
#ifndef DLA_FMS
double t5, t6;
#endif
@@ -97,7 +98,6 @@ tan (double x)
/* (II) The case 1.259e-8 < abs(x) <= 0.0608 */
if (w <= g2.d)
{
-
/* First stage */
x2 = x * x;
@@ -149,7 +149,6 @@ tan (double x)
/* (III) The case 0.0608 < abs(x) <= 0.787 */
if (w <= g3.d)
{
-
/* First stage */
i = ((int) (mfftnhf.d + TWO8 * w));
z = w - xfg[i][0].d;
@@ -376,7 +375,7 @@ tan (double x)
/* Second stage */
ffi = xfg[i][3].d;
EADD (z0, yya, z, zz)
- MUL2 (z, zz, z, zz, z2, zz2, t1, t2, t3, t4, t5, t6, t7, t8);
+ MUL2 (z, zz, z, zz, z2, zz2, t1, t2, t3, t4, t5, t6, t7, t8);
c1 = z2 * (a7.d + z2 * (a9.d + z2 * a11.d));
ADD2 (a5.d, aa5.d, c1, 0.0, c2, cc2, t1, t2);
MUL2 (z2, zz2, c2, cc2, c1, cc1, t1, t2, t3, t4, t5, t6, t7, t8);
@@ -838,6 +837,7 @@ tanMp (double x)
p = 32;
__mptan (x, &mpy, p);
__mp_dbl (&mpy, &y, p);
+ LIBC_PROBE (slowtan, 2, &x, &y);
return y;
}
diff --git a/libc/sysdeps/ieee754/dbl-64/s_tanh.c b/libc/sysdeps/ieee754/dbl-64/s_tanh.c
index ded0d6025..23cfcdead 100644
--- a/libc/sysdeps/ieee754/dbl-64/s_tanh.c
+++ b/libc/sysdeps/ieee754/dbl-64/s_tanh.c
@@ -41,41 +41,51 @@ static char rcsid[] = "$NetBSD: s_tanh.c,v 1.7 1995/05/10 20:48:22 jtc Exp $";
#include <math.h>
#include <math_private.h>
-static const double one=1.0, two=2.0, tiny = 1.0e-300;
+static const double one = 1.0, two = 2.0, tiny = 1.0e-300;
-double __tanh(double x)
+double
+__tanh (double x)
{
- double t,z;
- int32_t jx,ix,lx;
+ double t, z;
+ int32_t jx, ix, lx;
- /* High word of |x|. */
- EXTRACT_WORDS(jx,lx,x);
- ix = jx&0x7fffffff;
+ /* High word of |x|. */
+ EXTRACT_WORDS (jx, lx, x);
+ ix = jx & 0x7fffffff;
- /* x is INF or NaN */
- if(ix>=0x7ff00000) {
- if (jx>=0) return one/x+one; /* tanh(+-inf)=+-1 */
- else return one/x-one; /* tanh(NaN) = NaN */
- }
+ /* x is INF or NaN */
+ if (ix >= 0x7ff00000)
+ {
+ if (jx >= 0)
+ return one / x + one; /* tanh(+-inf)=+-1 */
+ else
+ return one / x - one; /* tanh(NaN) = NaN */
+ }
- /* |x| < 22 */
- if (ix < 0x40360000) { /* |x|<22 */
- if ((ix | lx) == 0)
- return x; /* x == +-0 */
- if (ix<0x3c800000) /* |x|<2**-55 */
- return x*(one+x); /* tanh(small) = small */
- if (ix>=0x3ff00000) { /* |x|>=1 */
- t = __expm1(two*fabs(x));
- z = one - two/(t+two);
- } else {
- t = __expm1(-two*fabs(x));
- z= -t/(t+two);
- }
- /* |x| > 22, return +-1 */
- } else {
- z = one - tiny; /* raised inexact flag */
+ /* |x| < 22 */
+ if (ix < 0x40360000) /* |x|<22 */
+ {
+ if ((ix | lx) == 0)
+ return x; /* x == +-0 */
+ if (ix < 0x3c800000) /* |x|<2**-55 */
+ return x * (one + x); /* tanh(small) = small */
+ if (ix >= 0x3ff00000) /* |x|>=1 */
+ {
+ t = __expm1 (two * fabs (x));
+ z = one - two / (t + two);
+ }
+ else
+ {
+ t = __expm1 (-two * fabs (x));
+ z = -t / (t + two);
}
- return (jx>=0)? z: -z;
+ /* |x| > 22, return +-1 */
+ }
+ else
+ {
+ z = one - tiny; /* raised inexact flag */
+ }
+ return (jx >= 0) ? z : -z;
}
weak_alias (__tanh, tanh)
#ifdef NO_LONG_DOUBLE
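The two branches of the reformatted __tanh are the same function written two ways around expm1: with t = expm1(2|x|), tanh|x| = 1 - 2/(t + 2), and with t = expm1(-2|x|), tanh|x| = -t/(t + 2); the split only keeps the intermediates well conditioned on either side of |x| = 1. A short check of the two identities, illustrative only:

#include <math.h>
#include <stdio.h>

int
main (void)
{
  double x = 0.3;
  double t1 = expm1 (-2.0 * fabs (x));
  double t2 = expm1 (2.0 * fabs (x));
  printf ("%.17g\n", -t1 / (t1 + 2.0));       /* branch used for |x| < 1  */
  printf ("%.17g\n", 1.0 - 2.0 / (t2 + 2.0)); /* branch used for |x| >= 1 */
  printf ("%.17g\n", tanh (x));
  return 0;
}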
diff --git a/libc/sysdeps/ieee754/dbl-64/sincos32.c b/libc/sysdeps/ieee754/dbl-64/sincos32.c
index 954db66d6..f253b8ce8 100644
--- a/libc/sysdeps/ieee754/dbl-64/sincos32.c
+++ b/libc/sysdeps/ieee754/dbl-64/sincos32.c
@@ -48,312 +48,318 @@
# define SECTION
#endif
-/****************************************************************/
-/* Compute Multi-Precision sin() function for given p. Receive */
-/* Multi Precision number x and result stored at y */
-/****************************************************************/
+/* Compute Multi-Precision sin() function for given p. Receive Multi Precision
+ number x and result stored at y. */
static void
SECTION
-ss32(mp_no *x, mp_no *y, int p) {
+ss32 (mp_no *x, mp_no *y, int p)
+{
int i;
double a;
- mp_no mpt1,x2,gor,sum ,mpk={1,{1.0}};
- for (i=1;i<=p;i++) mpk.d[i]=0;
+ mp_no mpt1, x2, gor, sum, mpk = {1, {1.0}};
+ for (i = 1; i <= p; i++)
+ mpk.d[i] = 0;
- __sqr(x,&x2,p);
- __cpy(&oofac27,&gor,p);
- __cpy(&gor,&sum,p);
- for (a=27.0;a>1.0;a-=2.0) {
- mpk.d[1]=a*(a-1.0);
- __mul(&gor,&mpk,&mpt1,p);
- __cpy(&mpt1,&gor,p);
- __mul(&x2,&sum,&mpt1,p);
- __sub(&gor,&mpt1,&sum,p);
- }
- __mul(x,&sum,y,p);
+ __sqr (x, &x2, p);
+ __cpy (&oofac27, &gor, p);
+ __cpy (&gor, &sum, p);
+ for (a = 27.0; a > 1.0; a -= 2.0)
+ {
+ mpk.d[1] = a * (a - 1.0);
+ __mul (&gor, &mpk, &mpt1, p);
+ __cpy (&mpt1, &gor, p);
+ __mul (&x2, &sum, &mpt1, p);
+ __sub (&gor, &mpt1, &sum, p);
+ }
+ __mul (x, &sum, y, p);
}
-/**********************************************************************/
-/* Compute Multi-Precision cos() function for given p. Receive Multi */
-/* Precision number x and result stored at y */
-/**********************************************************************/
+/* Compute Multi-Precision cos() function for given p. Receive Multi Precision
+ number x and result stored at y. */
static void
SECTION
-cc32(mp_no *x, mp_no *y, int p) {
+cc32 (mp_no *x, mp_no *y, int p)
+{
int i;
double a;
- mp_no mpt1,x2,gor,sum ,mpk={1,{1.0}};
- for (i=1;i<=p;i++) mpk.d[i]=0;
+ mp_no mpt1, x2, gor, sum, mpk = {1, {1.0}};
+ for (i = 1; i <= p; i++)
+ mpk.d[i] = 0;
- __sqr(x,&x2,p);
- mpk.d[1]=27.0;
- __mul(&oofac27,&mpk,&gor,p);
- __cpy(&gor,&sum,p);
- for (a=26.0;a>2.0;a-=2.0) {
- mpk.d[1]=a*(a-1.0);
- __mul(&gor,&mpk,&mpt1,p);
- __cpy(&mpt1,&gor,p);
- __mul(&x2,&sum,&mpt1,p);
- __sub(&gor,&mpt1,&sum,p);
- }
- __mul(&x2,&sum,y,p);
+ __sqr (x, &x2, p);
+ mpk.d[1] = 27.0;
+ __mul (&oofac27, &mpk, &gor, p);
+ __cpy (&gor, &sum, p);
+ for (a = 26.0; a > 2.0; a -= 2.0)
+ {
+ mpk.d[1] = a * (a - 1.0);
+ __mul (&gor, &mpk, &mpt1, p);
+ __cpy (&mpt1, &gor, p);
+ __mul (&x2, &sum, &mpt1, p);
+ __sub (&gor, &mpt1, &sum, p);
+ }
+ __mul (&x2, &sum, y, p);
}
-/***************************************************************************/
-/* c32() computes both sin(x), cos(x) as Multi precision numbers */
-/***************************************************************************/
+/* Compute both sin(x), cos(x) as Multi precision numbers. */
void
SECTION
-__c32(mp_no *x, mp_no *y, mp_no *z, int p) {
- mp_no u,t,t1,t2,c,s;
+__c32 (mp_no *x, mp_no *y, mp_no *z, int p)
+{
+ mp_no u, t, t1, t2, c, s;
int i;
- __cpy(x,&u,p);
- u.e=u.e-1;
- cc32(&u,&c,p);
- ss32(&u,&s,p);
- for (i=0;i<24;i++) {
- __mul(&c,&s,&t,p);
- __sub(&s,&t,&t1,p);
- __add(&t1,&t1,&s,p);
- __sub(&mptwo,&c,&t1,p);
- __mul(&t1,&c,&t2,p);
- __add(&t2,&t2,&c,p);
- }
- __sub(&mpone,&c,y,p);
- __cpy(&s,z,p);
+ __cpy (x, &u, p);
+ u.e = u.e - 1;
+ cc32 (&u, &c, p);
+ ss32 (&u, &s, p);
+ for (i = 0; i < 24; i++)
+ {
+ __mul (&c, &s, &t, p);
+ __sub (&s, &t, &t1, p);
+ __add (&t1, &t1, &s, p);
+ __sub (&mptwo, &c, &t1, p);
+ __mul (&t1, &c, &t2, p);
+ __add (&t2, &t2, &c, p);
+ }
+ __sub (&mpone, &c, y, p);
+ __cpy (&s, z, p);
}
-/************************************************************************/
-/*Routine receive double x and two double results of sin(x) and return */
-/*result which is more accurate */
-/*Computing sin(x) with multi precision routine c32 */
-/************************************************************************/
+/* Receive double x and two double results of sin(x) and return result which is
+ more accurate, computing sin(x) with multi precision routine c32. */
double
SECTION
-__sin32(double x, double res, double res1) {
+__sin32 (double x, double res, double res1)
+{
int p;
- mp_no a,b,c;
- p=32;
- __dbl_mp(res,&a,p);
- __dbl_mp(0.5*(res1-res),&b,p);
- __add(&a,&b,&c,p);
- if (x>0.8)
- { __sub(&hp,&c,&a,p);
- __c32(&a,&b,&c,p);
- }
- else __c32(&c,&a,&b,p); /* b=sin(0.5*(res+res1)) */
- __dbl_mp(x,&c,p); /* c = x */
- __sub(&b,&c,&a,p);
- /* if a>0 return min(res,res1), otherwise return max(res,res1) */
- if (a.d[0]>0) return (res<res1)?res:res1;
- else return (res>res1)?res:res1;
+ mp_no a, b, c;
+ p = 32;
+ __dbl_mp (res, &a, p);
+ __dbl_mp (0.5 * (res1 - res), &b, p);
+ __add (&a, &b, &c, p);
+ if (x > 0.8)
+ {
+ __sub (&hp, &c, &a, p);
+ __c32 (&a, &b, &c, p);
+ }
+ else
+ __c32 (&c, &a, &b, p); /* b=sin(0.5*(res+res1)) */
+ __dbl_mp (x, &c, p); /* c = x */
+ __sub (&b, &c, &a, p);
+ /* if a > 0 return min (res, res1), otherwise return max (res, res1). */
+ if (a.d[0] > 0)
+ return (res < res1) ? res : res1;
+ else
+ return (res > res1) ? res : res1;
}
-/************************************************************************/
-/*Routine receive double x and two double results of cos(x) and return */
-/*result which is more accurate */
-/*Computing cos(x) with multi precision routine c32 */
-/************************************************************************/
+/* Receive double x and two double results of cos(x) and return result which is
+ more accurate, computing cos(x) with multi precision routine c32. */
double
SECTION
-__cos32(double x, double res, double res1) {
+__cos32 (double x, double res, double res1)
+{
int p;
- mp_no a,b,c;
- p=32;
- __dbl_mp(res,&a,p);
- __dbl_mp(0.5*(res1-res),&b,p);
- __add(&a,&b,&c,p);
- if (x>2.4)
- { __sub(&pi,&c,&a,p);
- __c32(&a,&b,&c,p);
- b.d[0]=-b.d[0];
- }
- else if (x>0.8)
- { __sub(&hp,&c,&a,p);
- __c32(&a,&c,&b,p);
- }
- else __c32(&c,&b,&a,p); /* b=cos(0.5*(res+res1)) */
- __dbl_mp(x,&c,p); /* c = x */
- __sub(&b,&c,&a,p);
- /* if a>0 return max(res,res1), otherwise return min(res,res1) */
- if (a.d[0]>0) return (res>res1)?res:res1;
- else return (res<res1)?res:res1;
+ mp_no a, b, c;
+ p = 32;
+ __dbl_mp (res, &a, p);
+ __dbl_mp (0.5 * (res1 - res), &b, p);
+ __add (&a, &b, &c, p);
+ if (x > 2.4)
+ {
+ __sub (&pi, &c, &a, p);
+ __c32 (&a, &b, &c, p);
+ b.d[0] = -b.d[0];
+ }
+ else if (x > 0.8)
+ {
+ __sub (&hp, &c, &a, p);
+ __c32 (&a, &c, &b, p);
+ }
+ else
+ __c32 (&c, &b, &a, p); /* b=cos(0.5*(res+res1)) */
+ __dbl_mp (x, &c, p); /* c = x */
+ __sub (&b, &c, &a, p);
+ /* if a > 0 return max (res, res1), otherwise return min (res, res1). */
+ if (a.d[0] > 0)
+ return (res > res1) ? res : res1;
+ else
+ return (res < res1) ? res : res1;
}
-/*******************************************************************/
-/*Compute sin(x+dx) as Multi Precision number and return result as */
-/* double */
-/*******************************************************************/
+/* Compute sin() of double-length number (X + DX) as Multi Precision number and
+ return result as double. If REDUCE_RANGE is true, X is assumed to be the
+ original input and DX is ignored. */
double
SECTION
-__mpsin(double x, double dx) {
- int p;
+__mpsin (double x, double dx, bool reduce_range)
+{
double y;
- mp_no a,b,c;
- p=32;
- __dbl_mp(x,&a,p);
- __dbl_mp(dx,&b,p);
- __add(&a,&b,&c,p);
- if (x>0.8) { __sub(&hp,&c,&a,p); __c32(&a,&b,&c,p); }
- else __c32(&c,&a,&b,p); /* b = sin(x+dx) */
- __mp_dbl(&b,&y,p);
- return y;
-}
+ mp_no a, b, c, s;
+ int n;
+ int p = 32;
-/*******************************************************************/
-/* Compute cos()of double-length number (x+dx) as Multi Precision */
-/* number and return result as double */
-/*******************************************************************/
-double
-SECTION
-__mpcos(double x, double dx) {
- int p;
- double y;
- mp_no a,b,c;
- p=32;
- __dbl_mp(x,&a,p);
- __dbl_mp(dx,&b,p);
- __add(&a,&b,&c,p);
- if (x>0.8)
- { __sub(&hp,&c,&b,p);
- __c32(&b,&c,&a,p);
- }
- else __c32(&c,&a,&b,p); /* a = cos(x+dx) */
- __mp_dbl(&a,&y,p);
- return y;
-}
+ if (reduce_range)
+ {
+ n = __mpranred (x, &a, p); /* n is 0, 1, 2 or 3. */
+ __c32 (&a, &c, &s, p);
+ }
+ else
+ {
+ n = -1;
+ __dbl_mp (x, &b, p);
+ __dbl_mp (dx, &c, p);
+ __add (&b, &c, &a, p);
+ if (x > 0.8)
+ {
+ __sub (&hp, &a, &b, p);
+ __c32 (&b, &s, &c, p);
+ }
+ else
+ __c32 (&a, &c, &s, p); /* b = sin(x+dx) */
+ }
-/******************************************************************/
-/* mpranred() performs range reduction of a double number x into */
-/* multi precision number y, such that y=x-n*pi/2, abs(y)<pi/4, */
-/* n=0,+-1,+-2,.... */
-/* Return int which indicates in which quarter of circle x is */
-/******************************************************************/
-int
-SECTION
-__mpranred(double x, mp_no *y, int p)
-{
- number v;
- double t,xn;
- int i,k,n;
- mp_no a,b,c;
+ /* Convert result based on which quarter of unit circle y is in. */
+ switch (n)
+ {
+ case 1:
+ __mp_dbl (&c, &y, p);
+ break;
+
+ case 3:
+ __mp_dbl (&c, &y, p);
+ y = -y;
+ break;
+
+ case 2:
+ __mp_dbl (&s, &y, p);
+ y = -y;
+ break;
- if (ABS(x) < 2.8e14) {
- t = (x*hpinv.d + toint.d);
- xn = t - toint.d;
- v.d = t;
- n =v.i[LOW_HALF]&3;
- __dbl_mp(xn,&a,p);
- __mul(&a,&hp,&b,p);
- __dbl_mp(x,&c,p);
- __sub(&c,&b,y,p);
- return n;
- }
- else { /* if x is very big more precision required */
- __dbl_mp(x,&a,p);
- a.d[0]=1.0;
- k = a.e-5;
- if (k < 0) k=0;
- b.e = -k;
- b.d[0] = 1.0;
- for (i=0;i<p;i++) b.d[i+1] = toverp[i+k];
- __mul(&a,&b,&c,p);
- t = c.d[c.e];
- for (i=1;i<=p-c.e;i++) c.d[i]=c.d[i+c.e];
- for (i=p+1-c.e;i<=p;i++) c.d[i]=0;
- c.e=0;
- if (c.d[1] >= HALFRAD)
- { t +=1.0;
- __sub(&c,&mpone,&b,p);
- __mul(&b,&hp,y,p);
+ /* Quadrant not set, so the result must be sin (X + DX), which is also in
+ S. */
+ case 0:
+ default:
+ __mp_dbl (&s, &y, p);
}
- else __mul(&c,&hp,y,p);
- n = (int) t;
- if (x < 0) { y->d[0] = - y->d[0]; n = -n; }
- return (n&3);
- }
+ return y;
}
-/*******************************************************************/
-/* Multi-Precision sin() function subroutine, for p=32. It is */
-/* based on the routines mpranred() and c32(). */
-/*******************************************************************/
+/* Compute cos() of double-length number (X + DX) as Multi Precision number and
+ return result as double. If REDUCE_RANGE is true, X is assumed to be the
+ original input and DX is ignored. */
double
SECTION
-__mpsin1(double x)
+__mpcos (double x, double dx, bool reduce_range)
{
- int p;
- int n;
- mp_no u,s,c;
double y;
- p=32;
- n=__mpranred(x,&u,p); /* n is 0, 1, 2 or 3 */
- __c32(&u,&c,&s,p);
- switch (n) { /* in which quarter of unit circle y is*/
- case 0:
- __mp_dbl(&s,&y,p);
- return y;
- break;
+ mp_no a, b, c, s;
+ int n;
+ int p = 32;
- case 2:
- __mp_dbl(&s,&y,p);
- return -y;
- break;
+ if (reduce_range)
+ {
+ n = __mpranred (x, &a, p); /* n is 0, 1, 2 or 3. */
+ __c32 (&a, &c, &s, p);
+ }
+ else
+ {
+ n = -1;
+ __dbl_mp (x, &b, p);
+ __dbl_mp (dx, &c, p);
+ __add (&b, &c, &a, p);
+ if (x > 0.8)
+ {
+ __sub (&hp, &a, &b, p);
+ __c32 (&b, &s, &c, p);
+ }
+ else
+ __c32 (&a, &c, &s, p); /* a = cos(x+dx) */
+ }
- case 1:
- __mp_dbl(&c,&y,p);
- return y;
- break;
+ /* Convert result based on which quarter of unit circle y is in. */
+ switch (n)
+ {
+ case 1:
+ __mp_dbl (&s, &y, p);
+ y = -y;
+ break;
- case 3:
- __mp_dbl(&c,&y,p);
- return -y;
- break;
+ case 3:
+ __mp_dbl (&s, &y, p);
+ break;
- }
- return 0; /* unreachable, to make the compiler happy */
-}
+ case 2:
+ __mp_dbl (&c, &y, p);
+ y = -y;
+ break;
-/*****************************************************************/
-/* Multi-Precision cos() function subroutine, for p=32. It is */
-/* based on the routines mpranred() and c32(). */
-/*****************************************************************/
+ /* Quadrant not set, so the result must be cos (X + DX), which is also
+ stored in C. */
+ case 0:
+ default:
+ __mp_dbl (&c, &y, p);
+ }
+ return y;
+}
-double
+/* Perform range reduction of a double number x into multi precision number y,
+ such that y = x - n * pi / 2, abs (y) < pi / 4, n = 0, +-1, +-2, ...
+ Return int which indicates in which quarter of circle x is. */
+int
SECTION
-__mpcos1(double x)
+__mpranred (double x, mp_no *y, int p)
{
- int p;
- int n;
- mp_no u,s,c;
- double y;
-
- p=32;
- n=__mpranred(x,&u,p); /* n is 0, 1, 2 or 3 */
- __c32(&u,&c,&s,p);
- switch (n) { /* in what quarter of unit circle y is*/
-
- case 0:
- __mp_dbl(&c,&y,p);
- return y;
- break;
-
- case 2:
- __mp_dbl(&c,&y,p);
- return -y;
- break;
-
- case 1:
- __mp_dbl(&s,&y,p);
- return -y;
- break;
-
- case 3:
- __mp_dbl(&s,&y,p);
- return y;
- break;
+ number v;
+ double t, xn;
+ int i, k, n;
+ mp_no a, b, c;
- }
- return 0; /* unreachable, to make the compiler happy */
+ if (ABS (x) < 2.8e14)
+ {
+ t = (x * hpinv.d + toint.d);
+ xn = t - toint.d;
+ v.d = t;
+ n = v.i[LOW_HALF] & 3;
+ __dbl_mp (xn, &a, p);
+ __mul (&a, &hp, &b, p);
+ __dbl_mp (x, &c, p);
+ __sub (&c, &b, y, p);
+ return n;
+ }
+ else
+ {
+ /* If x is very big more precision required. */
+ __dbl_mp (x, &a, p);
+ a.d[0] = 1.0;
+ k = a.e - 5;
+ if (k < 0)
+ k = 0;
+ b.e = -k;
+ b.d[0] = 1.0;
+ for (i = 0; i < p; i++)
+ b.d[i + 1] = toverp[i + k];
+ __mul (&a, &b, &c, p);
+ t = c.d[c.e];
+ for (i = 1; i <= p - c.e; i++)
+ c.d[i] = c.d[i + c.e];
+ for (i = p + 1 - c.e; i <= p; i++)
+ c.d[i] = 0;
+ c.e = 0;
+ if (c.d[1] >= HALFRAD)
+ {
+ t += 1.0;
+ __sub (&c, &mpone, &b, p);
+ __mul (&b, &hp, y, p);
+ }
+ else
+ __mul (&c, &hp, y, p);
+ n = (int) t;
+ if (x < 0)
+ {
+ y->d[0] = -y->d[0];
+ n = -n;
+ }
+ return (n & 3);
+ }
}
-/******************************************************************/
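The quadrant convention that the rewritten __mpsin and __mpcos switch on comes from __mpranred: y = x - n * pi/2 with |y| < pi/4, and the return value is n mod 4. In the fast branch (|x| < 2.8e14) the integer n is recovered with the 1.5*2^52 "toint" trick. A plain double-precision analogue of that branch; the real code does the final subtraction in multi-precision via __mul/__sub, and every name below is made up for illustration.

#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Sketch of the fast branch of __mpranred: return n mod 4 and y ~ x - n*pi/2.
   Valid only for moderate |x|; no multi-precision arithmetic.  */
static int
quadrant_sketch (double x, double *y)
{
  static const double toint = 6755399441055744.0;      /* 1.5 * 2^52 */
  static const double hpinv = 0.63661977236758138243;  /* 2/pi       */
  static const double hp    = 1.5707963267948966;      /* pi/2       */
  double t = x * hpinv + toint;     /* low mantissa bits now hold n        */
  double xn = t - toint;            /* n as a double, rounded to nearest   */
  uint64_t bits;
  memcpy (&bits, &t, sizeof bits);
  *y = x - xn * hp;                 /* the real code does this step in mp  */
  return (int) (bits & 3);
}

int
main (void)
{
  double y;
  int n = quadrant_sketch (10.0, &y);
  double check = (n & 2 ? -1.0 : 1.0) * (n & 1 ? cos (y) : sin (y));
  printf ("n = %d, y = %.15g\n", n, y);
  printf ("sin(10) = %.15g, reconstructed = %.15g\n", sin (10.0), check);
  return 0;
}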
diff --git a/libc/sysdeps/ieee754/dbl-64/slowexp.c b/libc/sysdeps/ieee754/dbl-64/slowexp.c
index 8f353f634..525224f44 100644
--- a/libc/sysdeps/ieee754/dbl-64/slowexp.c
+++ b/libc/sysdeps/ieee754/dbl-64/slowexp.c
@@ -29,6 +29,8 @@
/**************************************************************************/
#include <math_private.h>
+#include <stap-probe.h>
+
#ifndef USE_LONG_DOUBLE_FOR_MP
# include "mpa.h"
void __mpexp (mp_no *x, mp_no *y, int p);
@@ -60,13 +62,22 @@ __slowexp (double x)
__mp_dbl (&mpw, &w, p);
__mp_dbl (&mpz, &z, p);
if (w == z)
- return w;
+ {
+ /* Track how often we get to the slow exp code plus
+ its input/output values. */
+ LIBC_PROBE (slowexp_p6, 2, &x, &w);
+ return w;
+ }
else
{
p = 32;
__dbl_mp (x, &mpx, p);
__mpexp (&mpx, &mpy, p);
__mp_dbl (&mpy, &res, p);
+
+ /* Track how often we get to the uber-slow exp code plus
+ its input/output values. */
+ LIBC_PROBE (slowexp_p32, 2, &x, &res);
return res;
}
#else
diff --git a/libc/sysdeps/ieee754/dbl-64/slowpow.c b/libc/sysdeps/ieee754/dbl-64/slowpow.c
index a379728b1..d200c39e5 100644
--- a/libc/sysdeps/ieee754/dbl-64/slowpow.c
+++ b/libc/sysdeps/ieee754/dbl-64/slowpow.c
@@ -34,6 +34,8 @@
#include "mpa.h"
#include <math_private.h>
+#include <stap-probe.h>
+
#ifndef SECTION
# define SECTION
#endif
@@ -97,7 +99,12 @@ __slowpow (double x, double y, double z)
__sub (&mpp, &eps, &mpr1, p);
__mp_dbl (&mpr1, &res1, p);
if (res == res1)
- return res;
+ {
+ /* Track how often we get to the slow pow code plus
+ its input/output values. */
+ LIBC_PROBE (slowpow_p10, 4, &x, &y, &z, &res);
+ return res;
+ }
/* If we don't, then we repeat using a higher precision. 768 bits of
precision ought to be enough for anybody. */
@@ -109,5 +116,10 @@ __slowpow (double x, double y, double z)
__mul (&mpy, &mpz, &mpw, p);
__mpexp (&mpw, &mpp, p);
__mp_dbl (&mpp, &res, p);
+
+ /* Track how often we get to the uber-slow pow code plus
+ its input/output values. */
+ LIBC_PROBE (slowpow_p32, 4, &x, &y, &z, &res);
+
return res;
}
diff --git a/libc/sysdeps/ieee754/ldbl-128/printf_fphex.c b/libc/sysdeps/ieee754/ldbl-128/printf_fphex.c
index c9e09a4b7..e82228a53 100644
--- a/libc/sysdeps/ieee754/ldbl-128/printf_fphex.c
+++ b/libc/sysdeps/ieee754/ldbl-128/printf_fphex.c
@@ -24,13 +24,15 @@ do { \
digits we use only the implicit digits for the number before \
the decimal point. */ \
unsigned long long int num0, num1; \
+ union ieee854_long_double u; \
+ u.d = fpnum.ldbl; \
\
assert (sizeof (long double) == 16); \
\
- num0 = (((unsigned long long int) fpnum.ldbl.ieee.mantissa0) << 32 \
- | fpnum.ldbl.ieee.mantissa1); \
- num1 = (((unsigned long long int) fpnum.ldbl.ieee.mantissa2) << 32 \
- | fpnum.ldbl.ieee.mantissa3); \
+ num0 = (((unsigned long long int) u.ieee.mantissa0) << 32 \
+ | u.ieee.mantissa1); \
+ num1 = (((unsigned long long int) u.ieee.mantissa2) << 32 \
+ | u.ieee.mantissa3); \
\
zero_mantissa = (num0|num1) == 0; \
\
@@ -75,9 +77,9 @@ do { \
*--wnumstr = L'0'; \
} \
\
- leading = fpnum.ldbl.ieee.exponent == 0 ? '0' : '1'; \
+ leading = u.ieee.exponent == 0 ? '0' : '1'; \
\
- exponent = fpnum.ldbl.ieee.exponent; \
+ exponent = u.ieee.exponent; \
\
if (exponent == 0) \
{ \
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c
index abc78a35b..8a4a5bb7b 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c
@@ -36,8 +36,12 @@ __ieee754_acoshl(long double x)
{
long double t;
int64_t hx;
- u_int64_t lx;
- GET_LDOUBLE_WORDS64(hx,lx,x);
+ uint64_t lx;
+ double xhi, xlo;
+
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (hx, xhi);
+ EXTRACT_WORDS64 (lx, xlo);
if(hx<0x3ff0000000000000LL) { /* x < 1 */
return (x-x)/(x-x);
} else if(hx >=0x41b0000000000000LL) { /* x > 2**28 */
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_acosl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_acosl.c
index 5d2af3034..2cb288238 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_acosl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_acosl.c
@@ -151,26 +151,27 @@ static const long double
long double
__ieee754_acosl (long double x)
{
- long double z, r, w, p, q, s, t, f2;
- ieee854_long_double_shape_type u;
+ long double a, z, r, w, p, q, s, t, f2;
- u.value = __builtin_fabsl (x);
- if (u.value == 1.0L)
+ if (__glibc_unlikely (__isnanl (x)))
+ return x + x;
+ a = __builtin_fabsl (x);
+ if (a == 1.0L)
{
if (x > 0.0L)
return 0.0; /* acos(1) = 0 */
else
return (2.0 * pio2_hi) + (2.0 * pio2_lo); /* acos(-1)= pi */
}
- else if (u.value > 1.0L)
+ else if (a > 1.0L)
{
return (x - x) / (x - x); /* acos(|x| > 1) is NaN */
}
- if (u.value < 0.5L)
+ if (a < 0.5L)
{
- if (u.value < 6.938893903907228e-18L) /* |x| < 2**-57 */
+ if (a < 6.938893903907228e-18L) /* |x| < 2**-57 */
return pio2_hi + pio2_lo;
- if (u.value < 0.4375L)
+ if (a < 0.4375L)
{
/* Arcsine of x. */
z = x * x;
@@ -199,7 +200,7 @@ __ieee754_acosl (long double x)
return z;
}
/* .4375 <= |x| < .5 */
- t = u.value - 0.4375L;
+ t = a - 0.4375L;
p = ((((((((((P10 * t
+ P9) * t
+ P8) * t
@@ -230,9 +231,9 @@ __ieee754_acosl (long double x)
r = acosr4375 + r;
return r;
}
- else if (u.value < 0.625L)
+ else if (a < 0.625L)
{
- t = u.value - 0.5625L;
+ t = a - 0.5625L;
p = ((((((((((rS10 * t
+ rS9) * t
+ rS8) * t
@@ -264,7 +265,9 @@ __ieee754_acosl (long double x)
}
else
{ /* |x| >= .625 */
- z = (one - u.value) * 0.5;
+ double shi, slo;
+
+ z = (one - a) * 0.5;
s = __ieee754_sqrtl (z);
/* Compute an extended precision square root from
the Newton iteration s -> 0.5 * (s + z / s).
@@ -273,12 +276,11 @@ __ieee754_acosl (long double x)
Express s = f1 + f2 where f1 * f1 is exactly representable.
w = (z - s^2)/2s = (z - f1^2 - 2 f1 f2 - f2^2)/2s .
s + w has extended precision. */
- u.value = s;
- u.parts32.w2 = 0;
- u.parts32.w3 = 0;
- f2 = s - u.value;
- w = z - u.value * u.value;
- w = w - 2.0 * u.value * f2;
+ ldbl_unpack (s, &shi, &slo);
+ a = shi;
+ f2 = slo;
+ w = z - a * a;
+ w = w - 2.0 * a * f2;
w = w - f2 * f2;
w = w / (2.0 * s);
/* Arcsine of s. */
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_asinl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_asinl.c
index b39543949..dece11875 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_asinl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_asinl.c
@@ -131,19 +131,20 @@ static const long double
long double
__ieee754_asinl (long double x)
{
- long double t, w, p, q, c, r, s;
+ long double a, t, w, p, q, c, r, s;
int flag;
- ieee854_long_double_shape_type u;
+ if (__glibc_unlikely (__isnanl (x)))
+ return x + x;
flag = 0;
- u.value = __builtin_fabsl (x);
- if (u.value == 1.0L) /* |x|>= 1 */
+ a = __builtin_fabsl (x);
+ if (a == 1.0L) /* |x|>= 1 */
return x * pio2_hi + x * pio2_lo; /* asin(1)=+-pi/2 with inexact */
- else if (u.value >= 1.0L)
+ else if (a >= 1.0L)
return (x - x) / (x - x); /* asin(|x|>1) is NaN */
- else if (u.value < 0.5L)
+ else if (a < 0.5L)
{
- if (u.value < 6.938893903907228e-18L) /* |x| < 2**-57 */
+ if (a < 6.938893903907228e-18L) /* |x| < 2**-57 */
{
if (huge + x > one)
return x; /* return x with inexact if x!=0 */
@@ -155,9 +156,9 @@ __ieee754_asinl (long double x)
flag = 1;
}
}
- else if (u.value < 0.625L)
+ else if (a < 0.625L)
{
- t = u.value - 0.5625;
+ t = a - 0.5625;
p = ((((((((((rS10 * t
+ rS9) * t
+ rS8) * t
@@ -190,7 +191,7 @@ __ieee754_asinl (long double x)
else
{
/* 1 > |x| >= 0.625 */
- w = one - u.value;
+ w = one - a;
t = w * 0.5;
}
@@ -223,17 +224,14 @@ __ieee754_asinl (long double x)
}
s = __ieee754_sqrtl (t);
- if (u.value > 0.975L)
+ if (a > 0.975L)
{
w = p / q;
t = pio2_hi - (2.0 * (s + s * w) - pio2_lo);
}
else
{
- u.value = s;
- u.parts32.w3 = 0;
- u.parts32.w2 = 0;
- w = u.value;
+ w = ldbl_high (s);
c = (t - w * w) / (s + w);
r = p / q;
p = 2.0 * s * r - (pio2_lo - 2.0 * c);
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_atan2l.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_atan2l.c
index 3e0535561..b625323df 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_atan2l.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_atan2l.c
@@ -56,11 +56,15 @@ __ieee754_atan2l(long double y, long double x)
{
long double z;
int64_t k,m,hx,hy,ix,iy;
- u_int64_t lx,ly;
+ uint64_t lx;
+ double xhi, xlo, yhi;
- GET_LDOUBLE_WORDS64(hx,lx,x);
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (hx, xhi);
+ EXTRACT_WORDS64 (lx, xlo);
ix = hx&0x7fffffffffffffffLL;
- GET_LDOUBLE_WORDS64(hy,ly,y);
+ yhi = ldbl_high (y);
+ EXTRACT_WORDS64 (hy, yhi);
iy = hy&0x7fffffffffffffffLL;
if(((ix)>0x7ff0000000000000LL)||
((iy)>0x7ff0000000000000LL)) /* x or y is NaN */
@@ -70,7 +74,7 @@ __ieee754_atan2l(long double y, long double x)
m = ((hy>>63)&1)|((hx>>62)&2); /* 2*sign(x)+sign(y) */
/* when y = 0 */
- if((iy|(ly&0x7fffffffffffffffLL))==0) {
+ if(iy==0) {
switch(m) {
case 0:
case 1: return y; /* atan(+-0,+anything)=+-0 */
@@ -79,7 +83,7 @@ __ieee754_atan2l(long double y, long double x)
}
}
/* when x = 0 */
- if((ix|(lx&0x7fffffffffffffff))==0) return (hy<0)? -pi_o_2-tiny: pi_o_2+tiny;
+ if(ix==0) return (hy<0)? -pi_o_2-tiny: pi_o_2+tiny;
/* when x is INF */
if(ix==0x7ff0000000000000LL) {
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_atanhl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_atanhl.c
index f35182f03..29f2e9207 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_atanhl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_atanhl.c
@@ -40,8 +40,10 @@ __ieee754_atanhl(long double x)
{
long double t;
int64_t hx,ix;
- u_int64_t lx __attribute__ ((unused));
- GET_LDOUBLE_WORDS64(hx,lx,x);
+ double xhi;
+
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
ix = hx&0x7fffffffffffffffLL;
if (ix >= 0x3ff0000000000000LL) { /* |x|>=1 */
if (ix > 0x3ff0000000000000LL)
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_coshl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_coshl.c
index 3e8e1875c..05683bc02 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_coshl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_coshl.c
@@ -41,9 +41,11 @@ __ieee754_coshl (long double x)
{
long double t,w;
int64_t ix;
+ double xhi;
/* High word of |x|. */
- GET_LDOUBLE_MSW64(ix,x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (ix, xhi);
ix &= 0x7fffffffffffffffLL;
/* x is INF or NaN */
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_exp10l.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_exp10l.c
index 1eaf2fe39..49121ca31 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_exp10l.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_exp10l.c
@@ -36,9 +36,9 @@ __ieee754_exp10l (long double arg)
else if (arg > LDBL_MAX_10_EXP + 1)
return LDBL_MAX * LDBL_MAX;
- u.d = arg;
- arg_high = u.dd[0];
- arg_low = u.dd[1];
+ u.ld = arg;
+ arg_high = u.d[0].d;
+ arg_low = u.d[1].d;
exp_high = arg_high * log10_high;
exp_low = arg_high * log10_low + arg_low * M_LN10l;
return __ieee754_expl (exp_high) * __ieee754_expl (exp_low);
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_expl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_expl.c
index 1b994cd7a..f7c50bfd3 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_expl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_expl.c
@@ -162,39 +162,39 @@ __ieee754_expl (long double x)
x = x + xl;
/* Compute ex2 = 2^n_0 e^(argtable[tval1]) e^(argtable[tval2]). */
- ex2_u.d = __expl_table[T_EXPL_RES1 + tval1]
- * __expl_table[T_EXPL_RES2 + tval2];
+ ex2_u.ld = (__expl_table[T_EXPL_RES1 + tval1]
+ * __expl_table[T_EXPL_RES2 + tval2]);
n_i = (int)n;
/* 'unsafe' is 1 iff n_1 != 0. */
unsafe = fabsl(n_i) >= -LDBL_MIN_EXP - 1;
- ex2_u.ieee.exponent += n_i >> unsafe;
+ ex2_u.d[0].ieee.exponent += n_i >> unsafe;
/* Fortunately, there are no subnormal lowpart doubles in
__expl_table, only normal values and zeros.
But after scaling it can be subnormal. */
- exponent2 = ex2_u.ieee.exponent2 + (n_i >> unsafe);
- if (ex2_u.ieee.exponent2 == 0)
- /* assert ((ex2_u.ieee.mantissa2|ex2_u.ieee.mantissa3) == 0) */;
+ exponent2 = ex2_u.d[1].ieee.exponent + (n_i >> unsafe);
+ if (ex2_u.d[1].ieee.exponent == 0)
+ /* assert ((ex2_u.d[1].ieee.mantissa0|ex2_u.d[1].ieee.mantissa1) == 0) */;
else if (exponent2 > 0)
- ex2_u.ieee.exponent2 = exponent2;
+ ex2_u.d[1].ieee.exponent = exponent2;
else if (exponent2 <= -54)
{
- ex2_u.ieee.exponent2 = 0;
- ex2_u.ieee.mantissa2 = 0;
- ex2_u.ieee.mantissa3 = 0;
+ ex2_u.d[1].ieee.exponent = 0;
+ ex2_u.d[1].ieee.mantissa0 = 0;
+ ex2_u.d[1].ieee.mantissa1 = 0;
}
else
{
static const double
two54 = 1.80143985094819840000e+16, /* 4350000000000000 */
twom54 = 5.55111512312578270212e-17; /* 3C90000000000000 */
- ex2_u.dd[1] *= two54;
- ex2_u.ieee.exponent2 += n_i >> unsafe;
- ex2_u.dd[1] *= twom54;
+ ex2_u.d[1].d *= two54;
+ ex2_u.d[1].ieee.exponent += n_i >> unsafe;
+ ex2_u.d[1].d *= twom54;
}
/* Compute scale = 2^n_1. */
- scale_u.d = 1.0L;
- scale_u.ieee.exponent += n_i - (n_i >> unsafe);
+ scale_u.ld = 1.0L;
+ scale_u.d[0].ieee.exponent += n_i - (n_i >> unsafe);
/* Approximate e^x2 - 1, using a seventh-degree polynomial,
with maximum error in [-2^-16-2^-53,2^-16+2^-53]
@@ -204,7 +204,7 @@ __ieee754_expl (long double x)
/* Return result. */
fesetenv (&oldenv);
- result = x22 * ex2_u.d + ex2_u.d;
+ result = x22 * ex2_u.ld + ex2_u.ld;
/* Now we can test whether the result is ultimate or if we are unsure.
In the later case we should probably call a mpn based routine to give
@@ -238,7 +238,7 @@ __ieee754_expl (long double x)
if (!unsafe)
return result;
else
- return result * scale_u.d;
+ return result * scale_u.ld;
}
/* Exceptional cases: */
else if (isless (x, himark))
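
The exponent manipulation above now goes through the ieee754_double members of the reworked union, but the underlying trick is unchanged: scale by 2^n by adding n to the biased exponent field of a normal double. A minimal sketch, assuming glibc's installed <ieee754.h> and a result that stays within the normal range (scale_by_exponent is an illustrative name):

#include <ieee754.h>

static double
scale_by_exponent (double d, int n)
{
  union ieee754_double u;
  u.d = d;
  u.ieee.exponent += n;   /* caller guarantees no overflow/underflow */
  return u.d;
}
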
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_fmodl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_fmodl.c
index a60963c84..a140fb322 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_fmodl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_fmodl.c
@@ -27,76 +27,83 @@ static const long double one = 1.0, Zero[] = {0.0, -0.0,};
long double
__ieee754_fmodl (long double x, long double y)
{
- int64_t n,hx,hy,hz,ix,iy,sx, i;
- u_int64_t lx,ly,lz;
- int temp;
+ int64_t hx, hy, hz, sx, sy;
+ uint64_t lx, ly, lz;
+ int n, ix, iy;
+ double xhi, xlo, yhi, ylo;
- GET_LDOUBLE_WORDS64(hx,lx,x);
- GET_LDOUBLE_WORDS64(hy,ly,y);
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (hx, xhi);
+ EXTRACT_WORDS64 (lx, xlo);
+ ldbl_unpack (y, &yhi, &ylo);
+ EXTRACT_WORDS64 (hy, yhi);
+ EXTRACT_WORDS64 (ly, ylo);
sx = hx&0x8000000000000000ULL; /* sign of x */
- hx ^=sx; /* |x| */
- hy &= 0x7fffffffffffffffLL; /* |y| */
+ hx ^= sx; /* |x| */
+ sy = hy&0x8000000000000000ULL; /* sign of y */
+ hy ^= sy; /* |y| */
/* purge off exception values */
- if(__builtin_expect((hy|(ly&0x7fffffffffffffff))==0 ||
+ if(__builtin_expect(hy==0 ||
(hx>=0x7ff0000000000000LL)|| /* y=0,or x not finite */
(hy>0x7ff0000000000000LL),0)) /* or y is NaN */
return (x*y)/(x*y);
- if(__builtin_expect(hx<=hy,0)) {
- if((hx<hy)||(lx<ly)) return x; /* |x|<|y| return x */
- if(lx==ly)
- return Zero[(u_int64_t)sx>>63]; /* |x|=|y| return x*0*/
+ if (__builtin_expect (hx <= hy, 0))
+ {
+ /* If |x| < |y| return x. */
+ if (hx < hy)
+ return x;
+      /* At this point the absolute values of the high doubles of
+	 x and y must be equal.  */
+ /* If the low double of y is the same sign as the high
+ double of y (ie. the low double increases |y|)... */
+ if (((ly ^ sy) & 0x8000000000000000LL) == 0
+ /* ... then a different sign low double to high double
+ for x or same sign but lower magnitude... */
+ && (int64_t) (lx ^ sx) < (int64_t) (ly ^ sy))
+ /* ... means |x| < |y|. */
+ return x;
+ /* If the low double of x differs in sign to the high
+ double of x (ie. the low double decreases |x|)... */
+ if (((lx ^ sx) & 0x8000000000000000LL) != 0
+ /* ... then a different sign low double to high double
+ for y with lower magnitude (we've already caught
+ the same sign for y case above)... */
+ && (int64_t) (lx ^ sx) > (int64_t) (ly ^ sy))
+ /* ... means |x| < |y|. */
+ return x;
+ /* If |x| == |y| return x*0. */
+ if ((lx ^ sx) == (ly ^ sy))
+ return Zero[(uint64_t) sx >> 63];
}
- /* determine ix = ilogb(x) */
- if(__builtin_expect(hx<0x0010000000000000LL,0)) { /* subnormal x */
- if(hx==0) {
- for (ix = -1043, i=lx; i>0; i<<=1) ix -=1;
- } else {
- for (ix = -1022, i=(hx<<11); i>0; i<<=1) ix -=1;
- }
- } else ix = (hx>>52)-0x3ff;
-
- /* determine iy = ilogb(y) */
- if(__builtin_expect(hy<0x0010000000000000LL,0)) { /* subnormal y */
- if(hy==0) {
- for (iy = -1043, i=ly; i>0; i<<=1) iy -=1;
- } else {
- for (iy = -1022, i=(hy<<11); i>0; i<<=1) iy -=1;
- }
- } else iy = (hy>>52)-0x3ff;
-
/* Make the IBM extended format 105 bit mantissa look like the ieee854 112
bit mantissa so the following operations will give the correct
result. */
- ldbl_extract_mantissa(&hx, &lx, &temp, x);
- ldbl_extract_mantissa(&hy, &ly, &temp, y);
+ ldbl_extract_mantissa(&hx, &lx, &ix, x);
+ ldbl_extract_mantissa(&hy, &ly, &iy, y);
- /* set up {hx,lx}, {hy,ly} and align y to x */
- if(__builtin_expect(ix >= -1022, 1))
- hx = 0x0001000000000000LL|(0x0000ffffffffffffLL&hx);
- else { /* subnormal x, shift x to normal */
- n = -1022-ix;
- if(n<=63) {
- hx = (hx<<n)|(lx>>(64-n));
- lx <<= n;
- } else {
- hx = lx<<(n-64);
- lx = 0;
- }
- }
- if(__builtin_expect(iy >= -1022, 1))
- hy = 0x0001000000000000LL|(0x0000ffffffffffffLL&hy);
- else { /* subnormal y, shift y to normal */
- n = -1022-iy;
- if(n<=63) {
- hy = (hy<<n)|(ly>>(64-n));
- ly <<= n;
- } else {
- hy = ly<<(n-64);
- ly = 0;
- }
- }
+ if (__builtin_expect (ix == -IEEE754_DOUBLE_BIAS, 0))
+ {
+ /* subnormal x, shift x to normal. */
+ while ((hx & (1LL << 48)) == 0)
+ {
+ hx = (hx << 1) | (lx >> 63);
+ lx = lx << 1;
+ ix -= 1;
+ }
+ }
+
+ if (__builtin_expect (iy == -IEEE754_DOUBLE_BIAS, 0))
+ {
+ /* subnormal y, shift y to normal. */
+ while ((hy & (1LL << 48)) == 0)
+ {
+ hy = (hy << 1) | (ly >> 63);
+ ly = ly << 1;
+ iy -= 1;
+ }
+ }
/* fix point fmod */
n = ix - iy;
@@ -104,7 +111,7 @@ __ieee754_fmodl (long double x, long double y)
hz=hx-hy;lz=lx-ly; if(lx<ly) hz -= 1;
if(hz<0){hx = hx+hx+(lx>>63); lx = lx+lx;}
else {
- if((hz|(lz&0x7fffffffffffffff))==0) /* return sign(x)*0 */
+ if((hz|lz)==0) /* return sign(x)*0 */
return Zero[(u_int64_t)sx>>63];
hx = hz+hz+(lz>>63); lx = lz+lz;
}
@@ -113,7 +120,7 @@ __ieee754_fmodl (long double x, long double y)
if(hz>=0) {hx=hz;lx=lz;}
/* convert back to floating value and restore the sign */
- if((hx|(lx&0x7fffffffffffffff))==0) /* return sign(x)*0 */
+ if((hx|lx)==0) /* return sign(x)*0 */
return Zero[(u_int64_t)sx>>63];
while(hx<0x0001000000000000LL) { /* normalize x */
hx = hx+hx+(lx>>63); lx = lx+lx;
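
The new |x| <= |y| handling above compares the signed low doubles through their bit patterns (lx ^ sx versus ly ^ sy). The same test expressed in plain floating point, ignoring zeros and NaNs, looks like the sketch below; since |x| = |xhi| + sign(x)*xlo, equal high magnitudes reduce the comparison to the signed tails (ibm_abs_less is an illustrative name, not part of the patch):

/* |x| < |y| for two IBM long doubles, done on the component doubles
   rather than on the raw words as in the patch above.  */
static int
ibm_abs_less (long double x, long double y)
{
  double xhi = (double) x, yhi = (double) y;
  double xlo = (double) (x - (long double) xhi);
  double ylo = (double) (y - (long double) yhi);
  double sx = xhi < 0.0 ? -1.0 : 1.0;
  double sy = yhi < 0.0 ? -1.0 : 1.0;
  if (xhi * sx != yhi * sy)             /* |xhi| differs from |yhi| */
    return xhi * sx < yhi * sy;
  return xlo * sx < ylo * sy;           /* compare the signed tails */
}
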
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_gammal_r.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_gammal_r.c
index 90d8e3f0d..84c13de9b 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_gammal_r.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_gammal_r.c
@@ -122,11 +122,12 @@ long double
__ieee754_gammal_r (long double x, int *signgamp)
{
int64_t hx;
- u_int64_t lx;
+ double xhi;
- GET_LDOUBLE_WORDS64 (hx, lx, x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
- if (((hx | lx) & 0x7fffffffffffffffLL) == 0)
+ if ((hx & 0x7fffffffffffffffLL) == 0)
{
/* Return value for x == 0 is Inf with divide by zero exception. */
*signgamp = 0;
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_hypotl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_hypotl.c
index 768bd3b06..3b07a47b4 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_hypotl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_hypotl.c
@@ -45,76 +45,84 @@
#include <math.h>
#include <math_private.h>
-static const long double two600 = 0x1.0p+600L;
-static const long double two1022 = 0x1.0p+1022L;
-
long double
__ieee754_hypotl(long double x, long double y)
{
- long double a,b,t1,t2,y1,y2,w,kld;
+ long double a,b,a1,a2,b1,b2,w,kld;
int64_t j,k,ha,hb;
+ double xhi, yhi, hi, lo;
- GET_LDOUBLE_MSW64(ha,x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (ha, xhi);
+ yhi = ldbl_high (y);
+ EXTRACT_WORDS64 (hb, yhi);
ha &= 0x7fffffffffffffffLL;
- GET_LDOUBLE_MSW64(hb,y);
hb &= 0x7fffffffffffffffLL;
if(hb > ha) {a=y;b=x;j=ha; ha=hb;hb=j;} else {a=x;b=y;}
a = fabsl(a); /* a <- |a| */
b = fabsl(b); /* b <- |b| */
- if((ha-hb)>0x780000000000000LL) {return a+b;} /* x/y > 2**120 */
+ if((ha-hb)>0x0780000000000000LL) {return a+b;} /* x/y > 2**120 */
k=0;
kld = 1.0L;
if(ha > 0x5f30000000000000LL) { /* a>2**500 */
if(ha >= 0x7ff0000000000000LL) { /* Inf or NaN */
- u_int64_t low;
w = a+b; /* for sNaN */
- GET_LDOUBLE_LSW64(low,a);
- if(((ha&0xfffffffffffffLL)|(low&0x7fffffffffffffffLL))==0)
+ if(ha == 0x7ff0000000000000LL)
w = a;
- GET_LDOUBLE_LSW64(low,b);
- if(((hb^0x7ff0000000000000LL)|(low&0x7fffffffffffffffLL))==0)
+ if(hb == 0x7ff0000000000000LL)
w = b;
return w;
}
/* scale a and b by 2**-600 */
- ha -= 0x2580000000000000LL; hb -= 0x2580000000000000LL; k += 600;
- a /= two600;
- b /= two600;
- k += 600;
- kld = two600;
+ a *= 0x1p-600L;
+ b *= 0x1p-600L;
+ k = 600;
+ kld = 0x1p+600L;
}
- if(hb < 0x23d0000000000000LL) { /* b < 2**-450 */
+ else if(hb < 0x23d0000000000000LL) { /* b < 2**-450 */
if(hb <= 0x000fffffffffffffLL) { /* subnormal b or 0 */
- u_int64_t low;
- GET_LDOUBLE_LSW64(low,b);
- if((hb|(low&0x7fffffffffffffffLL))==0) return a;
- t1=two1022; /* t1=2^1022 */
- b *= t1;
- a *= t1;
- k -= 1022;
- kld = kld / two1022;
+ if(hb==0) return a;
+ a *= 0x1p+1022L;
+ b *= 0x1p+1022L;
+ k = -1022;
+ kld = 0x1p-1022L;
} else { /* scale a and b by 2^600 */
- ha += 0x2580000000000000LL; /* a *= 2^600 */
- hb += 0x2580000000000000LL; /* b *= 2^600 */
- k -= 600;
- a *= two600;
- b *= two600;
- kld = kld / two600;
+ a *= 0x1p+600L;
+ b *= 0x1p+600L;
+ k = -600;
+ kld = 0x1p-600L;
}
}
/* medium size a and b */
w = a-b;
if (w>b) {
- SET_LDOUBLE_WORDS64(t1,ha,0);
- t2 = a-t1;
- w = __ieee754_sqrtl(t1*t1-(b*(-b)-t2*(a+t1)));
+ ldbl_unpack (a, &hi, &lo);
+ a1 = hi;
+ a2 = lo;
+ /* a*a + b*b
+ = (a1+a2)*a + b*b
+ = a1*a + a2*a + b*b
+ = a1*(a1+a2) + a2*a + b*b
+ = a1*a1 + a1*a2 + a2*a + b*b
+ = a1*a1 + a2*(a+a1) + b*b */
+ w = __ieee754_sqrtl(a1*a1-(b*(-b)-a2*(a+a1)));
} else {
a = a+a;
- SET_LDOUBLE_WORDS64(y1,hb,0);
- y2 = b - y1;
- SET_LDOUBLE_WORDS64(t1,ha+0x0010000000000000LL,0);
- t2 = a - t1;
- w = __ieee754_sqrtl(t1*y1-(w*(-w)-(t1*y2+t2*b)));
+ ldbl_unpack (b, &hi, &lo);
+ b1 = hi;
+ b2 = lo;
+ ldbl_unpack (a, &hi, &lo);
+ a1 = hi;
+ a2 = lo;
+ /* a*a + b*b
+ = a*a + (a-b)*(a-b) - (a-b)*(a-b) + b*b
+ = a*a + w*w - (a*a - 2*a*b + b*b) + b*b
+ = w*w + 2*a*b
+ = w*w + (a1+a2)*b
+ = w*w + a1*b + a2*b
+ = w*w + a1*(b1+b2) + a2*b
+ = w*w + a1*b1 + a1*b2 + a2*b */
+ w = __ieee754_sqrtl(a1*b1-(w*(-w)-(a1*b2+a2*b)));
}
if(k!=0)
return w*kld;
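
The comments above already derive the rearrangements a*a + b*b = a1*a1 + a2*(a+a1) + b*b and w*w + a1*b1 + a1*b2 + a2*b; both are exact algebra once a (or b) is split into head and tail. A small check program under that assumption (not part of the patch):

#include <stdio.h>

int
main (void)
{
  long double a = 3.0000000123L, b = 4.0L;
  double a1 = (double) a;                 /* head of a */
  long double a2 = a - a1;                /* tail of a */
  long double lhs = a * a + b * b;
  long double rhs = (long double) a1 * a1 + a2 * (a + a1) + b * b;
  /* The two values agree to long double precision.  */
  printf ("%.25Lg\n%.25Lg\n", lhs, rhs);
  return 0;
}
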
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_ilogbl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_ilogbl.c
index 55f87ed42..aeace7c97 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_ilogbl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_ilogbl.c
@@ -31,26 +31,24 @@ static char rcsid[] = "$NetBSD: $";
int __ieee754_ilogbl(long double x)
{
- int64_t hx,lx;
+ int64_t hx;
int ix;
+ double xhi;
- GET_LDOUBLE_WORDS64(hx,lx,x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
hx &= 0x7fffffffffffffffLL;
if(hx <= 0x0010000000000000LL) {
- if((hx|(lx&0x7fffffffffffffffLL))==0)
+ if(hx==0)
return FP_ILOGB0; /* ilogbl(0) = FP_ILOGB0 */
else /* subnormal x */
- if(hx==0) {
- for (ix = -1043; lx>0; lx<<=1) ix -=1;
- } else {
- for (ix = -1022, hx<<=11; hx>0; hx<<=1) ix -=1;
- }
+ for (ix = -1022, hx<<=11; hx>0; hx<<=1) ix -=1;
return ix;
}
else if (hx<0x7ff0000000000000LL) return (hx>>52)-0x3ff;
else if (FP_ILOGBNAN != INT_MAX) {
/* ISO C99 requires ilogbl(+-Inf) == INT_MAX. */
- if (((hx^0x7ff0000000000000LL)|lx) == 0)
+ if (hx==0x7ff0000000000000LL)
return INT_MAX;
}
return FP_ILOGBNAN;
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_jnl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_jnl.c
index 40012e41e..817977da5 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_jnl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_jnl.c
@@ -70,26 +70,25 @@ static const long double
long double
__ieee754_jnl (int n, long double x)
{
- u_int32_t se;
+ uint32_t se, lx;
int32_t i, ix, sgn;
long double a, b, temp, di;
long double z, w;
- ieee854_long_double_shape_type u;
+ double xhi;
/* J(-n,x) = (-1)^n * J(n, x), J(n, -x) = (-1)^n * J(n, x)
* Thus, J(-n,x) = J(n,-x)
*/
- u.value = x;
- se = u.parts32.w0;
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS (se, lx, xhi);
ix = se & 0x7fffffff;
/* if J(n,NaN) is NaN */
if (ix >= 0x7ff00000)
{
- if ((u.parts32.w0 & 0xfffff) | u.parts32.w1
- | (u.parts32.w2 & 0x7fffffff) | u.parts32.w3)
+ if (((ix - 0x7ff00000) | lx) != 0)
return x + x;
}
@@ -298,21 +297,20 @@ strong_alias (__ieee754_jnl, __jnl_finite)
long double
__ieee754_ynl (int n, long double x)
{
- u_int32_t se;
+ uint32_t se, lx;
int32_t i, ix;
int32_t sign;
long double a, b, temp;
- ieee854_long_double_shape_type u;
+ double xhi;
- u.value = x;
- se = u.parts32.w0;
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS (se, lx, xhi);
ix = se & 0x7fffffff;
/* if Y(n,NaN) is NaN */
if (ix >= 0x7ff00000)
{
- if ((u.parts32.w0 & 0xfffff) | u.parts32.w1
- | (u.parts32.w2 & 0x7fffffff) | u.parts32.w3)
+ if (((ix - 0x7ff00000) | lx) != 0)
return x + x;
}
if (x <= 0.0L)
@@ -377,14 +375,16 @@ __ieee754_ynl (int n, long double x)
a = __ieee754_y0l (x);
b = __ieee754_y1l (x);
/* quit if b is -inf */
- u.value = b;
- se = u.parts32.w0 & 0xfff00000;
+ xhi = ldbl_high (b);
+ GET_HIGH_WORD (se, xhi);
+ se &= 0xfff00000;
for (i = 1; i < n && se != 0xfff00000; i++)
{
temp = b;
b = ((long double) (i + i) / x) * b - a;
- u.value = b;
- se = u.parts32.w0 & 0xfff00000;
+ xhi = ldbl_high (b);
+ GET_HIGH_WORD (se, xhi);
+ se &= 0xfff00000;
a = temp;
}
}
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_log10l.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_log10l.c
index fae774cea..1a6a4a0fa 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_log10l.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_log10l.c
@@ -182,11 +182,13 @@ __ieee754_log10l (long double x)
long double z;
long double y;
int e;
- int64_t hx, lx;
+ int64_t hx;
+ double xhi;
/* Test for domain */
- GET_LDOUBLE_WORDS64 (hx, lx, x);
- if (((hx & 0x7fffffffffffffffLL) | (lx & 0x7fffffffffffffffLL)) == 0)
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
+ if ((hx & 0x7fffffffffffffffLL) == 0)
return (-1.0L / (x - x));
if (hx < 0)
return (x - x) / (x - x);
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_log2l.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_log2l.c
index f0098f6c7..323ded0c0 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_log2l.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_log2l.c
@@ -177,11 +177,13 @@ __ieee754_log2l (x)
long double z;
long double y;
int e;
- int64_t hx, lx;
+ int64_t hx;
+ double xhi;
/* Test for domain */
- GET_LDOUBLE_WORDS64 (hx, lx, x);
- if (((hx & 0x7fffffffffffffffLL) | (lx & 0x7fffffffffffffffLL)) == 0)
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
+ if ((hx & 0x7fffffffffffffffLL) == 0)
return (-1.0L / (x - x));
if (hx < 0)
return (x - x) / (x - x);
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_logl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_logl.c
index 15b5edfab..b7db2b978 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_logl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_logl.c
@@ -188,18 +188,20 @@ static const long double
long double
__ieee754_logl(long double x)
{
- long double z, y, w;
- ieee854_long_double_shape_type u, t;
+ long double z, y, w, t;
unsigned int m;
int k, e;
+ double xhi;
+ uint32_t hx, lx;
- u.value = x;
- m = u.parts32.w0;
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS (hx, lx, xhi);
+ m = hx;
/* Check for IEEE special cases. */
k = m & 0x7fffffff;
/* log(0) = -infinity. */
- if ((k | u.parts32.w1 | (u.parts32.w2 & 0x7fffffff) | u.parts32.w3) == 0)
+ if ((k | lx) == 0)
{
return -0.5L / ZERO;
}
@@ -219,7 +221,7 @@ __ieee754_logl(long double x)
{
z = x - 1.0L;
k = 64;
- t.value = 1.0L;
+ t = 1.0L;
e = 0;
}
else
@@ -236,10 +238,8 @@ __ieee754_logl(long double x)
k = (m - 0xff000) >> 13;
/* t is the argument 0.5 + (k+26)/128
of the nearest item to u in the lookup table. */
- t.parts32.w0 = 0x3ff00000 + (k << 13);
- t.parts32.w1 = 0;
- t.parts32.w2 = 0;
- t.parts32.w3 = 0;
+ INSERT_WORDS (xhi, 0x3ff00000 + (k << 13), 0);
+ t = xhi;
w0 += 0x100000;
e -= 1;
k += 64;
@@ -247,17 +247,15 @@ __ieee754_logl(long double x)
else
{
k = (m - 0xfe000) >> 14;
- t.parts32.w0 = 0x3fe00000 + (k << 14);
- t.parts32.w1 = 0;
- t.parts32.w2 = 0;
- t.parts32.w3 = 0;
+ INSERT_WORDS (xhi, 0x3fe00000 + (k << 14), 0);
+ t = xhi;
}
- u.value = __scalbnl (u.value, ((int) ((w0 - u.parts32.w0) * 2)) >> 21);
+ x = __scalbnl (x, ((int) ((w0 - hx) * 2)) >> 21);
/* log(u) = log( t u/t ) = log(t) + log(u/t)
log(t) is tabulated in the lookup table.
Express log(u/t) = log(1+z), where z = u/t - 1 = (u-t)/t.
cf. Cody & Waite. */
- z = (u.value - t.value) / t.value;
+ z = (x - t) / t;
}
/* Series expansion of log(1+z). */
w = z * z;
@@ -284,7 +282,7 @@ __ieee754_logl(long double x)
y += e * ln2b; /* Base 2 exponent offset times ln(2). */
y += z;
y += logtbl[k-26]; /* log(t) - (t-1) */
- y += (t.value - 1.0L);
+ y += (t - 1.0L);
y += e * ln2a;
return y;
}
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_powl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_powl.c
index 8bd35d0c8..c942f2f24 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_powl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_powl.c
@@ -151,37 +151,32 @@ __ieee754_powl (long double x, long double y)
long double y1, t1, t2, r, s, t, u, v, w;
long double s2, s_h, s_l, t_h, t_l, ay;
int32_t i, j, k, yisint, n;
- u_int32_t ix, iy;
- int32_t hx, hy;
- ieee854_long_double_shape_type o, p, q;
+ uint32_t ix, iy;
+ int32_t hx, hy, hax;
+ double ohi, xhi, xlo, yhi, ylo;
+ uint32_t lx, ly, lj;
- p.value = x;
- hx = p.parts32.w0;
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS (hx, lx, xhi);
ix = hx & 0x7fffffff;
- q.value = y;
- hy = q.parts32.w0;
+ ldbl_unpack (y, &yhi, &ylo);
+ EXTRACT_WORDS (hy, ly, yhi);
iy = hy & 0x7fffffff;
-
/* y==zero: x**0 = 1 */
- if ((iy | q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) == 0)
+ if ((iy | ly) == 0)
return one;
/* 1.0**y = 1; -1.0**+-Inf = 1 */
if (x == one)
return one;
- if (x == -1.0L && iy == 0x7ff00000
- && (q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) == 0)
+ if (x == -1.0L && ((iy - 0x7ff00000) | ly) == 0)
return one;
/* +-NaN return x+y */
- if ((ix > 0x7ff00000)
- || ((ix == 0x7ff00000)
- && ((p.parts32.w1 | (p.parts32.w2 & 0x7fffffff) | p.parts32.w3) != 0))
- || (iy > 0x7ff00000)
- || ((iy == 0x7ff00000)
- && ((q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) != 0)))
+ if ((ix >= 0x7ff00000 && ((ix - 0x7ff00000) | lx) != 0)
+ || (iy >= 0x7ff00000 && ((iy - 0x7ff00000) | ly) != 0))
return x + y;
/* determine if y is an odd int when x < 0
@@ -192,7 +187,10 @@ __ieee754_powl (long double x, long double y)
yisint = 0;
if (hx < 0)
{
- if ((q.parts32.w2 & 0x7fffffff) >= 0x43400000) /* Low part >= 2^53 */
+ uint32_t low_ye;
+
+ GET_HIGH_WORD (low_ye, ylo);
+ if ((low_ye & 0x7fffffff) >= 0x43400000) /* Low part >= 2^53 */
yisint = 2; /* even integer y */
else if (iy >= 0x3ff00000) /* 1.0 */
{
@@ -207,42 +205,43 @@ __ieee754_powl (long double x, long double y)
}
}
+ ax = fabsl (x);
+
/* special value of y */
- if ((q.parts32.w1 | (q.parts32.w2 & 0x7fffffff) | q.parts32.w3) == 0)
+ if (ly == 0)
{
- if (iy == 0x7ff00000 && q.parts32.w1 == 0) /* y is +-inf */
+ if (iy == 0x7ff00000) /* y is +-inf */
{
- if (((ix - 0x3ff00000) | p.parts32.w1
- | (p.parts32.w2 & 0x7fffffff) | p.parts32.w3) == 0)
- return y - y; /* inf**+-1 is NaN */
- else if (ix > 0x3ff00000 || fabsl (x) > 1.0L)
+ if (ax > one)
/* (|x|>1)**+-inf = inf,0 */
return (hy >= 0) ? y : zero;
else
/* (|x|<1)**-,+inf = inf,0 */
return (hy < 0) ? -y : zero;
}
- if (iy == 0x3ff00000)
- { /* y is +-1 */
- if (hy < 0)
- return one / x;
- else
- return x;
- }
- if (hy == 0x40000000)
- return x * x; /* y is 2 */
- if (hy == 0x3fe00000)
- { /* y is 0.5 */
- if (hx >= 0) /* x >= +0 */
- return __ieee754_sqrtl (x);
+ if (ylo == 0.0)
+ {
+ if (iy == 0x3ff00000)
+ { /* y is +-1 */
+ if (hy < 0)
+ return one / x;
+ else
+ return x;
+ }
+ if (hy == 0x40000000)
+ return x * x; /* y is 2 */
+ if (hy == 0x3fe00000)
+ { /* y is 0.5 */
+ if (hx >= 0) /* x >= +0 */
+ return __ieee754_sqrtl (x);
+ }
}
}
- ax = fabsl (x);
/* special value of x */
- if ((p.parts32.w1 | (p.parts32.w2 & 0x7fffffff) | p.parts32.w3) == 0)
+ if (lx == 0)
{
- if (ix == 0x7ff00000 || ix == 0 || ix == 0x3ff00000)
+ if (ix == 0x7ff00000 || ix == 0 || (ix == 0x3ff00000 && xlo == 0.0))
{
z = ax; /*x is +-0,+-inf,+-1 */
if (hy < 0)
@@ -294,8 +293,8 @@ __ieee754_powl (long double x, long double y)
{
ax *= two113;
n -= 113;
- o.value = ax;
- ix = o.parts32.w0;
+ ohi = ldbl_high (ax);
+ GET_HIGH_WORD (ix, ohi);
}
n += ((ix) >> 20) - 0x3ff;
j = ix & 0x000fffff;
@@ -312,26 +311,19 @@ __ieee754_powl (long double x, long double y)
ix -= 0x00100000;
}
- o.value = ax;
- o.value = __scalbnl (o.value, ((int) ((ix - o.parts32.w0) * 2)) >> 21);
- ax = o.value;
+ ohi = ldbl_high (ax);
+ GET_HIGH_WORD (hax, ohi);
+ ax = __scalbnl (ax, ((int) ((ix - hax) * 2)) >> 21);
/* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */
v = one / (ax + bp[k]);
s = u * v;
- s_h = s;
+ s_h = ldbl_high (s);
- o.value = s_h;
- o.parts32.w3 = 0;
- o.parts32.w2 = 0;
- s_h = o.value;
/* t_h=ax+bp[k] High */
t_h = ax + bp[k];
- o.value = t_h;
- o.parts32.w3 = 0;
- o.parts32.w2 = 0;
- t_h = o.value;
+ t_h = ldbl_high (t_h);
t_l = ax - (t_h - bp[k]);
s_l = v * ((u - s_h * t_h) - s_h * t_l);
/* compute log(ax) */
@@ -342,30 +334,21 @@ __ieee754_powl (long double x, long double y)
r += s_l * (s_h + s);
s2 = s_h * s_h;
t_h = 3.0 + s2 + r;
- o.value = t_h;
- o.parts32.w3 = 0;
- o.parts32.w2 = 0;
- t_h = o.value;
+ t_h = ldbl_high (t_h);
t_l = r - ((t_h - 3.0) - s2);
/* u+v = s*(1+...) */
u = s_h * t_h;
v = s_l * t_h + t_l * s;
/* 2/(3log2)*(s+...) */
p_h = u + v;
- o.value = p_h;
- o.parts32.w3 = 0;
- o.parts32.w2 = 0;
- p_h = o.value;
+ p_h = ldbl_high (p_h);
p_l = v - (p_h - u);
z_h = cp_h * p_h; /* cp_h+cp_l = 2/(3*log2) */
z_l = cp_l * p_h + p_l * cp + dp_l[k];
/* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
t = (long double) n;
t1 = (((z_h + z_l) + dp_h[k]) + t);
- o.value = t1;
- o.parts32.w3 = 0;
- o.parts32.w2 = 0;
- t1 = o.value;
+ t1 = ldbl_high (t1);
t2 = z_l - (((t1 - t) - dp_h[k]) - z_h);
/* s (sign of result -ve**odd) = -1 else = 1 */
@@ -374,21 +357,16 @@ __ieee754_powl (long double x, long double y)
s = -one; /* (-ve)**(odd int) */
/* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */
- y1 = y;
- o.value = y1;
- o.parts32.w3 = 0;
- o.parts32.w2 = 0;
- y1 = o.value;
+ y1 = ldbl_high (y);
p_l = (y - y1) * t1 + y * t2;
p_h = y1 * t1;
z = p_l + p_h;
- o.value = z;
- j = o.parts32.w0;
+ ohi = ldbl_high (z);
+ EXTRACT_WORDS (j, lj, ohi);
if (j >= 0x40d00000) /* z >= 16384 */
{
/* if z > 16384 */
- if (((j - 0x40d00000) | o.parts32.w1
- | (o.parts32.w2 & 0x7fffffff) | o.parts32.w3) != 0)
+ if (((j - 0x40d00000) | lj) != 0)
return s * huge * huge; /* overflow */
else
{
@@ -399,8 +377,7 @@ __ieee754_powl (long double x, long double y)
else if ((j & 0x7fffffff) >= 0x40d01b90) /* z <= -16495 */
{
/* z < -16495 */
- if (((j - 0xc0d01bc0) | o.parts32.w1
- | (o.parts32.w2 & 0x7fffffff) | o.parts32.w3) != 0)
+ if (((j - 0xc0d01bc0) | lj) != 0)
return s * tiny * tiny; /* underflow */
else
{
@@ -419,10 +396,7 @@ __ieee754_powl (long double x, long double y)
p_h -= t;
}
t = p_l + p_h;
- o.value = t;
- o.parts32.w3 = 0;
- o.parts32.w2 = 0;
- t = o.value;
+ t = ldbl_high (t);
u = t * lg2_h;
v = (p_l - (t - p_h)) * lg2 + t * lg2_l;
z = u + v;
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c
index 6a72d6a85..36bc03226 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_rem_pio2l.c
@@ -200,10 +200,11 @@ int32_t __ieee754_rem_pio2l(long double x, long double *y)
double tx[8];
int exp;
int64_t n, ix, hx, ixd;
- u_int64_t lx __attribute__ ((unused));
u_int64_t lxd;
+ double xhi;
- GET_LDOUBLE_WORDS64 (hx, lx, x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
ix = hx & 0x7fffffffffffffffLL;
if (ix <= 0x3fe921fb54442d10LL) /* x in <-pi/4, pi/4> */
{
@@ -243,7 +244,7 @@ int32_t __ieee754_rem_pio2l(long double x, long double *y)
We split the 113 bits of the mantissa into 5 24bit integers
stored in a double array. */
/* Make the IBM extended format 105 bit mantissa look like the ieee854 112
- bit mantissa so the next operatation will give the correct result. */
+ bit mantissa so the next operation will give the correct result. */
ldbl_extract_mantissa (&ixd, &lxd, &exp, x);
exp = exp - 23;
/* This is faster than doing this in floating point, because we
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_remainderl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_remainderl.c
index 67d7db7fb..800416f29 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_remainderl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_remainderl.c
@@ -33,18 +33,22 @@ __ieee754_remainderl(long double x, long double p)
int64_t hx,hp;
u_int64_t sx,lx,lp;
long double p_half;
+ double xhi, xlo, phi, plo;
- GET_LDOUBLE_WORDS64(hx,lx,x);
- GET_LDOUBLE_WORDS64(hp,lp,p);
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (hx, xhi);
+ EXTRACT_WORDS64 (lx, xlo);
+ ldbl_unpack (p, &phi, &plo);
+ EXTRACT_WORDS64 (hp, phi);
+ EXTRACT_WORDS64 (lp, plo);
sx = hx&0x8000000000000000ULL;
hp &= 0x7fffffffffffffffLL;
hx &= 0x7fffffffffffffffLL;
/* purge off exception values */
- if((hp|(lp&0x7fffffffffffffff))==0) return (x*p)/(x*p); /* p = 0 */
+ if(hp==0) return (x*p)/(x*p); /* p = 0 */
if((hx>=0x7ff0000000000000LL)|| /* x not finite */
- ((hp>=0x7ff0000000000000LL)&& /* p is NaN */
- (((hp-0x7ff0000000000000LL)|lp)!=0)))
+ (hp>0x7ff0000000000000LL)) /* p is NaN */
return (x*p)/(x*p);
@@ -64,8 +68,8 @@ __ieee754_remainderl(long double x, long double p)
if(x>=p_half) x -= p;
}
}
- GET_LDOUBLE_MSW64(hx,x);
- SET_LDOUBLE_MSW64(x,hx^sx);
+ if (sx)
+ x = -x;
return x;
}
strong_alias (__ieee754_remainderl, __remainderl_finite)
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_sinhl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_sinhl.c
index 4e8481c41..1790bef87 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_sinhl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_sinhl.c
@@ -38,9 +38,11 @@ __ieee754_sinhl(long double x)
{
long double t,w,h;
int64_t ix,jx;
+ double xhi;
/* High word of |x|. */
- GET_LDOUBLE_MSW64(jx,x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (jx, xhi);
ix = jx&0x7fffffffffffffffLL;
/* x is INF or NaN */
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c
index 2b0f7c62e..61feb367f 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_sqrtl.c
@@ -34,15 +34,13 @@
#include <math_private.h>
-typedef unsigned int int4;
-typedef union {int4 i[4]; long double x; double d[2]; } mynumber;
+typedef union {int64_t i[2]; long double x; double d[2]; } mynumber;
-static const mynumber
- t512 = {{0x5ff00000, 0x00000000, 0x00000000, 0x00000000 }}, /* 2^512 */
- tm256 = {{0x2ff00000, 0x00000000, 0x00000000, 0x00000000 }}; /* 2^-256 */
static const double
-two54 = 1.80143985094819840000e+16, /* 0x4350000000000000 */
-twom54 = 5.55111512312578270212e-17; /* 0x3C90000000000000 */
+ t512 = 0x1p512,
+ tm256 = 0x1p-256,
+ two54 = 0x1p54, /* 0x4350000000000000 */
+ twom54 = 0x1p-54; /* 0x3C90000000000000 */
/*********************************************************************/
/* An ultimate sqrt routine. Given an IEEE double machine number x */
@@ -54,56 +52,53 @@ long double __ieee754_sqrtl(long double x)
static const long double big = 134217728.0, big1 = 134217729.0;
long double t,s,i;
mynumber a,c;
- int4 k, l, m;
- int n;
+ uint64_t k, l;
+ int64_t m, n;
double d;
a.x=x;
- k=a.i[0] & 0x7fffffff;
+ k=a.i[0] & INT64_C(0x7fffffffffffffff);
/*----------------- 2^-1022 <= | x |< 2^1024 -----------------*/
- if (k>0x000fffff && k<0x7ff00000) {
+ if (k>INT64_C(0x000fffff00000000) && k<INT64_C(0x7ff0000000000000)) {
if (x < 0) return (big1-big1)/(big-big);
- l = (k&0x001fffff)|0x3fe00000;
- if (((a.i[2] & 0x7fffffff) | a.i[3]) != 0) {
- n = (int) ((l - k) * 2) >> 21;
- m = (a.i[2] >> 20) & 0x7ff;
+ l = (k&INT64_C(0x001fffffffffffff))|INT64_C(0x3fe0000000000000);
+ if ((a.i[1] & INT64_C(0x7fffffffffffffff)) != 0) {
+ n = (int64_t) ((l - k) * 2) >> 53;
+ m = (a.i[1] >> 52) & 0x7ff;
if (m == 0) {
a.d[1] *= two54;
- m = ((a.i[2] >> 20) & 0x7ff) - 54;
+ m = ((a.i[1] >> 52) & 0x7ff) - 54;
}
m += n;
- if ((int) m > 0)
- a.i[2] = (a.i[2] & 0x800fffff) | (m << 20);
- else if ((int) m <= -54) {
- a.i[2] &= 0x80000000;
- a.i[3] = 0;
+ if (m > 0)
+ a.i[1] = (a.i[1] & INT64_C(0x800fffffffffffff)) | (m << 52);
+ else if (m <= -54) {
+ a.i[1] &= INT64_C(0x8000000000000000);
} else {
m += 54;
- a.i[2] = (a.i[2] & 0x800fffff) | (m << 20);
+ a.i[1] = (a.i[1] & INT64_C(0x800fffffffffffff)) | (m << 52);
a.d[1] *= twom54;
}
}
a.i[0] = l;
s = a.x;
d = __ieee754_sqrt (a.d[0]);
- c.i[0] = 0x20000000+((k&0x7fe00000)>>1);
+ c.i[0] = INT64_C(0x2000000000000000)+((k&INT64_C(0x7fe0000000000000))>>1);
c.i[1] = 0;
- c.i[2] = 0;
- c.i[3] = 0;
i = d;
t = 0.5L * (i + s / i);
i = 0.5L * (t + s / t);
return c.x * i;
}
else {
- if (k>=0x7ff00000) {
- if (a.i[0] == 0xfff00000 && a.i[1] == 0)
+ if (k>=INT64_C(0x7ff0000000000000)) {
+ if (a.i[0] == INT64_C(0xfff0000000000000))
return (big1-big1)/(big-big); /* sqrt (-Inf) = NaN. */
return x; /* sqrt (NaN) = NaN, sqrt (+Inf) = +Inf. */
}
if (x == 0) return x;
if (x < 0) return (big1-big1)/(big-big);
- return tm256.x*__ieee754_sqrtl(x*t512.x);
+ return tm256*__ieee754_sqrtl(x*t512);
}
}
strong_alias (__ieee754_sqrtl, __sqrtl_finite)
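
Stripped of the exponent scaling, the core of the routine above seeds with the double-precision sqrt of the high part and recovers the remaining bits with two Newton steps in long double. A minimal sketch for arguments already near 1 (sqrtl_newton_sketch is an illustrative name):

#include <math.h>

static long double
sqrtl_newton_sketch (long double s)
{
  long double i = sqrt ((double) s);   /* ~53-bit approximation */
  long double t = 0.5L * (i + s / i);  /* first Newton step */
  return 0.5L * (t + s / t);           /* second Newton step */
}
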
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/ieee754.h b/libc/sysdeps/ieee754/ldbl-128ibm/ieee754.h
index 9e94f53b0..0c97a9920 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/ieee754.h
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/ieee754.h
@@ -111,61 +111,6 @@ union ieee754_double
#define IEEE754_DOUBLE_BIAS 0x3ff /* Added to exponent. */
-union ieee854_long_double
- {
- long double d;
-
- /* This is the IEEE 854 quad-precision format. */
- struct
- {
-#if __BYTE_ORDER == __BIG_ENDIAN
- unsigned int negative:1;
- unsigned int exponent:15;
- /* Together these comprise the mantissa. */
- unsigned int mantissa0:16;
- unsigned int mantissa1:32;
- unsigned int mantissa2:32;
- unsigned int mantissa3:32;
-#endif /* Big endian. */
-#if __BYTE_ORDER == __LITTLE_ENDIAN
- /* Together these comprise the mantissa. */
- unsigned int mantissa3:32;
- unsigned int mantissa2:32;
- unsigned int mantissa1:32;
- unsigned int mantissa0:16;
- unsigned int exponent:15;
- unsigned int negative:1;
-#endif /* Little endian. */
- } ieee;
-
- /* This format makes it easier to see if a NaN is a signalling NaN. */
- struct
- {
-#if __BYTE_ORDER == __BIG_ENDIAN
- unsigned int negative:1;
- unsigned int exponent:15;
- unsigned int quiet_nan:1;
- /* Together these comprise the mantissa. */
- unsigned int mantissa0:15;
- unsigned int mantissa1:32;
- unsigned int mantissa2:32;
- unsigned int mantissa3:32;
-#else
- /* Together these comprise the mantissa. */
- unsigned int mantissa3:32;
- unsigned int mantissa2:32;
- unsigned int mantissa1:32;
- unsigned int mantissa0:15;
- unsigned int quiet_nan:1;
- unsigned int exponent:15;
- unsigned int negative:1;
-#endif
- } ieee_nan;
- };
-
-#define IEEE854_LONG_DOUBLE_BIAS 0x3fff /* Added to exponent. */
-
-
/* IBM extended format for long double.
Each long double is made up of two IEEE doubles. The value of the
@@ -179,49 +124,10 @@ union ieee854_long_double
union ibm_extended_long_double
{
- long double d;
- double dd[2];
-
- /* This is the IBM extended format long double. */
- struct
- { /* Big endian. There is no other. */
-
- unsigned int negative:1;
- unsigned int exponent:11;
- /* Together Mantissa0-3 comprise the mantissa. */
- unsigned int mantissa0:20;
- unsigned int mantissa1:32;
-
- unsigned int negative2:1;
- unsigned int exponent2:11;
- /* There is an implied 1 here? */
- /* Together these comprise the mantissa. */
- unsigned int mantissa2:20;
- unsigned int mantissa3:32;
- } ieee;
-
- /* This format makes it easier to see if a NaN is a signalling NaN. */
- struct
- { /* Big endian. There is no other. */
-
- unsigned int negative:1;
- unsigned int exponent:11;
- unsigned int quiet_nan:1;
- /* Together Mantissa0-3 comprise the mantissa. */
- unsigned int mantissa0:19;
- unsigned int mantissa1:32;
-
- unsigned int negative2:1;
- unsigned int exponent2:11;
- /* There is an implied 1 here? */
- /* Together these comprise the mantissa. */
- unsigned int mantissa2:20;
- unsigned int mantissa3:32;
- } ieee_nan;
+ long double ld;
+ union ieee754_double d[2];
};
-#define IBM_EXTENDED_LONG_DOUBLE_BIAS 0x3ff /* Added to exponent. */
-
__END_DECLS
#endif /* ieee754.h */
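
With the union reduced to a long double overlaid on two ieee754_double members, the component doubles and their bit fields are reached as u.d[0] and u.d[1]. A short sketch of the access pattern the rest of the patch relies on, assuming the ldbl-128ibm <ieee754.h> defined here (split_ibm_ldbl is an illustrative name; the real code uses ldbl_unpack):

#include <ieee754.h>

static void
split_ibm_ldbl (long double x, double *hi, double *lo)
{
  union ibm_extended_long_double u;
  u.ld = x;
  *hi = u.d[0].d;   /* high double: value rounded to double */
  *lo = u.d[1].d;   /* low double: the remainder */
}
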
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/k_cosl.c b/libc/sysdeps/ieee754/ldbl-128ibm/k_cosl.c
index 0b81782fd..046f3b573 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/k_cosl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/k_cosl.c
@@ -81,8 +81,11 @@ __kernel_cosl(long double x, long double y)
{
long double h, l, z, sin_l, cos_l_m1;
int64_t ix;
- u_int32_t tix, hix, index;
- GET_LDOUBLE_MSW64 (ix, x);
+ uint32_t tix, hix, index;
+ double xhi, hhi;
+
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (ix, xhi);
tix = ((u_int64_t)ix) >> 32;
tix &= ~0x80000000; /* tix = |x|'s high 32 bits */
if (tix < 0x3fc30000) /* |x| < 0.1484375 */
@@ -136,7 +139,8 @@ __kernel_cosl(long double x, long double y)
case 2: index = (hix - 0x3fc30000) >> 14; break;
}
*/
- SET_LDOUBLE_WORDS64(h, ((u_int64_t)hix) << 32, 0);
+ INSERT_WORDS64 (hhi, ((uint64_t)hix) << 32);
+ h = hhi;
l = y - (h - x);
z = l * l;
sin_l = l*(ONE+z*(SSIN1+z*(SSIN2+z*(SSIN3+z*(SSIN4+z*SSIN5)))));
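
INSERT_WORDS64 here builds a double h whose bit pattern is hix placed in the high 32 bits, i.e. the table argument nearest to x. A stand-alone equivalent using memcpy instead of the glibc-internal macro (double_from_high_word is an illustrative name):

#include <stdint.h>
#include <string.h>

static double
double_from_high_word (uint32_t hix)
{
  uint64_t bits = (uint64_t) hix << 32;   /* low 32 bits are zero */
  double h;
  memcpy (&h, &bits, sizeof h);           /* what INSERT_WORDS64 does */
  return h;
}
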
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/k_sincosl.c b/libc/sysdeps/ieee754/ldbl-128ibm/k_sincosl.c
index fc1ead659..3ba9d7e90 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/k_sincosl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/k_sincosl.c
@@ -100,9 +100,12 @@ __kernel_sincosl(long double x, long double y, long double *sinx, long double *c
{
long double h, l, z, sin_l, cos_l_m1;
int64_t ix;
- u_int32_t tix, hix, index;
- GET_LDOUBLE_MSW64 (ix, x);
- tix = ((u_int64_t)ix) >> 32;
+ uint32_t tix, hix, index;
+ double xhi, hhi;
+
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (ix, xhi);
+ tix = ((uint64_t)ix) >> 32;
tix &= ~0x80000000; /* tix = |x|'s high 32 bits */
if (tix < 0x3fc30000) /* |x| < 0.1484375 */
{
@@ -164,7 +167,8 @@ __kernel_sincosl(long double x, long double y, long double *sinx, long double *c
case 2: index = (hix - 0x3fc30000) >> 14; break;
}
*/
- SET_LDOUBLE_WORDS64(h, ((u_int64_t)hix) << 32, 0);
+ INSERT_WORDS64 (hhi, ((uint64_t)hix) << 32);
+ h = hhi;
if (iy)
l = y - (h - x);
else
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/k_sinl.c b/libc/sysdeps/ieee754/ldbl-128ibm/k_sinl.c
index f17c0ae5d..b12ea134d 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/k_sinl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/k_sinl.c
@@ -82,7 +82,10 @@ __kernel_sinl(long double x, long double y, int iy)
long double h, l, z, sin_l, cos_l_m1;
int64_t ix;
u_int32_t tix, hix, index;
- GET_LDOUBLE_MSW64 (ix, x);
+ double xhi, hhi;
+
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (ix, xhi);
tix = ((u_int64_t)ix) >> 32;
tix &= ~0x80000000; /* tix = |x|'s high 32 bits */
if (tix < 0x3fc30000) /* |x| < 0.1484375 */
@@ -132,7 +135,8 @@ __kernel_sinl(long double x, long double y, int iy)
case 2: index = (hix - 0x3fc30000) >> 14; break;
}
*/
- SET_LDOUBLE_WORDS64(h, ((u_int64_t)hix) << 32, 0);
+ INSERT_WORDS64 (hhi, ((uint64_t)hix) << 32);
+ h = hhi;
if (iy)
l = (ix < 0 ? -y : y) - (h - x);
else
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/k_tanl.c b/libc/sysdeps/ieee754/ldbl-128ibm/k_tanl.c
index 1f6bad241..bcf8b5e7d 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/k_tanl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/k_tanl.c
@@ -85,17 +85,17 @@ long double
__kernel_tanl (long double x, long double y, int iy)
{
long double z, r, v, w, s;
- int32_t ix, sign;
- ieee854_long_double_shape_type u, u1;
+ int32_t ix, sign, hx, lx;
+ double xhi;
- u.value = x;
- ix = u.parts32.w0 & 0x7fffffff;
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS (hx, lx, xhi);
+ ix = hx & 0x7fffffff;
if (ix < 0x3c600000) /* x < 2**-57 */
{
- if ((int) x == 0)
- { /* generate inexact */
- if ((ix | u.parts32.w1 | (u.parts32.w2 & 0x7fffffff) | u.parts32.w3
- | (iy + 1)) == 0)
+ if ((int) x == 0) /* generate inexact */
+ {
+ if ((ix | lx | (iy + 1)) == 0)
return one / fabs (x);
else
return (iy == 1) ? x : -one / x;
@@ -103,7 +103,7 @@ __kernel_tanl (long double x, long double y, int iy)
}
if (ix >= 0x3fe59420) /* |x| >= 0.6743316650390625 */
{
- if ((u.parts32.w0 & 0x80000000) != 0)
+ if ((hx & 0x80000000) != 0)
{
x = -x;
y = -y;
@@ -139,15 +139,13 @@ __kernel_tanl (long double x, long double y, int iy)
{ /* if allow error up to 2 ulp,
simply return -1.0/(x+r) here */
/* compute -1.0/(x+r) accurately */
- u1.value = w;
- u1.parts32.w2 = 0;
- u1.parts32.w3 = 0;
- v = r - (u1.value - x); /* u1+v = r+x */
+ long double u1, z1;
+
+ u1 = ldbl_high (w);
+ v = r - (u1 - x); /* u1+v = r+x */
z = -1.0 / w;
- u.value = z;
- u.parts32.w2 = 0;
- u.parts32.w3 = 0;
- s = 1.0 + u.value * u1.value;
- return u.value + z * (s + u.value * v);
+ z1 = ldbl_high (z);
+ s = 1.0 + z1 * u1;
+ return z1 + z * (s + z1 * v);
}
}
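
The rewritten tail of __kernel_tanl computes -1/(x+r) accurately by splitting both w = x + r and its reciprocal into head and tail. The same steps as a stand-alone sketch, assuming w = x + r as in the surrounding code (neg_recip_accurate is an illustrative name):

static long double
neg_recip_accurate (long double x, long double r)
{
  long double w = x + r;
  double u1 = (double) w;            /* head of w */
  long double v = r - (u1 - x);      /* tail: u1 + v == x + r */
  long double z = -1.0L / w;
  double z1 = (double) z;            /* head of z */
  long double s = 1.0L + (long double) z1 * u1;
  return z1 + z * (s + z1 * v);
}
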
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c b/libc/sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c
index 00e44b8b9..e46fde74f 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/ldbl2mpn.c
@@ -36,34 +36,44 @@ __mpn_extract_long_double (mp_ptr res_ptr, mp_size_t size,
union ibm_extended_long_double u;
unsigned long long hi, lo;
int ediff;
- u.d = value;
- *is_neg = u.ieee.negative;
- *expt = (int) u.ieee.exponent - IBM_EXTENDED_LONG_DOUBLE_BIAS;
+ u.ld = value;
- lo = ((long long) u.ieee.mantissa2 << 32) | u.ieee.mantissa3;
- hi = ((long long) u.ieee.mantissa0 << 32) | u.ieee.mantissa1;
- /* If the lower double is not a denomal or zero then set the hidden
+ *is_neg = u.d[0].ieee.negative;
+ *expt = (int) u.d[0].ieee.exponent - IEEE754_DOUBLE_BIAS;
+
+ lo = ((long long) u.d[1].ieee.mantissa0 << 32) | u.d[1].ieee.mantissa1;
+ hi = ((long long) u.d[0].ieee.mantissa0 << 32) | u.d[0].ieee.mantissa1;
+
+ /* If the lower double is not a denormal or zero then set the hidden
53rd bit. */
- if (u.ieee.exponent2 > 0)
- {
- lo |= 1LL << 52;
+ if (u.d[1].ieee.exponent != 0)
+ lo |= 1ULL << 52;
+ else
+ lo = lo << 1;
- /* The lower double is normalized separately from the upper. We may
- need to adjust the lower manitissa to reflect this. */
- ediff = u.ieee.exponent - u.ieee.exponent2;
- if (ediff > 53)
- lo = lo >> (ediff-53);
+ /* The lower double is normalized separately from the upper. We may
+     need to adjust the lower mantissa to reflect this. */
+ ediff = u.d[0].ieee.exponent - u.d[1].ieee.exponent - 53;
+ if (ediff > 0)
+ {
+ if (ediff < 64)
+ lo = lo >> ediff;
+ else
+ lo = 0;
}
+ else if (ediff < 0)
+ lo = lo << -ediff;
+
/* The high double may be rounded and the low double reflects the
difference between the long double and the rounded high double
     value.  This is indicated by a difference between the signs of the
high and low doubles. */
- if ((u.ieee.negative != u.ieee.negative2)
- && ((u.ieee.exponent2 != 0) && (lo != 0L)))
+ if (u.d[0].ieee.negative != u.d[1].ieee.negative
+ && lo != 0)
{
lo = (1ULL << 53) - lo;
- if (hi == 0LL)
+ if (hi == 0)
{
/* we have a borrow from the hidden bit, so shift left 1. */
hi = 0x0ffffffffffffeLL | (lo >> 51);
@@ -92,7 +102,7 @@ __mpn_extract_long_double (mp_ptr res_ptr, mp_size_t size,
#define NUM_LEADING_ZEROS (BITS_PER_MP_LIMB \
- (LDBL_MANT_DIG - ((N - 1) * BITS_PER_MP_LIMB)))
- if (u.ieee.exponent == 0)
+ if (u.d[0].ieee.exponent == 0)
{
/* A biased exponent of zero is a special case.
Either it is a zero or it is a denormal number. */
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h b/libc/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
index 046293e59..1b6e27a9f 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h
@@ -2,10 +2,13 @@
#error "Never use <math_ldbl.h> directly; include <math_private.h> instead."
#endif
-#include <sysdeps/ieee754/ldbl-128/math_ldbl.h>
#include <ieee754.h>
#include <stdint.h>
+/* To suit our callers we return *hi64 and *lo64 as if they came from
+ an ieee854 112 bit mantissa, that is, 48 bits in *hi64 (plus one
+ implicit bit) and 64 bits in *lo64. */
+
static inline void
ldbl_extract_mantissa (int64_t *hi64, uint64_t *lo64, int *exp, long double x)
{
@@ -14,77 +17,119 @@ ldbl_extract_mantissa (int64_t *hi64, uint64_t *lo64, int *exp, long double x)
the number before the decimal point and the second implicit bit
as bit 53 of the mantissa. */
uint64_t hi, lo;
- int ediff;
- union ibm_extended_long_double eldbl;
- eldbl.d = x;
- *exp = eldbl.ieee.exponent - IBM_EXTENDED_LONG_DOUBLE_BIAS;
-
- lo = ((int64_t)eldbl.ieee.mantissa2 << 32) | eldbl.ieee.mantissa3;
- hi = ((int64_t)eldbl.ieee.mantissa0 << 32) | eldbl.ieee.mantissa1;
- /* If the lower double is not a denomal or zero then set the hidden
- 53rd bit. */
- if (eldbl.ieee.exponent2 > 0x001)
- {
- lo |= (1ULL << 52);
- lo = lo << 7; /* pre-shift lo to match ieee854. */
- /* The lower double is normalized separately from the upper. We
- may need to adjust the lower manitissa to reflect this. */
- ediff = eldbl.ieee.exponent - eldbl.ieee.exponent2;
- if (ediff > 53)
- lo = lo >> (ediff-53);
- hi |= (1ULL << 52);
- }
+ union ibm_extended_long_double u;
- if ((eldbl.ieee.negative != eldbl.ieee.negative2)
- && ((eldbl.ieee.exponent2 != 0) && (lo != 0LL)))
+ u.ld = x;
+ *exp = u.d[0].ieee.exponent - IEEE754_DOUBLE_BIAS;
+
+ lo = ((uint64_t) u.d[1].ieee.mantissa0 << 32) | u.d[1].ieee.mantissa1;
+ hi = ((uint64_t) u.d[0].ieee.mantissa0 << 32) | u.d[0].ieee.mantissa1;
+
+ if (u.d[0].ieee.exponent != 0)
{
- hi--;
- lo = (1ULL << 60) - lo;
- if (hi < (1ULL << 52))
+ int ediff;
+
+ /* If not a denormal or zero then we have an implicit 53rd bit. */
+ hi |= (uint64_t) 1 << 52;
+
+ if (u.d[1].ieee.exponent != 0)
+ lo |= (uint64_t) 1 << 52;
+ else
+ /* A denormal is to be interpreted as having a biased exponent
+ of 1. */
+ lo = lo << 1;
+
+ /* We are going to shift 4 bits out of hi later, because we only
+ want 48 bits in *hi64. That means we want 60 bits in lo, but
+ we currently only have 53. Shift the value up. */
+ lo = lo << 7;
+
+ /* The lower double is normalized separately from the upper.
+ We may need to adjust the lower mantissa to reflect this.
+ The difference between the exponents can be larger than 53
+ when the low double is much less than 1ULP of the upper
+ (in which case there are significant bits, all 0's or all
+ 1's, between the two significands). The difference between
+ the exponents can be less than 53 when the upper double
+ exponent is nearing its minimum value (in which case the low
+ double is denormal ie. has an exponent of zero). */
+ ediff = u.d[0].ieee.exponent - u.d[1].ieee.exponent - 53;
+ if (ediff > 0)
+ {
+ if (ediff < 64)
+ lo = lo >> ediff;
+ else
+ lo = 0;
+ }
+ else if (ediff < 0)
+ lo = lo << -ediff;
+
+ if (u.d[0].ieee.negative != u.d[1].ieee.negative
+ && lo != 0)
{
- /* we have a borrow from the hidden bit, so shift left 1. */
- hi = (hi << 1) | (lo >> 59);
- lo = 0xfffffffffffffffLL & (lo << 1);
- *exp = *exp - 1;
+ hi--;
+ lo = ((uint64_t) 1 << 60) - lo;
+ if (hi < (uint64_t) 1 << 52)
+ {
+ /* We have a borrow from the hidden bit, so shift left 1. */
+ hi = (hi << 1) | (lo >> 59);
+ lo = (((uint64_t) 1 << 60) - 1) & (lo << 1);
+ *exp = *exp - 1;
+ }
}
}
+ else
+ /* If the larger magnitude double is denormal then the smaller
+ one must be zero. */
+ hi = hi << 1;
+
*lo64 = (hi << 60) | lo;
*hi64 = hi >> 4;
}
static inline long double
-ldbl_insert_mantissa (int sign, int exp, int64_t hi64, u_int64_t lo64)
+ldbl_insert_mantissa (int sign, int exp, int64_t hi64, uint64_t lo64)
{
union ibm_extended_long_double u;
- unsigned long hidden2, lzcount;
- unsigned long long hi, lo;
+ int expnt2;
+ uint64_t hi, lo;
+
+ u.d[0].ieee.negative = sign;
+ u.d[1].ieee.negative = sign;
+ u.d[0].ieee.exponent = exp + IEEE754_DOUBLE_BIAS;
+ u.d[1].ieee.exponent = 0;
+ expnt2 = exp - 53 + IEEE754_DOUBLE_BIAS;
- u.ieee.negative = sign;
- u.ieee.negative2 = sign;
- u.ieee.exponent = exp + IBM_EXTENDED_LONG_DOUBLE_BIAS;
- u.ieee.exponent2 = exp-53 + IBM_EXTENDED_LONG_DOUBLE_BIAS;
/* Expect 113 bits (112 bits + hidden) right justified in two longs.
The low order 53 bits (52 + hidden) go into the lower double */
- lo = (lo64 >> 7)& ((1ULL << 53) - 1);
- hidden2 = (lo64 >> 59) & 1ULL;
+ lo = (lo64 >> 7) & (((uint64_t) 1 << 53) - 1);
/* The high order 53 bits (52 + hidden) go into the upper double */
- hi = (lo64 >> 60) & ((1ULL << 11) - 1);
- hi |= (hi64 << 4);
+ hi = lo64 >> 60;
+ hi |= hi64 << 4;
- if (lo != 0LL)
+ if (lo != 0)
{
- /* hidden2 bit of low double controls rounding of the high double.
- If hidden2 is '1' then round up hi and adjust lo (2nd mantissa)
+ int lzcount;
+
+ /* hidden bit of low double controls rounding of the high double.
+ If hidden is '1' and either the explicit mantissa is non-zero
+ or hi is odd, then round up hi and adjust lo (2nd mantissa)
plus change the sign of the low double to compensate. */
- if (hidden2)
+ if ((lo & ((uint64_t) 1 << 52)) != 0
+ && ((hi & 1) != 0 || (lo & (((uint64_t) 1 << 52) - 1)) != 0))
{
hi++;
- u.ieee.negative2 = !sign;
- lo = (1ULL << 53) - lo;
+ if ((hi & ((uint64_t) 1 << 53)) != 0)
+ {
+ hi = hi >> 1;
+ u.d[0].ieee.exponent++;
+ }
+ u.d[1].ieee.negative = !sign;
+ lo = ((uint64_t) 1 << 53) - lo;
}
- /* The hidden bit of the lo mantissa is zero so we need to
- normalize the it for the low double. Shift it left until the
- hidden bit is '1' then adjust the 2nd exponent accordingly. */
+
+ /* Normalize the low double. Shift the mantissa left until
+ the hidden bit is '1' and adjust the exponent accordingly. */
if (sizeof (lo) == sizeof (long))
lzcount = __builtin_clzl (lo);
@@ -92,35 +137,31 @@ ldbl_insert_mantissa (int sign, int exp, int64_t hi64, u_int64_t lo64)
lzcount = __builtin_clzl ((long) (lo >> 32));
else
lzcount = __builtin_clzl ((long) lo) + 32;
- lzcount = lzcount - 11;
- if (lzcount > 0)
+ lzcount = lzcount - (64 - 53);
+ lo <<= lzcount;
+ expnt2 -= lzcount;
+
+ if (expnt2 >= 1)
+ /* Not denormal. */
+ u.d[1].ieee.exponent = expnt2;
+ else
{
- int expnt2 = u.ieee.exponent2 - lzcount;
- if (expnt2 >= 1)
- {
- /* Not denormal. Normalize and set low exponent. */
- lo = lo << lzcount;
- u.ieee.exponent2 = expnt2;
- }
+ /* Is denormal. Note that biased exponent of 0 is treated
+ as if it was 1, hence the extra shift. */
+ if (expnt2 > -53)
+ lo >>= 1 - expnt2;
else
- {
- /* Is denormal. */
- lo = lo << (lzcount + expnt2);
- u.ieee.exponent2 = 0;
- }
+ lo = 0;
}
}
else
- {
- u.ieee.negative2 = 0;
- u.ieee.exponent2 = 0;
- }
+ u.d[1].ieee.negative = 0;
- u.ieee.mantissa3 = lo & ((1ULL << 32) - 1);
- u.ieee.mantissa2 = (lo >> 32) & ((1ULL << 20) - 1);
- u.ieee.mantissa1 = hi & ((1ULL << 32) - 1);
- u.ieee.mantissa0 = (hi >> 32) & ((1ULL << 20) - 1);
- return u.d;
+ u.d[1].ieee.mantissa1 = lo;
+ u.d[1].ieee.mantissa0 = lo >> 32;
+ u.d[0].ieee.mantissa1 = hi;
+ u.d[0].ieee.mantissa0 = hi >> 32;
+ return u.ld;
}
/* Handy utility functions to pack/unpack/canonicalize and find the nearbyint
@@ -129,18 +170,18 @@ static inline long double
default_ldbl_pack (double a, double aa)
{
union ibm_extended_long_double u;
- u.dd[0] = a;
- u.dd[1] = aa;
- return u.d;
+ u.d[0].d = a;
+ u.d[1].d = aa;
+ return u.ld;
}
static inline void
default_ldbl_unpack (long double l, double *a, double *aa)
{
union ibm_extended_long_double u;
- u.d = l;
- *a = u.dd[0];
- *aa = u.dd[1];
+ u.ld = l;
+ *a = u.d[0].d;
+ *aa = u.d[1].d;
}
#ifndef ldbl_pack
@@ -150,6 +191,9 @@ default_ldbl_unpack (long double l, double *a, double *aa)
# define ldbl_unpack default_ldbl_unpack
#endif
+/* Extract high double. */
+#define ldbl_high(x) ((double) x)
+
/* Convert a finite long double to canonical form.
Does not handle +/-Inf properly. */
static inline void
@@ -163,13 +207,13 @@ ldbl_canonicalize (double *a, double *aa)
*aa = xl;
}
-/* Simple inline nearbyint (double) function .
+/* Simple inline nearbyint (double) function.
Only works in the default rounding mode
but is useful in long double rounding functions. */
static inline double
ldbl_nearbyint (double a)
{
- double two52 = 0x10000000000000LL;
+ double two52 = 0x1p52;
if (__builtin_expect ((__builtin_fabs (a) < two52), 1))
{
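
The constant change above (0x1p52 for two52) feeds the usual round-to-nearest trick in ldbl_nearbyint: for |a| < 2^52, adding and then subtracting 2^52 leaves a rounded to an integer in the current rounding mode. A minimal sketch of that trick, assuming the default rounding mode and ignoring the sign of zero (nearbyint_sketch is an illustrative name):

static double
nearbyint_sketch (double a)
{
  const double two52 = 0x1p52;
  if (__builtin_fabs (a) < two52)
    {
      if (a > 0.0)
	{
	  a += two52;   /* forces rounding to an integer */
	  a -= two52;
	}
      else if (a < 0.0)
	{
	  a -= two52;
	  a += two52;
	}
    }
  return a;
}
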
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c b/libc/sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c
index 3df42c566..c96852dfd 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/mpn2ldbl.c
@@ -33,11 +33,11 @@ __mpn_construct_long_double (mp_srcptr frac_ptr, int expt, int sign)
unsigned long long hi, lo;
int exponent2;
- u.ieee.negative = sign;
- u.ieee.negative2 = sign;
- u.ieee.exponent = expt + IBM_EXTENDED_LONG_DOUBLE_BIAS;
- u.ieee.exponent2 = 0;
- exponent2 = expt - 53 + IBM_EXTENDED_LONG_DOUBLE_BIAS;
+ u.d[0].ieee.negative = sign;
+ u.d[1].ieee.negative = sign;
+ u.d[0].ieee.exponent = expt + IEEE754_DOUBLE_BIAS;
+ u.d[1].ieee.exponent = 0;
+ exponent2 = expt - 53 + IEEE754_DOUBLE_BIAS;
#if BITS_PER_MP_LIMB == 32
/* The low order 53 bits (52 + hidden) go into the lower double */
@@ -69,19 +69,19 @@ __mpn_construct_long_double (mp_srcptr frac_ptr, int expt, int sign)
else
lzcount = __builtin_clzl ((long) val) + 32;
if (hi)
- lzcount = lzcount - 11;
+ lzcount = lzcount - (64 - 53);
else
- lzcount = lzcount + 42;
+ lzcount = lzcount + 53 - (64 - 53);
- if (lzcount > u.ieee.exponent)
+ if (lzcount > u.d[0].ieee.exponent)
{
- lzcount = u.ieee.exponent;
- u.ieee.exponent = 0;
+ lzcount = u.d[0].ieee.exponent;
+ u.d[0].ieee.exponent = 0;
exponent2 -= lzcount;
}
else
{
- u.ieee.exponent -= (lzcount - 1);
+ u.d[0].ieee.exponent -= (lzcount - 1);
exponent2 -= (lzcount - 1);
}
@@ -97,29 +97,27 @@ __mpn_construct_long_double (mp_srcptr frac_ptr, int expt, int sign)
}
}
- if (lo != 0L)
+ if (lo != 0)
{
- /* hidden2 bit of low double controls rounding of the high double.
- If hidden2 is '1' and either the explicit mantissa is non-zero
+ /* hidden bit of low double controls rounding of the high double.
+ If hidden is '1' and either the explicit mantissa is non-zero
or hi is odd, then round up hi and adjust lo (2nd mantissa)
plus change the sign of the low double to compensate. */
if ((lo & (1LL << 52)) != 0
- && ((hi & 1) != 0 || (lo & ((1LL << 52) - 1))))
+ && ((hi & 1) != 0 || (lo & ((1LL << 52) - 1)) != 0))
{
hi++;
- if ((hi & ((1LL << 52) - 1)) == 0)
+ if ((hi & (1LL << 53)) != 0)
{
- if ((hi & (1LL << 53)) != 0)
- hi -= 1LL << 52;
- u.ieee.exponent++;
+ hi >>= 1;
+ u.d[0].ieee.exponent++;
}
- u.ieee.negative2 = !sign;
+ u.d[1].ieee.negative = !sign;
lo = (1LL << 53) - lo;
}
- /* The hidden bit of the lo mantissa is zero so we need to normalize
- it for the low double. Shift it left until the hidden bit is '1'
- then adjust the 2nd exponent accordingly. */
+ /* Normalize the low double. Shift the mantissa left until
+ the hidden bit is '1' and adjust the exponent accordingly. */
if (sizeof (lo) == sizeof (long))
lzcount = __builtin_clzl (lo);
@@ -127,24 +125,24 @@ __mpn_construct_long_double (mp_srcptr frac_ptr, int expt, int sign)
lzcount = __builtin_clzl ((long) (lo >> 32));
else
lzcount = __builtin_clzl ((long) lo) + 32;
- lzcount = lzcount - 11;
- if (lzcount > 0)
- {
- lo = lo << lzcount;
- exponent2 = exponent2 - lzcount;
- }
+ lzcount = lzcount - (64 - 53);
+ lo <<= lzcount;
+ exponent2 -= lzcount;
+
if (exponent2 > 0)
- u.ieee.exponent2 = exponent2;
- else
+ u.d[1].ieee.exponent = exponent2;
+ else if (exponent2 > -53)
lo >>= 1 - exponent2;
+ else
+ lo = 0;
}
else
- u.ieee.negative2 = 0;
+ u.d[1].ieee.negative = 0;
- u.ieee.mantissa3 = lo & 0xffffffffLL;
- u.ieee.mantissa2 = (lo >> 32) & 0xfffff;
- u.ieee.mantissa1 = hi & 0xffffffffLL;
- u.ieee.mantissa0 = (hi >> 32) & ((1LL << (LDBL_MANT_DIG - 86)) - 1);
+ u.d[1].ieee.mantissa1 = lo;
+ u.d[1].ieee.mantissa0 = lo >> 32;
+ u.d[0].ieee.mantissa1 = hi;
+ u.d[0].ieee.mantissa0 = hi >> 32;
- return u.d;
+ return u.ld;
}
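
The mpn2ldbl.c change above drops the old single ieee854-style bit-field view of the IBM long double in favour of a pair of ieee754_double views (u.d[0] for the high half, u.d[1] for the low half), so both halves are handled with the ordinary IEEE754_DOUBLE_BIAS. A minimal sketch of the underlying representation, with made-up names (pair_t, split_pair) rather than the glibc union, and assuming a double-double style long double:

/* Sketch, not the glibc union: an IBM "double-double" long double is
   a pair of IEEE doubles with ld ~= hi + lo and |lo| <= 0.5*ulp(hi).
   pair_t and split_pair are illustrative names only. */
#include <stdio.h>

typedef struct { double hi, lo; } pair_t;

static pair_t
split_pair (long double x)
{
  pair_t p;
  p.hi = (double) x;                          /* high part: x rounded to double */
  p.lo = (double) (x - (long double) p.hi);   /* low part: the remainder */
  return p;
}

int
main (void)
{
  pair_t p = split_pair (1.0L / 3.0L);
  printf ("hi=%a lo=%a\n", p.hi, p.lo);
  return 0;
}
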
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c b/libc/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c
index 247dc20e5..e0ec422b0 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/printf_fphex.c
@@ -26,31 +26,31 @@ do { \
unsigned long long int num0, num1; \
unsigned long long hi, lo; \
int ediff; \
- union ibm_extended_long_double eldbl; \
- eldbl.d = fpnum.ldbl.d; \
+ union ibm_extended_long_double u; \
+ u.ld = fpnum.ldbl; \
\
assert (sizeof (long double) == 16); \
\
- lo = ((long long)eldbl.ieee.mantissa2 << 32) | eldbl.ieee.mantissa3; \
- hi = ((long long)eldbl.ieee.mantissa0 << 32) | eldbl.ieee.mantissa1; \
+ lo = ((long long)u.d[1].ieee.mantissa0 << 32) | u.d[1].ieee.mantissa1; \
+ hi = ((long long)u.d[0].ieee.mantissa0 << 32) | u.d[0].ieee.mantissa1; \
lo <<= 7; /* pre-shift lo to match ieee854. */ \
- /* If the lower double is not a denomal or zero then set the hidden \
+ /* If the lower double is not a denormal or zero then set the hidden \
53rd bit. */ \
- if (eldbl.ieee.exponent2 != 0) \
+ if (u.d[1].ieee.exponent != 0) \
lo |= (1ULL << (52 + 7)); \
else \
lo <<= 1; \
/* The lower double is normalized separately from the upper. We \
may need to adjust the lower manitissa to reflect this. */ \
may need to adjust the lower mantissa to reflect this. */ \
- ediff = eldbl.ieee.exponent - eldbl.ieee.exponent2; \
- if (ediff > 53 + 63) \
+ ediff = u.d[0].ieee.exponent - u.d[1].ieee.exponent - 53; \
+ if (ediff > 63) \
lo = 0; \
- else if (ediff > 53) \
- lo = lo >> (ediff - 53); \
- else if (eldbl.ieee.exponent2 == 0 && ediff < 53) \
- lo = lo << (53 - ediff); \
- if (eldbl.ieee.negative != eldbl.ieee.negative2 \
- && (eldbl.ieee.exponent2 != 0 || lo != 0L)) \
+ else if (ediff > 0) \
+ lo = lo >> ediff; \
+ else if (ediff < 0) \
+ lo = lo << -ediff; \
+ if (u.d[0].ieee.negative != u.d[1].ieee.negative \
+ && lo != 0) \
{ \
lo = (1ULL << 60) - lo; \
if (hi == 0L) \
@@ -58,7 +58,7 @@ do { \
/* we have a borrow from the hidden bit, so shift left 1. */ \
hi = 0xffffffffffffeLL | (lo >> 59); \
lo = 0xfffffffffffffffLL & (lo << 1); \
- eldbl.ieee.exponent--; \
+ u.d[0].ieee.exponent--; \
} \
else \
hi--; \
@@ -109,9 +109,9 @@ do { \
*--wnumstr = L'0'; \
} \
\
- leading = eldbl.ieee.exponent == 0 ? '0' : '1'; \
+ leading = u.d[0].ieee.exponent == 0 ? '0' : '1'; \
\
- exponent = eldbl.ieee.exponent; \
+ exponent = u.d[0].ieee.exponent; \
\
if (exponent == 0) \
{ \
@@ -121,18 +121,18 @@ do { \
{ \
/* This is a denormalized number. */ \
expnegative = 1; \
- exponent = IBM_EXTENDED_LONG_DOUBLE_BIAS - 1; \
+ exponent = IEEE754_DOUBLE_BIAS - 1; \
} \
} \
- else if (exponent >= IBM_EXTENDED_LONG_DOUBLE_BIAS) \
+ else if (exponent >= IEEE754_DOUBLE_BIAS) \
{ \
expnegative = 0; \
- exponent -= IBM_EXTENDED_LONG_DOUBLE_BIAS; \
+ exponent -= IEEE754_DOUBLE_BIAS; \
} \
else \
{ \
expnegative = 1; \
- exponent = -(exponent - IBM_EXTENDED_LONG_DOUBLE_BIAS); \
+ exponent = -(exponent - IEEE754_DOUBLE_BIAS); \
} \
} while (0)
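
The rewritten printf_fphex.c hunk aligns the low mantissa against the high one through ediff = exponent(high) - exponent(low) - 53 and handles a borrow when the two halves carry opposite signs. A simplified sketch of the exponent-difference step on an ordinary pair of doubles, ignoring the macro's extra 7-bit pre-shift; the example values are arbitrary:

/* How far right of the high mantissa does the low one sit once both
   are written with a common exponent?  53 accounts for the implicit
   width of the high mantissa, as in the patched macro. */
#include <math.h>
#include <stdio.h>

int
main (void)
{
  double hi = 0x1.8p+0, lo = 0x1.0p-60;   /* arbitrary example pair */
  int ehi, elo;
  frexp (hi, &ehi);
  frexp (lo, &elo);
  int ediff = (ehi - elo) - 53;
  printf ("shift low mantissa right by %d bits\n", ediff > 0 ? ediff : 0);
  return 0;
}
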
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_asinhl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_asinhl.c
index a833457ea..63c6edbb1 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_asinhl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_asinhl.c
@@ -38,7 +38,10 @@ long double __asinhl(long double x)
{
long double t,w;
int64_t hx,ix;
- GET_LDOUBLE_MSW64(hx,x);
+ double xhi;
+
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
ix = hx&0x7fffffffffffffffLL;
if(ix>=0x7ff0000000000000LL) return x+x; /* x is inf or NaN */
if(ix< 0x3e20000000000000LL) { /* |x|<2**-29 */
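
This and most of the following conversions replace GET_LDOUBLE_MSW64, which read the raw 128-bit object, with ldbl_high plus EXTRACT_WORDS64: take the high double of the pair and reinterpret its bits as a 64-bit integer. A sketch of that idiom, with a made-up name get_high_word64 and assuming the IBM pair-of-doubles format (on which the high part is simply the value rounded to double):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

static uint64_t
get_high_word64 (long double x)
{
  double hi = (double) x;              /* ldbl_high: the high double of the pair */
  uint64_t bits;
  memcpy (&bits, &hi, sizeof bits);    /* EXTRACT_WORDS64: reinterpret the bits */
  return bits;
}

int
main (void)
{
  uint64_t hx = get_high_word64 (-2.0L);
  /* Top bit is the sign, next 11 bits the biased exponent. */
  printf ("sign=%d exponent=%d\n",
          (int) (hx >> 63), (int) ((hx >> 52) & 0x7ff));
  return 0;
}
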
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_atanl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_atanl.c
index 2a36d16bb..41dde2399 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_atanl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_atanl.c
@@ -173,23 +173,20 @@ static const long double
long double
__atanl (long double x)
{
- int k, sign;
+ int32_t k, sign, lx;
long double t, u, p, q;
- ieee854_long_double_shape_type s;
+ double xhi;
- s.value = x;
- k = s.parts32.w0;
- if (k & 0x80000000)
- sign = 1;
- else
- sign = 0;
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS (k, lx, xhi);
+ sign = k & 0x80000000;
/* Check for IEEE special cases. */
k &= 0x7fffffff;
if (k >= 0x7ff00000)
{
/* NaN. */
- if ((k & 0xfffff) | s.parts32.w1 )
+ if (((k - 0x7ff00000) | lx) != 0)
return (x + x);
/* Infinity. */
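
In the atanl hunk the NaN test becomes ((k - 0x7ff00000) | lx) != 0: once k is known to be at least 0x7ff00000, the subtraction is zero only for an exact infinity high word, and OR-ing in the low word catches any NaN payload stored there. A sketch of the same test on a plain double split into 32-bit words:

#include <math.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>

static int
is_nan_words (double x)
{
  uint64_t bits;
  memcpy (&bits, &x, sizeof bits);
  uint32_t hi = (uint32_t) (bits >> 32) & 0x7fffffff;
  uint32_t lo = (uint32_t) bits;
  if (hi < 0x7ff00000)
    return 0;                             /* finite */
  return ((hi - 0x7ff00000) | lo) != 0;   /* inf -> 0, NaN -> 1 */
}

int
main (void)
{
  printf ("%d %d %d\n", is_nan_words (NAN), is_nan_words (INFINITY),
          is_nan_words (1.5));
  return 0;
}
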
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_cosl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_cosl.c
index 23148392f..54c6cc77d 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_cosl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_cosl.c
@@ -53,9 +53,11 @@ long double __cosl(long double x)
{
long double y[2],z=0.0L;
int64_t n, ix;
+ double xhi;
/* High word of x. */
- GET_LDOUBLE_MSW64(ix,x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (ix, xhi);
/* |x| ~< pi/4 */
ix &= 0x7fffffffffffffffLL;
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_erfl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_erfl.c
index 6a4475ed6..c861c65cc 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_erfl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_erfl.c
@@ -760,16 +760,16 @@ long double
__erfl (long double x)
{
long double a, y, z;
- int32_t i, ix, sign;
- ieee854_long_double_shape_type u;
+ int32_t i, ix, hx;
+ double xhi;
- u.value = x;
- sign = u.parts32.w0;
- ix = sign & 0x7fffffff;
+ xhi = ldbl_high (x);
+ GET_HIGH_WORD (hx, xhi);
+ ix = hx & 0x7fffffff;
if (ix >= 0x7ff00000)
{ /* erf(nan)=nan */
- i = ((sign & 0xfff00000) >> 31) << 1;
+ i = ((uint32_t) hx >> 31) << 1;
return (long double) (1 - i) + one / x; /* erf(+-inf)=+-1 */
}
@@ -778,7 +778,7 @@ __erfl (long double x)
if (ix >= 0x4039A0DE)
{
/* __erfcl (x) underflows if x > 25.6283 */
- if (sign)
+ if ((hx & 0x80000000) == 0)
return one-tiny;
else
return tiny-one;
@@ -789,8 +789,9 @@ __erfl (long double x)
return (one - y);
}
}
- u.parts32.w0 = ix;
- a = u.value;
+ a = x;
+ if ((hx & 0x80000000) != 0)
+ a = -a;
z = x * x;
if (ix < 0x3fec0000) /* a < 0.875 */
{
@@ -814,7 +815,7 @@ __erfl (long double x)
y = erf_const + neval (a, TN2, NTN2) / deval (a, TD2, NTD2);
}
- if (sign & 0x80000000) /* x < 0 */
+ if (hx & 0x80000000) /* x < 0 */
y = -y;
return( y );
}
@@ -824,18 +825,18 @@ long double
__erfcl (long double x)
{
long double y, z, p, r;
- int32_t i, ix, sign;
- ieee854_long_double_shape_type u;
+ int32_t i, ix;
+ uint32_t hx;
+ double xhi;
- u.value = x;
- sign = u.parts32.w0;
- ix = sign & 0x7fffffff;
- u.parts32.w0 = ix;
+ xhi = ldbl_high (x);
+ GET_HIGH_WORD (hx, xhi);
+ ix = hx & 0x7fffffff;
if (ix >= 0x7ff00000)
{ /* erfc(nan)=nan */
/* erfc(+-inf)=0,2 */
- return (long double) (((u_int32_t) sign >> 31) << 1) + one / x;
+ return (long double) ((hx >> 31) << 1) + one / x;
}
if (ix < 0x3fd00000) /* |x| <1/4 */
@@ -846,7 +847,8 @@ __erfcl (long double x)
}
if (ix < 0x3ff40000) /* 1.25 */
{
- x = u.value;
+ if ((hx & 0x80000000) != 0)
+ x = -x;
i = 8.0 * x;
switch (i)
{
@@ -891,7 +893,7 @@ __erfcl (long double x)
y += C20a;
break;
}
- if (sign & 0x80000000)
+ if (hx & 0x80000000)
y = 2.0L - y;
return y;
}
@@ -899,10 +901,11 @@ __erfcl (long double x)
if (ix < 0x405ac000)
{
/* x < -9 */
- if ((ix >= 0x40220000) && (sign & 0x80000000))
+ if (hx >= 0xc0220000)
return two - tiny;
- x = fabsl (x);
+ if ((hx & 0x80000000) != 0)
+ x = -x;
z = one / (x * x);
i = 8.0 / x;
switch (i)
@@ -933,21 +936,17 @@ __erfcl (long double x)
p = neval (z, RNr8, NRNr8) / deval (z, RDr8, NRDr8);
break;
}
- u.value = x;
- u.parts32.w3 = 0;
- u.parts32.w2 = 0;
- u.parts32.w1 &= 0xf8000000;
- z = u.value;
+ z = (float) x;
r = __ieee754_expl (-z * z - 0.5625) *
__ieee754_expl ((z - x) * (z + x) + p);
- if ((sign & 0x80000000) == 0)
+ if ((hx & 0x80000000) == 0)
return r / x;
else
return two - r / x;
}
else
{
- if ((sign & 0x80000000) == 0)
+ if ((hx & 0x80000000) == 0)
return tiny * tiny;
else
return two - tiny;
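
In __erfcl the manual masking of u.parts32.w1 is replaced by z = (float) x: truncating x to float leaves so few significant bits that z * z is exact, and the product can then be corrected with (z - x) * (z + x), since -x*x == -z*z + (z - x) * (z + x). A sketch of that split on exp(-x*x); the test point is arbitrary:

#include <math.h>
#include <stdio.h>

int
main (void)
{
  double x = 9.25e0 + 3e-9;       /* arbitrary test point */
  double z = (float) x;           /* few significant bits -> z*z is exact */
  double a = exp (-x * x);                             /* single big argument */
  double b = exp (-z * z) * exp ((z - x) * (z + x));   /* split form */
  printf ("%.17e\n%.17e\n", a, b);
  return 0;
}
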
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_expm1l.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_expm1l.c
index 8808dcd89..007e78534 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_expm1l.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_expm1l.c
@@ -92,19 +92,19 @@ long double
__expm1l (long double x)
{
long double px, qx, xx;
- int32_t ix, sign;
- ieee854_long_double_shape_type u;
+ int32_t ix, lx, sign;
int k;
+ double xhi;
/* Detect infinity and NaN. */
- u.value = x;
- ix = u.parts32.w0;
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS (ix, lx, xhi);
sign = ix & 0x80000000;
ix &= 0x7fffffff;
if (ix >= 0x7ff00000)
{
/* Infinity. */
- if (((ix & 0xfffff) | u.parts32.w1 | (u.parts32.w2&0x7fffffff) | u.parts32.w3) == 0)
+ if (((ix - 0x7ff00000) | lx) == 0)
{
if (sign)
return -1.0L;
@@ -116,7 +116,7 @@ __expm1l (long double x)
}
/* expm1(+- 0) = +- 0. */
- if ((ix == 0) && (u.parts32.w1 | (u.parts32.w2&0x7fffffff) | u.parts32.w3) == 0)
+ if ((ix | lx) == 0)
return x;
/* Overflow. */
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_fabsl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_fabsl.c
index 99146d802..c801c9706 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_fabsl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_fabsl.c
@@ -29,10 +29,16 @@ static char rcsid[] = "$NetBSD: $";
long double __fabsl(long double x)
{
u_int64_t hx, lx;
- GET_LDOUBLE_WORDS64(hx,lx,x);
+ double xhi, xlo;
+
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (hx, xhi);
+ EXTRACT_WORDS64 (lx, xlo);
lx = lx ^ ( hx & 0x8000000000000000LL );
hx = hx & 0x7fffffffffffffffLL;
- SET_LDOUBLE_WORDS64(x,hx,lx);
+ INSERT_WORDS64 (xhi, hx);
+ INSERT_WORDS64 (xlo, lx);
+ x = ldbl_pack (xhi, xlo);
return x;
}
long_double_symbol (libm, __fabsl, fabsl);
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_finitel.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_finitel.c
index 8edb34154..7b4655fad 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_finitel.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_finitel.c
@@ -29,10 +29,14 @@ static char rcsid[] = "$NetBSD: $";
int
___finitel (long double x)
{
- int64_t hx;
- GET_LDOUBLE_MSW64(hx,x);
- return (int)((u_int64_t)((hx&0x7fffffffffffffffLL)
- -0x7ff0000000000000LL)>>63);
+ uint64_t hx;
+ double xhi;
+
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
+ hx &= 0x7fffffffffffffffLL;
+ hx -= 0x7ff0000000000000LL;
+ return hx >> 63;
}
hidden_ver (___finitel, __finitel)
weak_alias (___finitel, ____finitel)
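
The new __finitel (and the __isnanl below it) classify through an unsigned subtraction against the infinity bit pattern: the subtraction wraps exactly when the operand is on the wanted side of 0x7ff0000000000000, so the top bit of the result is the answer. A sketch of both directions on the bits of a plain double:

#include <math.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>

static int
finite_bits (double x)
{
  uint64_t hx;
  memcpy (&hx, &x, sizeof hx);
  hx &= 0x7fffffffffffffffULL;     /* |x| */
  hx -= 0x7ff0000000000000ULL;     /* wraps iff |x| < inf */
  return (int) (hx >> 63);         /* 1 = finite, 0 = inf/NaN */
}

static int
nan_bits (double x)
{
  uint64_t hx;
  memcpy (&hx, &x, sizeof hx);
  hx &= 0x7fffffffffffffffULL;
  hx = 0x7ff0000000000000ULL - hx; /* wraps iff |x| > inf, i.e. NaN */
  return (int) (hx >> 63);
}

int
main (void)
{
  printf ("%d %d %d\n", finite_bits (2.0), finite_bits (INFINITY),
          nan_bits (NAN));
  return 0;
}
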
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c
index f4a90b08c..90586e822 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_fpclassifyl.c
@@ -46,8 +46,10 @@ ___fpclassifyl (long double x)
{
u_int64_t hx, lx;
int retval = FP_NORMAL;
+ double xhi, xlo;
- GET_LDOUBLE_WORDS64 (hx, lx, x);
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (hx, xhi);
if ((hx & 0x7ff0000000000000ULL) == 0x7ff0000000000000ULL) {
/* +/-NaN or +/-Inf */
if (hx & 0x000fffffffffffffULL) {
@@ -65,6 +67,7 @@ ___fpclassifyl (long double x)
retval = FP_NORMAL;
} else {
if ((hx & 0x7ff0000000000000ULL) == 0x0360000000000000ULL) {
+ EXTRACT_WORDS64 (lx, xlo);
if ((lx & 0x7fffffffffffffff) /* lower is non-zero */
&& ((lx^hx) & 0x8000000000000000ULL)) { /* and sign differs */
/* +/- denormal */
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_frexpl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_frexpl.c
index 3ac537411..7e40663fd 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_frexpl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_frexpl.c
@@ -36,16 +36,21 @@ two107 = 162259276829213363391578010288128.0; /* 0x4670000000000000, 0 */
long double __frexpl(long double x, int *eptr)
{
- u_int64_t hx, lx, ix, ixl;
+ uint64_t hx, lx, ix, ixl;
int64_t explo;
- GET_LDOUBLE_WORDS64(hx,lx,x);
+ double xhi, xlo;
+
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (hx, xhi);
+ EXTRACT_WORDS64 (lx, xlo);
ixl = 0x7fffffffffffffffULL&lx;
ix = 0x7fffffffffffffffULL&hx;
*eptr = 0;
- if(ix>=0x7ff0000000000000ULL||((ix|ixl)==0)) return x; /* 0,inf,nan */
+ if(ix>=0x7ff0000000000000ULL||ix==0) return x; /* 0,inf,nan */
if (ix<0x0010000000000000ULL) { /* subnormal */
x *= two107;
- GET_LDOUBLE_MSW64(hx,x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
ix = hx&0x7fffffffffffffffULL;
*eptr = -107;
}
@@ -54,7 +59,7 @@ long double __frexpl(long double x, int *eptr)
if (ixl != 0ULL) {
explo = (ixl>>52) - (ix>>52) + 0x3fe;
if ((ixl&0x7ff0000000000000ULL) == 0LL) {
- /* the lower double is a denomal so we need to correct its
+ /* the lower double is a denormal so we need to correct its
mantissa and perhaps its exponent. */
int cnt;
@@ -73,7 +78,9 @@ long double __frexpl(long double x, int *eptr)
lx = 0ULL;
hx = (hx&0x800fffffffffffffULL) | 0x3fe0000000000000ULL;
- SET_LDOUBLE_WORDS64(x,hx,lx);
+ INSERT_WORDS64 (xhi, hx);
+ INSERT_WORDS64 (xlo, lx);
+ x = ldbl_pack (xhi, xlo);
return x;
}
#ifdef IS_IN_libm
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_isinf_nsl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_isinf_nsl.c
index c8dd9ff98..54e72c916 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_isinf_nsl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_isinf_nsl.c
@@ -1,6 +1,7 @@
/*
* __isinf_nsl(x) returns != 0 if x is ±inf, else 0;
* no branching!
+ * slightly dodgy in relying on signed shift right copying sign bit
*/
#include <math.h>
@@ -9,8 +10,14 @@
int
__isinf_nsl (long double x)
{
- int64_t hx,lx;
- GET_LDOUBLE_WORDS64(hx,lx,x);
- return !((lx & 0x7fffffffffffffffLL)
- | ((hx & 0x7fffffffffffffffLL) ^ 0x7ff0000000000000LL));
+ double xhi;
+ int64_t hx, mask;
+
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
+
+ mask = (hx & 0x7fffffffffffffffLL) ^ 0x7ff0000000000000LL;
+ mask |= -mask;
+ mask >>= 63;
+ return ~mask;
}
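
The rewritten __isinf_nsl keeps the branch-free style of the old code: XOR the absolute-value bits with the infinity pattern, fold any non-zero result into the sign bit with mask |= -mask, and arithmetic-shift it across the word. A sketch on a plain double; as the added comment says, it leans on >> of a negative value being an arithmetic shift:

#include <math.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>

static int
isinf_bits (double x)
{
  int64_t hx, mask;
  memcpy (&hx, &x, sizeof hx);
  mask = (hx & 0x7fffffffffffffffLL) ^ 0x7ff0000000000000LL;
  mask |= -mask;        /* non-zero -> sign bit set */
  mask >>= 63;          /* non-zero -> -1, zero (i.e. inf) -> 0 */
  return (int) ~mask;   /* inf -> non-zero, otherwise 0 */
}

int
main (void)
{
  printf ("%d %d %d\n", isinf_bits (INFINITY) != 0,
          isinf_bits (3.5) != 0, isinf_bits (NAN) != 0);
  return 0;
}
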
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_isinfl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_isinfl.c
index 5f5b0144b..6a728221f 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_isinfl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_isinfl.c
@@ -11,6 +11,7 @@ static char rcsid[] = "$NetBSD: $";
/*
* isinfl(x) returns 1 if x is inf, -1 if x is -inf, else 0;
* no branching!
+ * slightly dodgy in relying on signed shift right copying sign bit
*/
#include <math.h>
@@ -20,12 +21,16 @@ static char rcsid[] = "$NetBSD: $";
int
___isinfl (long double x)
{
- int64_t hx,lx;
- GET_LDOUBLE_WORDS64(hx,lx,x);
- lx = (lx & 0x7fffffffffffffffLL);
- lx |= (hx & 0x7fffffffffffffffLL) ^ 0x7ff0000000000000LL;
- lx |= -lx;
- return ~(lx >> 63) & (hx >> 62);
+ double xhi;
+ int64_t hx, mask;
+
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
+
+ mask = (hx & 0x7fffffffffffffffLL) ^ 0x7ff0000000000000LL;
+ mask |= -mask;
+ mask >>= 63;
+ return ~mask & (hx >> 62);
}
hidden_ver (___isinfl, __isinfl)
#ifndef IS_IN_libm
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_isnanl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_isnanl.c
index 264dec745..d12f1d3bf 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_isnanl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_isnanl.c
@@ -29,12 +29,14 @@ static char rcsid[] = "$NetBSD: $";
int
___isnanl (long double x)
{
- int64_t hx;
- int64_t lx __attribute__ ((unused));
- GET_LDOUBLE_WORDS64(hx,lx,x);
- hx &= 0x7fffffffffffffffLL;
- hx = 0x7ff0000000000000LL - hx;
- return (int)((u_int64_t)hx>>63);
+ uint64_t hx;
+ double xhi;
+
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
+ hx &= 0x7fffffffffffffffLL;
+ hx = 0x7ff0000000000000LL - hx;
+ return (int) (hx >> 63);
}
hidden_ver (___isnanl, __isnanl)
#ifndef IS_IN_libm
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_issignalingl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_issignalingl.c
index 96fab1aff..bdd58f8f2 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_issignalingl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_issignalingl.c
@@ -22,10 +22,13 @@
int
__issignalingl (long double x)
{
- u_int64_t xi;
+ uint64_t xi;
/* For inspecting NaN status, we only have to look at the first of the pair
of IEEE 754 64-bit precision numbers. */
- GET_LDOUBLE_MSW64 (xi, x);
+ double xhi;
+
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (xi, xhi);
#ifdef HIGH_ORDER_BIT_IS_SET_FOR_SNAN
# error untested
/* We only have to care about the high-order bit of x's significand, because
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_log1pl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_log1pl.c
index 77c4fdea8..a34638305 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_log1pl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_log1pl.c
@@ -126,19 +126,18 @@ long double
__log1pl (long double xm1)
{
long double x, y, z, r, s;
- ieee854_long_double_shape_type u;
- int32_t hx;
+ double xhi;
+ int32_t hx, lx;
int e;
/* Test for NaN or infinity input. */
- u.value = xm1;
- hx = u.parts32.w0;
+ xhi = ldbl_high (xm1);
+ EXTRACT_WORDS (hx, lx, xhi);
if (hx >= 0x7ff00000)
return xm1;
/* log1p(+- 0) = +- 0. */
- if (((hx & 0x7fffffff) == 0)
- && (u.parts32.w1 | (u.parts32.w2 & 0x7fffffff) | u.parts32.w3) == 0)
+ if (((hx & 0x7fffffff) | lx) == 0)
return xm1;
x = xm1 + 1.0L;
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_logbl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_logbl.c
index 6cbfcfa1c..e14028861 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_logbl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_logbl.c
@@ -27,9 +27,10 @@ long double
__logbl (long double x)
{
int64_t hx, rhx;
- int64_t lx __attribute__ ((unused));
+ double xhi;
- GET_LDOUBLE_WORDS64 (hx, lx, x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (hx, xhi);
hx &= 0x7fffffffffffffffLL; /* high |x| */
if (hx == 0)
return -1.0 / fabs (x);
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_modfl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_modfl.c
index 39de9d4bf..ed03ce236 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_modfl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_modfl.c
@@ -37,43 +37,54 @@ long double __modfl(long double x, long double *iptr)
{
int64_t i0,i1,j0;
u_int64_t i;
- GET_LDOUBLE_WORDS64(i0,i1,x);
+ double xhi, xlo;
+
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (i0, xhi);
+ EXTRACT_WORDS64 (i1, xlo);
i1 &= 0x000fffffffffffffLL;
j0 = ((i0>>52)&0x7ff)-0x3ff; /* exponent of x */
if(j0<52) { /* integer part in high x */
if(j0<0) { /* |x|<1 */
/* *iptr = +-0 */
- SET_LDOUBLE_WORDS64(*iptr,i0&0x8000000000000000ULL,0);
+ INSERT_WORDS64 (xhi, i0&0x8000000000000000ULL);
+ *iptr = xhi;
return x;
} else {
i = (0x000fffffffffffffLL)>>j0;
if(((i0&i)|(i1&0x7fffffffffffffffLL))==0) { /* x is integral */
*iptr = x;
/* return +-0 */
- SET_LDOUBLE_WORDS64(x,i0&0x8000000000000000ULL,0);
+ INSERT_WORDS64 (xhi, i0&0x8000000000000000ULL);
+ x = xhi;
return x;
} else {
- SET_LDOUBLE_WORDS64(*iptr,i0&(~i),0);
+ INSERT_WORDS64 (xhi, i0&(~i));
+ *iptr = xhi;
return x - *iptr;
}
}
} else if (j0>103) { /* no fraction part */
*iptr = x*one;
/* We must handle NaNs separately. */
- if (j0 == 0x400 && ((i0 & 0x000fffffffffffffLL) | i1))
+ if ((i0 & 0x7fffffffffffffffLL) > 0x7ff0000000000000LL)
return x*one;
/* return +-0 */
- SET_LDOUBLE_WORDS64(x,i0&0x8000000000000000ULL,0);
+ INSERT_WORDS64 (xhi, i0&0x8000000000000000ULL);
+ x = xhi;
return x;
} else { /* fraction part in low x */
i = -1ULL>>(j0-52);
if((i1&i)==0) { /* x is integral */
*iptr = x;
/* return +-0 */
- SET_LDOUBLE_WORDS64(x,i0&0x8000000000000000ULL,0);
+ INSERT_WORDS64 (xhi, i0&0x8000000000000000ULL);
+ x = xhi;
return x;
} else {
- SET_LDOUBLE_WORDS64(*iptr,i0,i1&(~i));
+ INSERT_WORDS64 (xhi, i0);
+ INSERT_WORDS64 (xlo, i1&(~i));
+ *iptr = ldbl_pack (xhi, xlo);
return x - *iptr;
}
}
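
__modfl isolates the integer part by masking: with unbiased exponent j0, the bits below the binary point are 0x000fffffffffffff >> j0 in the high mantissa (or sit entirely in the low double once j0 >= 52). A sketch of that mask on a single double, ignoring inf/NaN:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

static double
trunc_bits (double x)
{
  uint64_t i0;
  memcpy (&i0, &x, sizeof i0);
  int j0 = (int) ((i0 >> 52) & 0x7ff) - 0x3ff;   /* unbiased exponent */
  if (j0 < 0)
    i0 &= 0x8000000000000000ULL;                 /* |x| < 1: keep only the sign */
  else if (j0 < 52)
    i0 &= ~(0x000fffffffffffffULL >> j0);        /* clear the fraction bits */
  /* j0 >= 52: already integral (inf/NaN not handled in this sketch) */
  memcpy (&x, &i0, sizeof x);
  return x;
}

int
main (void)
{
  printf ("%g %g %g\n", trunc_bits (3.75), trunc_bits (-0.5),
          trunc_bits (123456.9));
  return 0;
}
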
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_nearbyintl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_nearbyintl.c
index bfcd11044..92ced5218 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_nearbyintl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_nearbyintl.c
@@ -34,11 +34,11 @@ __nearbyintl (long double x)
fenv_t env;
static const long double TWO52 = 4503599627370496.0L;
union ibm_extended_long_double u;
- u.d = x;
+ u.ld = x;
- if (fabs (u.dd[0]) < TWO52)
+ if (fabs (u.d[0].d) < TWO52)
{
- double high = u.dd[0];
+ double high = u.d[0].d;
feholdexcept (&env);
if (high > 0.0)
{
@@ -52,13 +52,13 @@ __nearbyintl (long double x)
high += TWO52;
if (high == 0.0) high = -0.0;
}
- u.dd[0] = high;
- u.dd[1] = 0.0;
- math_force_eval (u.dd[0]);
- math_force_eval (u.dd[1]);
+ u.d[0].d = high;
+ u.d[1].d = 0.0;
+ math_force_eval (u.d[0]);
+ math_force_eval (u.d[1]);
fesetenv (&env);
}
- else if (fabs (u.dd[1]) < TWO52 && u.dd[1] != 0.0)
+ else if (fabs (u.d[1].d) < TWO52 && u.d[1].d != 0.0)
{
double high, low, tau;
/* In this case we have to round the low double and handle any
@@ -67,57 +67,57 @@ __nearbyintl (long double x)
may already be rounded and the low double may have the
opposite sign to compensate. */
feholdexcept (&env);
- if (u.dd[0] > 0.0)
+ if (u.d[0].d > 0.0)
{
- if (u.dd[1] > 0.0)
+ if (u.d[1].d > 0.0)
{
/* If the high/low doubles are the same sign then simply
round the low double. */
- high = u.dd[0];
- low = u.dd[1];
+ high = u.d[0].d;
+ low = u.d[1].d;
}
- else if (u.dd[1] < 0.0)
+ else if (u.d[1].d < 0.0)
{
/* Else the high double is pre rounded and we need to
adjust for that. */
- tau = __nextafter (u.dd[0], 0.0);
- tau = (u.dd[0] - tau) * 2.0;
- high = u.dd[0] - tau;
- low = u.dd[1] + tau;
+ tau = __nextafter (u.d[0].d, 0.0);
+ tau = (u.d[0].d - tau) * 2.0;
+ high = u.d[0].d - tau;
+ low = u.d[1].d + tau;
}
low += TWO52;
low -= TWO52;
}
- else if (u.dd[0] < 0.0)
+ else if (u.d[0].d < 0.0)
{
- if (u.dd[1] < 0.0)
+ if (u.d[1].d < 0.0)
{
/* If the high/low doubles are the same sign then simply
round the low double. */
- high = u.dd[0];
- low = u.dd[1];
+ high = u.d[0].d;
+ low = u.d[1].d;
}
- else if (u.dd[1] > 0.0)
+ else if (u.d[1].d > 0.0)
{
/* Else the high double is pre rounded and we need to
adjust for that. */
- tau = __nextafter (u.dd[0], 0.0);
- tau = (u.dd[0] - tau) * 2.0;
- high = u.dd[0] - tau;
- low = u.dd[1] + tau;
+ tau = __nextafter (u.d[0].d, 0.0);
+ tau = (u.d[0].d - tau) * 2.0;
+ high = u.d[0].d - tau;
+ low = u.d[1].d + tau;
}
low = TWO52 - low;
low = -(low - TWO52);
}
- u.dd[0] = high + low;
- u.dd[1] = high - u.dd[0] + low;
- math_force_eval (u.dd[0]);
- math_force_eval (u.dd[1]);
+ u.d[0].d = high + low;
+ u.d[1].d = high - u.d[0].d + low;
+ math_force_eval (u.d[0]);
+ math_force_eval (u.d[1]);
fesetenv (&env);
}
- return u.d;
+ return u.ld;
}
long_double_symbol (libm, __nearbyintl, nearbyintl);
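
__nearbyintl rounds each half with the classic 2^52 trick: once a double's magnitude reaches 2^52 every representable value is an integer, so adding and then subtracting TWO52 rounds to nearest (ties to even) without inspecting the fraction. A sketch on a plain double; the feholdexcept/fesetenv bracketing that keeps the inexact flag clean is omitted:

#include <stdio.h>

/* Assumes |x| < 2^52 and the default round-to-nearest mode. */
static double
round_to_nearest (double x)
{
  static const double TWO52 = 4503599627370496.0;
  if (x >= 0.0)
    {
      x += TWO52;   /* result >= 2^52, so it is forced to an integer */
      x -= TWO52;   /* exact: brings back the rounded value */
    }
  else
    {
      x -= TWO52;
      x += TWO52;
    }
  return x;
}

int
main (void)
{
  printf ("%g %g %g\n", round_to_nearest (2.5), round_to_nearest (-1.2),
          round_to_nearest (7.0));
  return 0;
}
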
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c
index 7e581274a..c050944c0 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_nextafterl.c
@@ -30,27 +30,28 @@ static char rcsid[] = "$NetBSD: $";
long double __nextafterl(long double x, long double y)
{
- int64_t hx,hy,ihx,ihy,ilx;
- u_int64_t lx;
- u_int64_t ly __attribute__ ((unused));
+ int64_t hx,hy,ihx,ihy;
+ uint64_t lx;
+ double xhi, xlo, yhi;
- GET_LDOUBLE_WORDS64(hx,lx,x);
- GET_LDOUBLE_WORDS64(hy,ly,y);
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (hx, xhi);
+ EXTRACT_WORDS64 (lx, xlo);
+ yhi = ldbl_high (y);
+ EXTRACT_WORDS64 (hy, yhi);
ihx = hx&0x7fffffffffffffffLL; /* |hx| */
- ilx = lx&0x7fffffffffffffffLL; /* |lx| */
ihy = hy&0x7fffffffffffffffLL; /* |hy| */
- if((((ihx&0x7ff0000000000000LL)==0x7ff0000000000000LL)&&
- ((ihx&0x000fffffffffffffLL)!=0)) || /* x is nan */
- (((ihy&0x7ff0000000000000LL)==0x7ff0000000000000LL)&&
- ((ihy&0x000fffffffffffffLL)!=0))) /* y is nan */
+ if((ihx>0x7ff0000000000000LL) || /* x is nan */
+ (ihy>0x7ff0000000000000LL)) /* y is nan */
return x+y; /* signal the nan */
if(x==y)
return y; /* x=y, return y */
- if(ihx == 0 && ilx == 0) { /* x == 0 */
- long double u;
+ if(ihx == 0) { /* x == 0 */
+ long double u; /* return +-minsubnormal */
hy = (hy & 0x8000000000000000ULL) | 1;
- SET_LDOUBLE_WORDS64(x,hy,0ULL);/* return +-minsubnormal */
+ INSERT_WORDS64 (yhi, hy);
+ x = yhi;
u = math_opt_barrier (x);
u = u * u;
math_force_eval (u); /* raise underflow flag */
@@ -59,10 +60,16 @@ long double __nextafterl(long double x, long double y)
long double u;
if(x > y) { /* x > y, x -= ulp */
+ /* This isn't the largest magnitude correctly rounded
+ long double as you can see from the lowest mantissa
+ bit being zero. It is however the largest magnitude
+ long double with a 106 bit mantissa, and nextafterl
+ is insane with variable precision. So to make
+ nextafterl sane we assume 106 bit precision. */
if((hx==0xffefffffffffffffLL)&&(lx==0xfc8ffffffffffffeLL))
return x+x; /* overflow, return -inf */
if (hx >= 0x7ff0000000000000LL) {
- SET_LDOUBLE_WORDS64(u,0x7fefffffffffffffLL,0x7c8ffffffffffffeLL);
+ u = 0x1.fffffffffffff7ffffffffffff8p+1023L;
return u;
}
if(ihx <= 0x0360000000000000LL) { /* x <= LDBL_MIN */
@@ -77,16 +84,19 @@ long double __nextafterl(long double x, long double y)
return x;
}
if (ihx < 0x06a0000000000000LL) { /* ulp will denormal */
- SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL),0ULL);
+ INSERT_WORDS64 (yhi, hx & (0x7ffLL<<52));
+ u = yhi;
u *= 0x1.0000000000000p-105L;
- } else
- SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL)-0x0690000000000000LL,0ULL);
+ } else {
+ INSERT_WORDS64 (yhi, (hx & (0x7ffLL<<52))-(0x069LL<<52));
+ u = yhi;
+ }
return x - u;
} else { /* x < y, x += ulp */
if((hx==0x7fefffffffffffffLL)&&(lx==0x7c8ffffffffffffeLL))
return x+x; /* overflow, return +inf */
- if ((u_int64_t) hx >= 0xfff0000000000000ULL) {
- SET_LDOUBLE_WORDS64(u,0xffefffffffffffffLL,0xfc8ffffffffffffeLL);
+ if ((uint64_t) hx >= 0xfff0000000000000ULL) {
+ u = -0x1.fffffffffffff7ffffffffffff8p+1023L;
return u;
}
if(ihx <= 0x0360000000000000LL) { /* x <= LDBL_MIN */
@@ -103,10 +113,13 @@ long double __nextafterl(long double x, long double y)
return x;
}
if (ihx < 0x06a0000000000000LL) { /* ulp will denormal */
- SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL),0ULL);
+ INSERT_WORDS64 (yhi, hx & (0x7ffLL<<52));
+ u = yhi;
u *= 0x1.0000000000000p-105L;
- } else
- SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL)-0x0690000000000000LL,0ULL);
+ } else {
+ INSERT_WORDS64 (yhi, (hx & (0x7ffLL<<52))-(0x069LL<<52));
+ u = yhi;
+ }
return x + u;
}
}
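
When stepping by one ulp, the patched __nextafterl builds 2^(exponent(x) - 105) directly from x's exponent field, i.e. one ulp at the assumed 106-bit precision. The same quantity expressed with frexp/ldexp instead of bit fiddling:

#include <math.h>
#include <stdio.h>

static double
ulp106 (double x)
{
  int e;
  frexp (x, &e);                 /* x = m * 2^e with 0.5 <= |m| < 1 */
  return ldexp (1.0, e - 106);   /* 2^(e-106) == 2^(exponent(x) - 105) */
}

int
main (void)
{
  printf ("%a %a\n", ulp106 (1.0), ulp106 (1048576.0));
  return 0;
}
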
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c
index 7e288a436..b40cf167f 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_nexttoward.c
@@ -34,23 +34,22 @@ double __nexttoward(double x, long double y)
{
int32_t hx,ix;
int64_t hy,iy;
- u_int32_t lx;
- u_int64_t ly,uly;
+ uint32_t lx;
+ double yhi;
EXTRACT_WORDS(hx,lx,x);
- GET_LDOUBLE_WORDS64(hy,ly,y);
+ yhi = ldbl_high (y);
+ EXTRACT_WORDS64(hy,yhi);
ix = hx&0x7fffffff; /* |x| */
iy = hy&0x7fffffffffffffffLL; /* |y| */
- uly = ly&0x7fffffffffffffffLL; /* |y| */
if(((ix>=0x7ff00000)&&((ix-0x7ff00000)|lx)!=0) || /* x is nan */
- ((iy>=0x7ff0000000000000LL)&&((iy-0x7ff0000000000000LL)|uly)!=0))
- /* y is nan */
+ iy>0x7ff0000000000000LL) /* y is nan */
return x+y;
if((long double) x==y) return y; /* x=y, return y */
if((ix|lx)==0) { /* x == 0 */
double u;
- INSERT_WORDS(x,(u_int32_t)((hy>>32)&0x80000000),1);/* return +-minsub */
+ INSERT_WORDS(x,(uint32_t)((hy>>32)&0x80000000),1);/* return +-minsub */
u = math_opt_barrier (x);
u = u * u;
math_force_eval (u); /* raise underflow flag */
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c
index b387a9119..19522f476 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_nexttowardf.c
@@ -27,16 +27,16 @@ float __nexttowardf(float x, long double y)
{
int32_t hx,ix;
int64_t hy,iy;
- u_int64_t ly, uly;
+ double yhi;
GET_FLOAT_WORD(hx,x);
- GET_LDOUBLE_WORDS64(hy,ly,y);
+ yhi = ldbl_high (y);
+ EXTRACT_WORDS64 (hy, yhi);
ix = hx&0x7fffffff; /* |x| */
iy = hy&0x7fffffffffffffffLL; /* |y| */
- uly = ly&0x7fffffffffffffffLL; /* |y| */
if((ix>0x7f800000) || /* x is nan */
- ((iy>=0x7ff0000000000000LL)&&((iy-0x7ff0000000000000LL)|uly)!=0))
+ (iy>0x7ff0000000000000LL))
/* y is nan */
return x+y;
if((long double) x==y) return y; /* x=y, return y */
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_remquol.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_remquol.c
index f4777a0e1..195e108ca 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_remquol.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_remquol.c
@@ -33,20 +33,24 @@ __remquol (long double x, long double y, int *quo)
int64_t hx,hy;
u_int64_t sx,lx,ly,qs;
int cquo;
-
- GET_LDOUBLE_WORDS64 (hx, lx, x);
- GET_LDOUBLE_WORDS64 (hy, ly, y);
+ double xhi, xlo, yhi, ylo;
+
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (hx, xhi);
+ EXTRACT_WORDS64 (lx, xlo);
+ ldbl_unpack (y, &yhi, &ylo);
+ EXTRACT_WORDS64 (hy, yhi);
+ EXTRACT_WORDS64 (ly, ylo);
sx = hx & 0x8000000000000000ULL;
qs = sx ^ (hy & 0x8000000000000000ULL);
hy &= 0x7fffffffffffffffLL;
hx &= 0x7fffffffffffffffLL;
/* Purge off exception values. */
- if ((hy | (ly & 0x7fffffffffffffff)) == 0)
+ if (hy == 0)
return (x * y) / (x * y); /* y = 0 */
if ((hx >= 0x7ff0000000000000LL) /* x not finite */
- || ((hy >= 0x7ff0000000000000LL) /* y is NaN */
- && (((hy - 0x7ff0000000000000LL) | ly) != 0)))
+ || (hy > 0x7ff0000000000000LL)) /* y is NaN */
return (x * y) / (x * y);
if (hy <= 0x7fbfffffffffffffLL)
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_scalblnl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_scalblnl.c
index d75256888..03d459727 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_scalblnl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_scalblnl.c
@@ -41,11 +41,15 @@ long double __scalblnl (long double x, long int n)
{
int64_t k,l,hx,lx;
union { int64_t i; double d; } u;
- GET_LDOUBLE_WORDS64(hx,lx,x);
+ double xhi, xlo;
+
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (hx, xhi);
+ EXTRACT_WORDS64 (lx, xlo);
k = (hx>>52)&0x7ff; /* extract exponent */
l = (lx>>52)&0x7ff;
if (k==0) { /* 0 or subnormal x */
- if (((hx|lx)&0x7fffffffffffffffULL)==0) return x; /* +-0 */
+ if ((hx&0x7fffffffffffffffULL)==0) return x; /* +-0 */
u.i = hx;
u.d *= two54;
hx = u.i;
@@ -61,7 +65,9 @@ long double __scalblnl (long double x, long int n)
if (k > 0) { /* normal result */
hx = (hx&0x800fffffffffffffULL)|(k<<52);
if ((lx & 0x7fffffffffffffffULL) == 0) { /* low part +-0 */
- SET_LDOUBLE_WORDS64(x,hx,lx);
+ INSERT_WORDS64 (xhi, hx);
+ INSERT_WORDS64 (xlo, lx);
+ x = ldbl_pack (xhi, xlo);
return x;
}
if (l == 0) { /* low part subnormal */
@@ -81,14 +87,19 @@ long double __scalblnl (long double x, long int n)
u.d *= twom54;
lx = u.i;
}
- SET_LDOUBLE_WORDS64(x,hx,lx);
+ INSERT_WORDS64 (xhi, hx);
+ INSERT_WORDS64 (xlo, lx);
+ x = ldbl_pack (xhi, xlo);
return x;
}
if (k <= -54)
return tiny*__copysignl(tiny,x); /*underflow*/
k += 54; /* subnormal result */
lx &= 0x8000000000000000ULL;
- SET_LDOUBLE_WORDS64(x,(hx&0x800fffffffffffffULL)|(k<<52),lx);
+ hx &= 0x800fffffffffffffULL;
+ INSERT_WORDS64 (xhi, hx|(k<<52));
+ INSERT_WORDS64 (xlo, lx);
+ x = ldbl_pack (xhi, xlo);
return x*twolm54;
}
long_double_symbol (libm, __scalblnl, scalblnl);
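
__scalblnl (and __scalbnl below) scale by adding n to the biased exponent field of each half, falling back to multiplications by 2^54 / 2^-54 around the subnormal range. A sketch of the normal-result path on a single double:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

static double
scalbn_bits (double x, int n)   /* normal inputs and results only */
{
  int64_t hx;
  memcpy (&hx, &x, sizeof hx);
  int64_t k = (hx >> 52) & 0x7ff;   /* biased exponent */
  if (k == 0 || k == 0x7ff)
    return x;                       /* 0/subnormal/inf/NaN: not handled here */
  k += n;
  if (k <= 0 || k >= 0x7ff)
    return x;                       /* would leave the normal range */
  hx = (hx & 0x800fffffffffffffLL) | (k << 52);
  memcpy (&x, &hx, sizeof x);
  return x;
}

int
main (void)
{
  printf ("%g %g\n", scalbn_bits (3.0, 4), scalbn_bits (1.0, -3));
  return 0;
}
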
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_scalbnl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_scalbnl.c
index bcdb23bda..161172db6 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_scalbnl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_scalbnl.c
@@ -41,11 +41,15 @@ long double __scalbnl (long double x, int n)
{
int64_t k,l,hx,lx;
union { int64_t i; double d; } u;
- GET_LDOUBLE_WORDS64(hx,lx,x);
+ double xhi, xlo;
+
+ ldbl_unpack (x, &xhi, &xlo);
+ EXTRACT_WORDS64 (hx, xhi);
+ EXTRACT_WORDS64 (lx, xlo);
k = (hx>>52)&0x7ff; /* extract exponent */
l = (lx>>52)&0x7ff;
if (k==0) { /* 0 or subnormal x */
- if (((hx|lx)&0x7fffffffffffffffULL)==0) return x; /* +-0 */
+ if ((hx&0x7fffffffffffffffULL)==0) return x; /* +-0 */
u.i = hx;
u.d *= two54;
hx = u.i;
@@ -61,7 +65,9 @@ long double __scalbnl (long double x, int n)
if (k > 0) { /* normal result */
hx = (hx&0x800fffffffffffffULL)|(k<<52);
if ((lx & 0x7fffffffffffffffULL) == 0) { /* low part +-0 */
- SET_LDOUBLE_WORDS64(x,hx,lx);
+ INSERT_WORDS64 (xhi, hx);
+ INSERT_WORDS64 (xlo, lx);
+ x = ldbl_pack (xhi, xlo);
return x;
}
if (l == 0) { /* low part subnormal */
@@ -81,14 +87,19 @@ long double __scalbnl (long double x, int n)
u.d *= twom54;
lx = u.i;
}
- SET_LDOUBLE_WORDS64(x,hx,lx);
+ INSERT_WORDS64 (xhi, hx);
+ INSERT_WORDS64 (xlo, lx);
+ x = ldbl_pack (xhi, xlo);
return x;
}
if (k <= -54)
return tiny*__copysignl(tiny,x); /*underflow*/
k += 54; /* subnormal result */
lx &= 0x8000000000000000ULL;
- SET_LDOUBLE_WORDS64(x,(hx&0x800fffffffffffffULL)|(k<<52),lx);
+ hx &= 0x800fffffffffffffULL;
+ INSERT_WORDS64 (xhi, hx|(k<<52));
+ INSERT_WORDS64 (xlo, lx);
+ x = ldbl_pack (xhi, xlo);
return x*twolm54;
}
#ifdef IS_IN_libm
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_signbitl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_signbitl.c
index ee4aea6cf..aecb1fd79 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_signbitl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_signbitl.c
@@ -25,8 +25,10 @@ int
___signbitl (long double x)
{
int64_t e;
+ double xhi;
- GET_LDOUBLE_MSW64 (e, x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (e, xhi);
return e < 0;
}
#ifdef IS_IN_libm
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_sincosl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_sincosl.c
index 3b1e547bd..a9e2f3d19 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_sincosl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_sincosl.c
@@ -27,9 +27,11 @@ void
__sincosl (long double x, long double *sinx, long double *cosx)
{
int64_t ix;
+ double xhi;
/* High word of x. */
- GET_LDOUBLE_MSW64 (ix, x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (ix, xhi);
/* |x| ~< pi/4 */
ix &= 0x7fffffffffffffffLL;
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_sinl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_sinl.c
index 6fec16f85..087921a91 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_sinl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_sinl.c
@@ -53,9 +53,11 @@ long double __sinl(long double x)
{
long double y[2],z=0.0L;
int64_t n, ix;
+ double xhi;
/* High word of x. */
- GET_LDOUBLE_MSW64(ix,x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (ix, xhi);
/* |x| ~< pi/4 */
ix &= 0x7fffffffffffffffLL;
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_tanhl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_tanhl.c
index 138b63cd1..c63e25345 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_tanhl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_tanhl.c
@@ -47,10 +47,12 @@ static const long double one=1.0L, two=2.0L, tiny = 1.0e-300L;
long double __tanhl(long double x)
{
long double t,z;
- int64_t jx,ix,lx;
+ int64_t jx,ix;
+ double xhi;
/* High word of |x|. */
- GET_LDOUBLE_WORDS64(jx,lx,x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (jx, xhi);
ix = jx&0x7fffffffffffffffLL;
/* x is INF or NaN */
@@ -61,7 +63,7 @@ long double __tanhl(long double x)
/* |x| < 22 */
if (ix < 0x4036000000000000LL) { /* |x|<22 */
- if ((ix | (lx&0x7fffffffffffffffLL)) == 0)
+ if (ix == 0)
return x; /* x == +-0 */
if (ix<0x3c60000000000000LL) /* |x|<2**-57 */
return x*(one+x); /* tanh(small) = small */
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/s_tanl.c b/libc/sysdeps/ieee754/ldbl-128ibm/s_tanl.c
index 9967d0c20..66b8a0621 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/s_tanl.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/s_tanl.c
@@ -53,9 +53,11 @@ long double __tanl(long double x)
{
long double y[2],z=0.0L;
int64_t n, ix;
+ double xhi;
/* High word of x. */
- GET_LDOUBLE_MSW64(ix,x);
+ xhi = ldbl_high (x);
+ EXTRACT_WORDS64 (ix, xhi);
/* |x| ~< pi/4 */
ix &= 0x7fffffffffffffffLL;
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/strtold_l.c b/libc/sysdeps/ieee754/ldbl-128ibm/strtold_l.c
index 04e328857..93a80c5ee 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/strtold_l.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/strtold_l.c
@@ -43,11 +43,11 @@ libc_hidden_proto (STRTOF)
#define FLOAT_HUGE_VAL HUGE_VALL
# define SET_MANTISSA(flt, mant) \
do { union ibm_extended_long_double u; \
- u.d = (flt); \
- u.ieee_nan.mantissa0 = (mant) >> 32; \
- u.ieee_nan.mantissa1 = (mant); \
- if ((u.ieee.mantissa0 | u.ieee.mantissa1) != 0) \
- (flt) = u.d; \
+ u.ld = (flt); \
+ u.d[0].ieee_nan.mantissa0 = (mant) >> 32; \
+ u.d[0].ieee_nan.mantissa1 = (mant); \
+ if ((u.d[0].ieee.mantissa0 | u.d[0].ieee.mantissa1) != 0) \
+ (flt) = u.ld; \
} while (0)
#include <strtod_l.c>
diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/x2y2m1l.c b/libc/sysdeps/ieee754/ldbl-128ibm/x2y2m1l.c
index ed0d4a506..06dcf02ff 100644
--- a/libc/sysdeps/ieee754/ldbl-128ibm/x2y2m1l.c
+++ b/libc/sysdeps/ieee754/ldbl-128ibm/x2y2m1l.c
@@ -89,23 +89,23 @@ __x2y2m1l (long double x, long double y)
double vals[12];
SET_RESTORE_ROUND (FE_TONEAREST);
union ibm_extended_long_double xu, yu;
- xu.d = x;
- yu.d = y;
- if (fabs (xu.dd[1]) < 0x1p-500)
- xu.dd[1] = 0.0;
- if (fabs (yu.dd[1]) < 0x1p-500)
- yu.dd[1] = 0.0;
- mul_split (&vals[1], &vals[0], xu.dd[0], xu.dd[0]);
- mul_split (&vals[3], &vals[2], xu.dd[0], xu.dd[1]);
+ xu.ld = x;
+ yu.ld = y;
+ if (fabs (xu.d[1].d) < 0x1p-500)
+ xu.d[1].d = 0.0;
+ if (fabs (yu.d[1].d) < 0x1p-500)
+ yu.d[1].d = 0.0;
+ mul_split (&vals[1], &vals[0], xu.d[0].d, xu.d[0].d);
+ mul_split (&vals[3], &vals[2], xu.d[0].d, xu.d[1].d);
vals[2] *= 2.0;
vals[3] *= 2.0;
- mul_split (&vals[5], &vals[4], xu.dd[1], xu.dd[1]);
- mul_split (&vals[7], &vals[6], yu.dd[0], yu.dd[0]);
- mul_split (&vals[9], &vals[8], yu.dd[0], yu.dd[1]);
+ mul_split (&vals[5], &vals[4], xu.d[1].d, xu.d[1].d);
+ mul_split (&vals[7], &vals[6], yu.d[0].d, yu.d[0].d);
+ mul_split (&vals[9], &vals[8], yu.d[0].d, yu.d[1].d);
vals[8] *= 2.0;
vals[9] *= 2.0;
- mul_split (&vals[11], &vals[10], yu.dd[1], yu.dd[1]);
- if (xu.dd[0] >= 0.75)
+ mul_split (&vals[11], &vals[10], yu.d[1].d, yu.d[1].d);
+ if (xu.d[0].d >= 0.75)
vals[1] -= 1.0;
else
{
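
__x2y2m1l carries every product as an exact (hi, lo) pair produced by mul_split so the large cancellations in x*x + y*y - 1 are not lost. The helper itself is not shown in this hunk; one common way to write such a split, shown only as an illustration, is with fma:

#include <math.h>
#include <stdio.h>

/* hi + lo equals x*y exactly: hi is the rounded product, lo the
   rounding error recovered by fma. */
static void
mul_split_fma (double *hi, double *lo, double x, double y)
{
  *hi = x * y;
  *lo = fma (x, y, -*hi);
}

int
main (void)
{
  double hi, lo;
  mul_split_fma (&hi, &lo, 1.0 / 3.0, 3.0);
  printf ("hi=%.17g lo=%a\n", hi, lo);   /* hi + lo == (1/3)*3 exactly */
  return 0;
}
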
diff --git a/libc/sysdeps/ieee754/ldbl-96/printf_fphex.c b/libc/sysdeps/ieee754/ldbl-96/printf_fphex.c
index f356a4843..715c93b50 100644
--- a/libc/sysdeps/ieee754/ldbl-96/printf_fphex.c
+++ b/libc/sysdeps/ieee754/ldbl-96/printf_fphex.c
@@ -25,11 +25,13 @@ do { \
/* The "strange" 80 bit format on ix86 and m68k has an explicit \
leading digit in the 64 bit mantissa. */ \
unsigned long long int num; \
+ union ieee854_long_double u; \
+ u.d = fpnum.ldbl; \
\
assert (sizeof (long double) == 12); \
\
- num = (((unsigned long long int) fpnum.ldbl.ieee.mantissa0) << 32 \
- | fpnum.ldbl.ieee.mantissa1); \
+ num = (((unsigned long long int) u.ieee.mantissa0) << 32 \
+ | u.ieee.mantissa1); \
\
zero_mantissa = num == 0; \
\
@@ -62,7 +64,7 @@ do { \
\
/* We have 3 bits from the mantissa in the leading nibble. \
Therefore we are here using `IEEE854_LONG_DOUBLE_BIAS + 3'. */ \
- exponent = fpnum.ldbl.ieee.exponent; \
+ exponent = u.ieee.exponent; \
\
if (exponent == 0) \
{ \
diff --git a/libc/sysdeps/mach/hurd/fork.c b/libc/sysdeps/mach/hurd/fork.c
index ab11babff..321421fbb 100644
--- a/libc/sysdeps/mach/hurd/fork.c
+++ b/libc/sysdeps/mach/hurd/fork.c
@@ -34,6 +34,11 @@
symbol_set_declare (_hurd_fork_locks)
+/* Application callbacks registered through pthread_atfork. */
+DEFINE_HOOK (_hurd_atfork_prepare_hook, (void));
+DEFINE_HOOK (_hurd_atfork_child_hook, (void));
+DEFINE_HOOK (_hurd_atfork_parent_hook, (void));
+
/* Things that want to be called before we fork, to prepare the parent for
task_create, when the new child task will inherit our address space. */
DEFINE_HOOK (_hurd_fork_prepare_hook, (void));
@@ -62,6 +67,8 @@ __fork (void)
error_t err;
struct hurd_sigstate *volatile ss;
+ RUN_HOOK (_hurd_atfork_prepare_hook, ());
+
ss = _hurd_self_sigstate ();
__spin_lock (&ss->critical_section_lock);
@@ -695,6 +702,14 @@ __fork (void)
_hurd_critical_section_unlock (ss);
+ if (!err)
+ {
+ if (pid != 0)
+ RUN_HOOK (_hurd_atfork_parent_hook, ());
+ else
+ RUN_HOOK (_hurd_atfork_child_hook, ());
+ }
+
return err ? __hurd_fail (err) : pid;
}
libc_hidden_def (__fork)
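
The new _hurd_atfork_*_hook hooks give pthread_atfork callbacks the usual ordering: prepare runs in the parent before the task is created, then parent or child runs afterwards, selected by the returned pid. A sketch of that ordering using pthread_atfork itself:

#include <pthread.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

static void prepare (void) { puts ("prepare (parent, before fork)"); }
static void parent (void)  { puts ("parent  (parent, after fork)"); }
static void child (void)   { puts ("child   (child, after fork)"); }

int
main (void)
{
  pthread_atfork (prepare, parent, child);
  pid_t pid = fork ();
  if (pid == 0)
    _exit (0);
  waitpid (pid, NULL, 0);
  return 0;
}
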
diff --git a/libc/sysdeps/mach/hurd/i386/tls.h b/libc/sysdeps/mach/hurd/i386/tls.h
index 4f4c7c5df..da8c16aa0 100644
--- a/libc/sysdeps/mach/hurd/i386/tls.h
+++ b/libc/sysdeps/mach/hurd/i386/tls.h
@@ -118,7 +118,6 @@ _hurd_tls_init (tcbhead_t *tcb, int secondcall)
operation can cause a failure 'errno' must not be touched. */
# define TLS_INIT_TP(descr, secondcall) \
_hurd_tls_init ((tcbhead_t *) (descr), (secondcall))
-# define TLS_INIT_TP_EXPENSIVE 1
/* Return the TCB address of the current thread. */
# define THREAD_SELF \
diff --git a/libc/sysdeps/posix/dirstream.h b/libc/sysdeps/posix/dirstream.h
index 8e8570dd4..be2089505 100644
--- a/libc/sysdeps/posix/dirstream.h
+++ b/libc/sysdeps/posix/dirstream.h
@@ -41,8 +41,13 @@ struct __dirstream
int errcode; /* Delayed error code. */
- /* Directory block. */
- char data[0] __attribute__ ((aligned (__alignof__ (void*))));
+ /* Directory block. We must make sure that this block starts
+ at an address that is aligned adequately enough to store
+ dirent entries. Using the alignment of "void *" is not
+ sufficient because dirents on 32-bit platforms can require
+ 64-bit alignment. We use "long double" here to be consistent
+ with what malloc uses. */
+ char data[0] __attribute__ ((aligned (__alignof__ (long double))));
};
#define _DIR_dirfd(dirp) ((dirp)->fd)
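
The comment added to dirstream.h is the whole story: pointer alignment (4 bytes on 32-bit targets) is not enough for the 64-bit off_t/ino_t members a dirent64 can contain, so the block is aligned like long double, matching malloc. A sketch that just prints the relevant alignments (the numbers depend on the ABI, which is the point):

#include <stdio.h>

int
main (void)
{
  printf ("void*: %zu, long long: %zu, long double: %zu\n",
          __alignof__ (void *), __alignof__ (long long),
          __alignof__ (long double));
  return 0;
}
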
diff --git a/libc/sysdeps/posix/getaddrinfo.c b/libc/sysdeps/posix/getaddrinfo.c
index 52177e454..0f4b88514 100644
--- a/libc/sysdeps/posix/getaddrinfo.c
+++ b/libc/sysdeps/posix/getaddrinfo.c
@@ -558,16 +558,17 @@ gaih_inet (const char *name, const struct gaih_service *service,
struct gaih_addrtuple **pat = &at;
int no_data = 0;
int no_inet6_data = 0;
- service_user *nip = NULL;
+ service_user *nip;
enum nss_status inet6_status = NSS_STATUS_UNAVAIL;
enum nss_status status = NSS_STATUS_UNAVAIL;
int no_more;
int old_res_options;
- /* If we do not have to look for IPv6 addresses, use
- the simple, old functions, which do not support
- IPv6 scope ids. */
- if (req->ai_family == AF_INET)
+ /* If we do not have to look for IPv6 addresses or the canonical
+ name, use the simple, old functions, which do not support
+ IPv6 scope ids, nor retrieving the canonical name. */
+ if (req->ai_family == AF_INET
+ && (req->ai_flags & AI_CANONNAME) == 0)
{
/* Allocate additional room for struct host_data. */
size_t tmpbuflen = (512 + MAX_NR_ALIASES * sizeof(char*)
@@ -791,15 +792,13 @@ gaih_inet (const char *name, const struct gaih_service *service,
}
#endif
- if (__nss_hosts_database != NULL)
- {
- no_more = 0;
- nip = __nss_hosts_database;
- }
- else
+ if (__nss_hosts_database == NULL)
no_more = __nss_database_lookup ("hosts", NULL,
"dns [!UNAVAIL=return] files",
- &nip);
+ &__nss_hosts_database);
+ else
+ no_more = 0;
+ nip = __nss_hosts_database;
/* Initialize configurations. */
if (__glibc_unlikely (!_res_hconf.initialized))
@@ -1011,8 +1010,9 @@ gaih_inet (const char *name, const struct gaih_service *service,
canon = s;
else
{
- /* Set to name now to avoid using
- gethostbyaddr. */
+ /* If the canonical name cannot be
+ determined, use the passed in
+ string. */
if (malloc_canonbuf)
{
free (canonbuf);
@@ -1127,70 +1127,10 @@ gaih_inet (const char *name, const struct gaih_service *service,
/* Only the first entry gets the canonical name. */
if (at2 == at && (req->ai_flags & AI_CANONNAME) != 0)
{
- char *tmpbuf2 = NULL;
- bool malloc_tmpbuf2 = false;
-
if (canon == NULL)
- {
- struct hostent *h = NULL;
- int herrno;
- struct hostent th;
- /* Add room for struct host_data. */
- size_t tmpbuf2len = (512 + (MAX_NR_ALIASES+MAX_NR_ADDRS+1)
- * sizeof(char*) + 16 * sizeof(char));
-
- do
- {
- if (__libc_use_alloca (alloca_used + 2 * tmpbuf2len))
- tmpbuf2 = extend_alloca_account (tmpbuf2, tmpbuf2len,
- tmpbuf2len * 2,
- alloca_used);
- else
- {
- char *newp = realloc (malloc_tmpbuf2 ? tmpbuf2 : NULL,
- 2 * tmpbuf2len);
- if (newp == NULL)
- {
- if (malloc_tmpbuf2)
- free (tmpbuf2);
- result = -EAI_MEMORY;
- goto free_and_return;
- }
-
- tmpbuf2 = newp;
- tmpbuf2len = 2 * tmpbuf2len;
- malloc_tmpbuf2 = true;
- }
-
- rc = __gethostbyaddr_r (at2->addr,
- ((at2->family == AF_INET6)
- ? sizeof (struct in6_addr)
- : sizeof (struct in_addr)),
- at2->family, &th, tmpbuf2,
- tmpbuf2len, &h, &herrno);
- }
- while (rc == ERANGE && herrno == NETDB_INTERNAL);
-
- if (rc != 0 && herrno == NETDB_INTERNAL)
- {
- if (malloc_tmpbuf2)
- free (tmpbuf2);
-
- __set_h_errno (herrno);
- result = -EAI_SYSTEM;
- goto free_and_return;
- }
-
- if (h != NULL)
- canon = h->h_name;
- else
- {
- assert (orig_name != NULL);
- /* If the canonical name cannot be determined, use
- the passed in string. */
- canon = orig_name;
- }
- }
+ /* If the canonical name cannot be determined, use
+ the passed in string. */
+ canon = orig_name;
#ifdef HAVE_LIBIDN
if (req->ai_flags & AI_CANONIDN)
@@ -1205,9 +1145,6 @@ gaih_inet (const char *name, const struct gaih_service *service,
int rc = __idna_to_unicode_lzlz (canon, &out, idn_flags);
if (rc != IDNA_SUCCESS)
{
- if (malloc_tmpbuf2)
- free (tmpbuf2);
-
if (rc == IDNA_MALLOC_ERROR)
result = -EAI_MEMORY;
else if (rc == IDNA_DLOPEN_ERROR)
@@ -1237,17 +1174,11 @@ gaih_inet (const char *name, const struct gaih_service *service,
canon = strdup (canon);
if (canon == NULL)
{
- if (malloc_tmpbuf2)
- free (tmpbuf2);
-
result = -EAI_MEMORY;
goto free_and_return;
}
}
}
-
- if (malloc_tmpbuf2)
- free (tmpbuf2);
}
family = at2->family;
diff --git a/libc/sysdeps/powerpc/bits/fenv.h b/libc/sysdeps/powerpc/bits/fenv.h
index 07cd3c8e5..122edd3dc 100644
--- a/libc/sysdeps/powerpc/bits/fenv.h
+++ b/libc/sysdeps/powerpc/bits/fenv.h
@@ -19,75 +19,6 @@
# error "Never use <bits/fenv.h> directly; include <fenv.h> instead."
#endif
-#if defined __NO_FPRS__ && !defined _SOFT_FLOAT /* E500 */
-
-/* Define bits representing the exception. We use the bit positions of
- the appropriate bits in the SPEFSCR... */
-enum
- {
- FE_INEXACT =
-#define FE_INEXACT (1 << (63 - 42))
- FE_INEXACT,
- FE_INVALID =
-#define FE_INVALID (1 << (63 - 43))
- FE_INVALID,
- FE_DIVBYZERO =
-#define FE_DIVBYZERO (1 << (63 - 44))
- FE_DIVBYZERO,
- FE_UNDERFLOW =
-#define FE_UNDERFLOW (1 << (63 - 45))
- FE_UNDERFLOW,
- FE_OVERFLOW =
-#define FE_OVERFLOW (1 << (63 - 46))
- FE_OVERFLOW
- };
-
-#define FE_ALL_EXCEPT \
- (FE_INEXACT | FE_DIVBYZERO | FE_UNDERFLOW | FE_OVERFLOW | FE_INVALID)
-
-/* The E500 support all of the four defined rounding modes. We use
- the bit pattern in the SPEFSCR as the values for the appropriate
- macros. */
-enum
- {
- FE_TONEAREST =
-#define FE_TONEAREST 0
- FE_TONEAREST,
- FE_TOWARDZERO =
-#define FE_TOWARDZERO 1
- FE_TOWARDZERO,
- FE_UPWARD =
-#define FE_UPWARD 2
- FE_UPWARD,
- FE_DOWNWARD =
-#define FE_DOWNWARD 3
- FE_DOWNWARD
- };
-
-/* Type representing exception flags. */
-typedef unsigned int fexcept_t;
-
-typedef double fenv_t;
-
-/* If the default argument is used we use this value. */
-extern const fenv_t __fe_dfl_env;
-#define FE_DFL_ENV (&__fe_dfl_env)
-
-#ifdef __USE_GNU
-/* Floating-point environment where all exceptions are enabled. Note that
- this is not sufficient to give you SIGFPE. */
-extern const fenv_t __fe_enabled_env;
-# define FE_ENABLED_ENV (&__fe_enabled_env)
-
-/* Floating-point environment with all exceptions enabled. Note that
- just evaluating this value will set the processor into 'FPU
- exceptions imprecise recoverable' mode, which may cause a significant
- performance penalty (but have no other visible effect). */
-extern const fenv_t *__fe_nomask_env (void);
-# define FE_NOMASK_ENV (__fe_nomask_env ())
-#endif
-
-#else /* PowerPC 6xx floating-point. */
/* Define bits representing the exception. We use the bit positions of
the appropriate bits in the FPSCR... */
@@ -244,5 +175,3 @@ extern const fenv_t *__fe_mask_env (void);
__END_DECLS
#endif
-
-#endif
diff --git a/libc/sysdeps/powerpc/bits/mathinline.h b/libc/sysdeps/powerpc/bits/mathinline.h
index 140fff08e..cef5b29b1 100644
--- a/libc/sysdeps/powerpc/bits/mathinline.h
+++ b/libc/sysdeps/powerpc/bits/mathinline.h
@@ -61,21 +61,28 @@
__MATH_INLINE int
__NTH (__signbitf (float __x))
{
+#if __GNUC_PREREQ (4, 0)
+ return __builtin_signbitf (__x);
+#else
__extension__ union { float __f; int __i; } __u = { __f: __x };
return __u.__i < 0;
+#endif
}
__MATH_INLINE int
__NTH (__signbit (double __x))
{
- __extension__ union { double __d; int __i[2]; } __u = { __d: __x };
- return __u.__i[0] < 0;
+#if __GNUC_PREREQ (4, 0)
+ return __builtin_signbit (__x);
+#else
+ __extension__ union { double __d; long long __i; } __u = { __d: __x };
+ return __u.__i < 0;
+#endif
}
# ifdef __LONG_DOUBLE_128__
__MATH_INLINE int
__NTH (__signbitl (long double __x))
{
- __extension__ union { long double __d; int __i[4]; } __u = { __d: __x };
- return __u.__i[0] < 0;
+ return __signbit ((double) __x);
}
# endif
# endif
@@ -92,22 +99,17 @@ __NTH (lrint (double __x))
{
union {
double __d;
- int __ll[2];
+ long long __ll;
} __u;
__asm__ ("fctiw %0,%1" : "=f"(__u.__d) : "f"(__x));
- return __u.__ll[1];
+ return __u.__ll;
}
__MATH_INLINE long int lrintf (float __x) __THROW;
__MATH_INLINE long int
__NTH (lrintf (float __x))
{
- union {
- double __d;
- int __ll[2];
- } __u;
- __asm__ ("fctiw %0,%1" : "=f"(__u.__d) : "f"(__x));
- return __u.__ll[1];
+ return lrint ((double) __x);
}
# endif
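
The mathinline.h hunk offers two __signbit strategies: the GCC 4+ builtin, and the old union pun now done through a single 64-bit integer so the test is simply "is the integer negative". A sketch of both on double:

#include <stdio.h>

static int
signbit_builtin (double x)
{
  return __builtin_signbit (x) != 0;
}

static int
signbit_pun (double x)
{
  __extension__ union { double d; long long i; } u = { .d = x };
  return u.i < 0;
}

int
main (void)
{
  printf ("%d %d %d %d\n", signbit_builtin (-0.0), signbit_pun (-0.0),
          signbit_builtin (3.0), signbit_pun (3.0));
  return 0;
}
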
diff --git a/libc/sysdeps/powerpc/ffs.c b/libc/sysdeps/powerpc/ffs.c
index e0fee46b3..deba0cdd0 100644
--- a/libc/sysdeps/powerpc/ffs.c
+++ b/libc/sysdeps/powerpc/ffs.c
@@ -35,6 +35,7 @@ __ffs (int x)
return 32 - cnt;
}
weak_alias (__ffs, ffs)
+libc_hidden_def (__ffs)
libc_hidden_builtin_def (ffs)
#if ULONG_MAX == UINT_MAX
#undef ffsl
diff --git a/libc/sysdeps/powerpc/fpu/e_sqrt.c b/libc/sysdeps/powerpc/fpu/e_sqrt.c
index 3efe277f3..2d50fb525 100644
--- a/libc/sysdeps/powerpc/fpu/e_sqrt.c
+++ b/libc/sysdeps/powerpc/fpu/e_sqrt.c
@@ -145,7 +145,7 @@ __slow_ieee754_sqrt (double x)
feraiseexcept (FE_INVALID_SQRT);
fenv_union_t u = { .fenv = fegetenv_register () };
- if ((u.l[1] & FE_INVALID) == 0)
+ if ((u.l & FE_INVALID) == 0)
#endif
feraiseexcept (FE_INVALID);
x = a_nan.value;
diff --git a/libc/sysdeps/powerpc/fpu/e_sqrtf.c b/libc/sysdeps/powerpc/fpu/e_sqrtf.c
index 6e50a3cd7..91d2d37d7 100644
--- a/libc/sysdeps/powerpc/fpu/e_sqrtf.c
+++ b/libc/sysdeps/powerpc/fpu/e_sqrtf.c
@@ -121,7 +121,7 @@ __slow_ieee754_sqrtf (float x)
feraiseexcept (FE_INVALID_SQRT);
fenv_union_t u = { .fenv = fegetenv_register () };
- if ((u.l[1] & FE_INVALID) == 0)
+ if ((u.l & FE_INVALID) == 0)
#endif
feraiseexcept (FE_INVALID);
x = a_nan.value;
diff --git a/libc/sysdeps/powerpc/fpu/fclrexcpt.c b/libc/sysdeps/powerpc/fpu/fclrexcpt.c
index 86575dba6..7f66e21ce 100644
--- a/libc/sysdeps/powerpc/fpu/fclrexcpt.c
+++ b/libc/sysdeps/powerpc/fpu/fclrexcpt.c
@@ -28,8 +28,8 @@ __feclearexcept (int excepts)
u.fenv = fegetenv_register ();
/* Clear the relevant bits. */
- u.l[1] = u.l[1] & ~((-(excepts >> (31 - FPSCR_VX) & 1) & FE_ALL_INVALID)
- | (excepts & FPSCR_STICKY_BITS));
+ u.l = u.l & ~((-(excepts >> (31 - FPSCR_VX) & 1) & FE_ALL_INVALID)
+ | (excepts & FPSCR_STICKY_BITS));
/* Put the new state in effect. */
fesetenv_register (u.fenv);
diff --git a/libc/sysdeps/powerpc/fpu/fedisblxcpt.c b/libc/sysdeps/powerpc/fpu/fedisblxcpt.c
index 659566b67..f2c45a60c 100644
--- a/libc/sysdeps/powerpc/fpu/fedisblxcpt.c
+++ b/libc/sysdeps/powerpc/fpu/fedisblxcpt.c
@@ -32,15 +32,15 @@ fedisableexcept (int excepts)
fe.fenv = fegetenv_register ();
if (excepts & FE_INEXACT)
- fe.l[1] &= ~(1 << (31 - FPSCR_XE));
+ fe.l &= ~(1 << (31 - FPSCR_XE));
if (excepts & FE_DIVBYZERO)
- fe.l[1] &= ~(1 << (31 - FPSCR_ZE));
+ fe.l &= ~(1 << (31 - FPSCR_ZE));
if (excepts & FE_UNDERFLOW)
- fe.l[1] &= ~(1 << (31 - FPSCR_UE));
+ fe.l &= ~(1 << (31 - FPSCR_UE));
if (excepts & FE_OVERFLOW)
- fe.l[1] &= ~(1 << (31 - FPSCR_OE));
+ fe.l &= ~(1 << (31 - FPSCR_OE));
if (excepts & FE_INVALID)
- fe.l[1] &= ~(1 << (31 - FPSCR_VE));
+ fe.l &= ~(1 << (31 - FPSCR_VE));
fesetenv_register (fe.fenv);
new = __fegetexcept ();
diff --git a/libc/sysdeps/powerpc/fpu/feenablxcpt.c b/libc/sysdeps/powerpc/fpu/feenablxcpt.c
index fc4bfffad..472796d15 100644
--- a/libc/sysdeps/powerpc/fpu/feenablxcpt.c
+++ b/libc/sysdeps/powerpc/fpu/feenablxcpt.c
@@ -32,15 +32,15 @@ feenableexcept (int excepts)
fe.fenv = fegetenv_register ();
if (excepts & FE_INEXACT)
- fe.l[1] |= (1 << (31 - FPSCR_XE));
+ fe.l |= (1 << (31 - FPSCR_XE));
if (excepts & FE_DIVBYZERO)
- fe.l[1] |= (1 << (31 - FPSCR_ZE));
+ fe.l |= (1 << (31 - FPSCR_ZE));
if (excepts & FE_UNDERFLOW)
- fe.l[1] |= (1 << (31 - FPSCR_UE));
+ fe.l |= (1 << (31 - FPSCR_UE));
if (excepts & FE_OVERFLOW)
- fe.l[1] |= (1 << (31 - FPSCR_OE));
+ fe.l |= (1 << (31 - FPSCR_OE));
if (excepts & FE_INVALID)
- fe.l[1] |= (1 << (31 - FPSCR_VE));
+ fe.l |= (1 << (31 - FPSCR_VE));
fesetenv_register (fe.fenv);
new = __fegetexcept ();
diff --git a/libc/sysdeps/powerpc/fpu/fegetexcept.c b/libc/sysdeps/powerpc/fpu/fegetexcept.c
index f3d5724e9..23d47a27e 100644
--- a/libc/sysdeps/powerpc/fpu/fegetexcept.c
+++ b/libc/sysdeps/powerpc/fpu/fegetexcept.c
@@ -27,15 +27,15 @@ __fegetexcept (void)
fe.fenv = fegetenv_register ();
- if (fe.l[1] & (1 << (31 - FPSCR_XE)))
+ if (fe.l & (1 << (31 - FPSCR_XE)))
result |= FE_INEXACT;
- if (fe.l[1] & (1 << (31 - FPSCR_ZE)))
+ if (fe.l & (1 << (31 - FPSCR_ZE)))
result |= FE_DIVBYZERO;
- if (fe.l[1] & (1 << (31 - FPSCR_UE)))
+ if (fe.l & (1 << (31 - FPSCR_UE)))
result |= FE_UNDERFLOW;
- if (fe.l[1] & (1 << (31 - FPSCR_OE)))
+ if (fe.l & (1 << (31 - FPSCR_OE)))
result |= FE_OVERFLOW;
- if (fe.l[1] & (1 << (31 - FPSCR_VE)))
+ if (fe.l & (1 << (31 - FPSCR_VE)))
result |= FE_INVALID;
return result;
diff --git a/libc/sysdeps/powerpc/fpu/feholdexcpt.c b/libc/sysdeps/powerpc/fpu/feholdexcpt.c
index 013d2bfbb..0ecf0f7bc 100644
--- a/libc/sysdeps/powerpc/fpu/feholdexcpt.c
+++ b/libc/sysdeps/powerpc/fpu/feholdexcpt.c
@@ -30,13 +30,12 @@ feholdexcept (fenv_t *envp)
/* Clear everything except for the rounding modes and non-IEEE arithmetic
flag. */
- new.l[1] = old.l[1] & 7;
- new.l[0] = old.l[0];
+ new.l = old.l & 0xffffffff00000007LL;
/* If the old env had any enabled exceptions, then mask SIGFPE in the
MSR FE0/FE1 bits. This may allow the FPU to run faster because it
always takes the default action and can not generate SIGFPE. */
- if ((old.l[1] & _FPU_MASK_ALL) != 0)
+ if ((old.l & _FPU_MASK_ALL) != 0)
(void)__fe_mask_env ();
/* Put the new state in effect. */
diff --git a/libc/sysdeps/powerpc/fpu/fenv_libc.h b/libc/sysdeps/powerpc/fpu/fenv_libc.h
index 191095156..baa2a7d39 100644
--- a/libc/sysdeps/powerpc/fpu/fenv_libc.h
+++ b/libc/sysdeps/powerpc/fpu/fenv_libc.h
@@ -69,7 +69,7 @@ libm_hidden_proto (__fe_nomask_env)
typedef union
{
fenv_t fenv;
- unsigned int l[2];
+ unsigned long long l;
} fenv_union_t;
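
fenv_union_t now views the environment as one 64-bit integer instead of two 32-bit words; since the powerpc fenv_t is a double carrying the FPSCR image, the status and control bits the old code read from l[1] (the big-endian low word) are simply the low 32 bits of l. A trivial sketch of the new view with a stand-in value:

#include <stdio.h>

typedef union
{
  double fenv;            /* stands in for the powerpc fenv_t */
  unsigned long long l;   /* the single 64-bit view used throughout the patch */
} fenv_union_sketch;

int
main (void)
{
  fenv_union_sketch u;
  u.l = 0x00000000deadbeefULL;          /* pretend FPSCR image */
  printf ("status bits: %#llx\n", u.l & 0xffffffffULL);
  return 0;
}
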
diff --git a/libc/sysdeps/powerpc/fpu/fesetenv.c b/libc/sysdeps/powerpc/fpu/fesetenv.c
index e92adb4c5..6c00b267a 100644
--- a/libc/sysdeps/powerpc/fpu/fesetenv.c
+++ b/libc/sysdeps/powerpc/fpu/fesetenv.c
@@ -34,14 +34,14 @@ __fesetenv (const fenv_t *envp)
exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put the
hardware into "precise mode" and may cause the FPU to run slower on some
hardware. */
- if ((old.l[1] & _FPU_MASK_ALL) == 0 && (new.l[1] & _FPU_MASK_ALL) != 0)
+ if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
(void)__fe_nomask_env ();
/* If the old env had any enabled exceptions and the new env has no enabled
exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the
FPU to run faster because it always takes the default action and can not
generate SIGFPE. */
- if ((old.l[1] & _FPU_MASK_ALL) != 0 && (new.l[1] & _FPU_MASK_ALL) == 0)
+ if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
(void)__fe_mask_env ();
fesetenv_register (*envp);
diff --git a/libc/sysdeps/powerpc/fpu/feupdateenv.c b/libc/sysdeps/powerpc/fpu/feupdateenv.c
index 6500ea173..677504416 100644
--- a/libc/sysdeps/powerpc/fpu/feupdateenv.c
+++ b/libc/sysdeps/powerpc/fpu/feupdateenv.c
@@ -34,20 +34,20 @@ __feupdateenv (const fenv_t *envp)
/* Restore rounding mode and exception enable from *envp and merge
exceptions. Leave fraction rounded/inexact and FP result/CC bits
unchanged. */
- new.l[1] = (old.l[1] & 0x1FFFFF00) | (new.l[1] & 0x1FF80FFF);
+ new.l = (old.l & 0xffffffff1fffff00LL) | (new.l & 0x1ff80fff);
/* If the old env has no enabled exceptions and the new env has any enabled
exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits. This will put
the hardware into "precise mode" and may cause the FPU to run slower on
some hardware. */
- if ((old.l[1] & _FPU_MASK_ALL) == 0 && (new.l[1] & _FPU_MASK_ALL) != 0)
+ if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
(void)__fe_nomask_env ();
/* If the old env had any enabled exceptions and the new env has no enabled
exceptions, then mask SIGFPE in the MSR FE0/FE1 bits. This may allow the
FPU to run faster because it always takes the default action and can not
generate SIGFPE. */
- if ((old.l[1] & _FPU_MASK_ALL) != 0 && (new.l[1] & _FPU_MASK_ALL) == 0)
+ if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
(void)__fe_mask_env ();
/* Atomically enable and raise (if appropriate) exceptions set in `new'. */
diff --git a/libc/sysdeps/powerpc/fpu/fgetexcptflg.c b/libc/sysdeps/powerpc/fpu/fgetexcptflg.c
index f6327ce17..1395bede0 100644
--- a/libc/sysdeps/powerpc/fpu/fgetexcptflg.c
+++ b/libc/sysdeps/powerpc/fpu/fgetexcptflg.c
@@ -27,7 +27,7 @@ __fegetexceptflag (fexcept_t *flagp, int excepts)
u.fenv = fegetenv_register ();
/* Return (all of) it. */
- *flagp = u.l[1] & excepts & FE_ALL_EXCEPT;
+ *flagp = u.l & excepts & FE_ALL_EXCEPT;
/* Success. */
return 0;
diff --git a/libc/sysdeps/powerpc/fpu/fraiseexcpt.c b/libc/sysdeps/powerpc/fpu/fraiseexcpt.c
index 9118c1954..6193071bd 100644
--- a/libc/sysdeps/powerpc/fpu/fraiseexcpt.c
+++ b/libc/sysdeps/powerpc/fpu/fraiseexcpt.c
@@ -33,11 +33,11 @@ __feraiseexcept (int excepts)
u.fenv = fegetenv_register ();
/* Add the exceptions */
- u.l[1] = (u.l[1]
- | (excepts & FPSCR_STICKY_BITS)
- /* Turn FE_INVALID into FE_INVALID_SOFTWARE. */
- | (excepts >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT))
- & FE_INVALID_SOFTWARE));
+ u.l = (u.l
+ | (excepts & FPSCR_STICKY_BITS)
+ /* Turn FE_INVALID into FE_INVALID_SOFTWARE. */
+ | (excepts >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT))
+ & FE_INVALID_SOFTWARE));
/* Store the new status word (along with the rest of the environment),
triggering any appropriate exceptions. */
@@ -49,7 +49,7 @@ __feraiseexcept (int excepts)
don't have FE_INVALID_SOFTWARE implemented. Detect this
case and raise FE_INVALID_SNAN instead. */
u.fenv = fegetenv_register ();
- if ((u.l[1] & FE_INVALID) == 0)
+ if ((u.l & FE_INVALID) == 0)
set_fpscr_bit (FPSCR_VXSNAN);
}
diff --git a/libc/sysdeps/powerpc/fpu/fsetexcptflg.c b/libc/sysdeps/powerpc/fpu/fsetexcptflg.c
index c050d4022..0d309c8d5 100644
--- a/libc/sysdeps/powerpc/fpu/fsetexcptflg.c
+++ b/libc/sysdeps/powerpc/fpu/fsetexcptflg.c
@@ -31,10 +31,10 @@ __fesetexceptflag (const fexcept_t *flagp, int excepts)
flag = *flagp & excepts;
/* Replace the exception status */
- u.l[1] = ((u.l[1] & ~(FPSCR_STICKY_BITS & excepts))
- | (flag & FPSCR_STICKY_BITS)
- | (flag >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT))
- & FE_INVALID_SOFTWARE));
+ u.l = ((u.l & ~(FPSCR_STICKY_BITS & excepts))
+ | (flag & FPSCR_STICKY_BITS)
+ | (flag >> ((31 - FPSCR_VX) - (31 - FPSCR_VXSOFT))
+ & FE_INVALID_SOFTWARE));
/* Store the new status word (along with the rest of the environment).
This may cause floating-point exceptions if the restored state
diff --git a/libc/sysdeps/powerpc/fpu/ftestexcept.c b/libc/sysdeps/powerpc/fpu/ftestexcept.c
index 0dbc3befb..86eea0fb0 100644
--- a/libc/sysdeps/powerpc/fpu/ftestexcept.c
+++ b/libc/sysdeps/powerpc/fpu/ftestexcept.c
@@ -28,6 +28,6 @@ fetestexcept (int excepts)
/* The FE_INVALID bit is dealt with correctly by the hardware, so we can
just: */
- return u.l[1] & excepts;
+ return u.l & excepts;
}
libm_hidden_def (fetestexcept)
diff --git a/libc/sysdeps/powerpc/fpu/libm-test-ulps b/libc/sysdeps/powerpc/fpu/libm-test-ulps
index 6fdace9ee..37b2ca192 100644
--- a/libc/sysdeps/powerpc/fpu/libm-test-ulps
+++ b/libc/sysdeps/powerpc/fpu/libm-test-ulps
@@ -5927,11 +5927,31 @@ double: 1
idouble: 1
# gamma
+Test "gamma (-0x1p-10)":
+double: 1
+idouble: 1
+Test "gamma (-0x1p-15)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "gamma (-0x1p-20)":
+double: 1
+idouble: 1
+Test "gamma (-0x1p-5)":
+double: 1
+idouble: 1
Test "gamma (0.7)":
double: 1
float: 1
idouble: 1
ifloat: 1
+Test "gamma (0x1p-10)":
+float: 1
+ifloat: 1
+Test "gamma (0x1p-30)":
+double: 1
+idouble: 1
Test "gamma (1.2)":
double: 1
float: 2
@@ -6131,9 +6151,9 @@ ildouble: 1
ldouble: 1
Test "jn (10, 10.0)":
double: 2
-float: 1
+float: 2
idouble: 2
-ifloat: 1
+ifloat: 2
ildouble: 4
ldouble: 4
Test "jn (10, 2.0)":
@@ -6146,6 +6166,14 @@ double: 2
float: 2
idouble: 2
ifloat: 2
+Test "jn (2, 0x1p1023)":
+ildouble: 1
+ldouble: 1
+Test "jn (2, 0x1p127)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
Test "jn (2, 2.4048255576957729)":
double: 2
float: 1
@@ -6226,11 +6254,31 @@ ildouble: 7
ldouble: 7
# lgamma
+Test "lgamma (-0x1p-10)":
+double: 1
+idouble: 1
+Test "lgamma (-0x1p-15)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "lgamma (-0x1p-20)":
+double: 1
+idouble: 1
+Test "lgamma (-0x1p-5)":
+double: 1
+idouble: 1
Test "lgamma (0.7)":
double: 1
float: 1
idouble: 1
ifloat: 1
+Test "lgamma (0x1p-10)":
+float: 1
+ifloat: 1
+Test "lgamma (0x1p-30)":
+double: 1
+idouble: 1
Test "lgamma (1.2)":
double: 1
float: 2
@@ -6592,6 +6640,9 @@ float: 1
ifloat: 1
ildouble: 2
ldouble: 2
+Test "tan_towardzero (2)":
+ildouble: 1
+ldouble: 1
Test "tan_towardzero (3)":
float: 1
ifloat: 1
@@ -7334,6 +7385,19 @@ idouble: 3
ifloat: 1
ildouble: 1
ldouble: 1
+Test "yn (2, 0x1.ffff62p+99)":
+double: 1
+idouble: 1
+Test "yn (2, 0x1p1023)":
+ildouble: 1
+ldouble: 1
+Test "yn (2, 0x1p127)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
Test "yn (3, 0.125)":
double: 1
idouble: 1
diff --git a/libc/sysdeps/powerpc/fpu/s_float_bitwise.h b/libc/sysdeps/powerpc/fpu/s_float_bitwise.h
index 8e4adca86..c0a4e56be 100644
--- a/libc/sysdeps/powerpc/fpu/s_float_bitwise.h
+++ b/libc/sysdeps/powerpc/fpu/s_float_bitwise.h
@@ -23,18 +23,19 @@
#include <math_private.h>
/* Returns (int)(num & 0x7FFFFFF0 == value) */
-static inline
-int __float_and_test28 (float num, float value)
+static inline int
+__float_and_test28 (float num, float value)
{
float ret;
#ifdef _ARCH_PWR7
- vector int mask = (vector int) {
- 0x7ffffffe, 0x00000000, 0x00000000, 0x0000000
- };
+ union {
+ int i;
+ float f;
+ } mask = { .i = 0x7ffffff0 };
__asm__ (
- /* the 'f' constrain is use on mask because we just need
+ /* the 'f' constraint is used on mask because we just need
* to compare floats, not full vector */
- "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask)
+ "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f)
);
#else
int32_t inum;
@@ -46,16 +47,17 @@ int __float_and_test28 (float num, float value)
}
/* Returns (int)(num & 0x7FFFFF00 == value) */
-static inline
-int __float_and_test24 (float num, float value)
+static inline int
+__float_and_test24 (float num, float value)
{
float ret;
#ifdef _ARCH_PWR7
- vector int mask = (vector int) {
- 0x7fffffe0, 0x00000000, 0x00000000, 0x0000000
- };
+ union {
+ int i;
+ float f;
+ } mask = { .i = 0x7fffff00 };
__asm__ (
- "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask)
+ "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f)
);
#else
int32_t inum;
@@ -67,16 +69,17 @@ int __float_and_test24 (float num, float value)
}
/* Returns (float)(num & 0x7F800000) */
-static inline
-float __float_and8 (float num)
+static inline float
+__float_and8 (float num)
{
float ret;
#ifdef _ARCH_PWR7
- vector int mask = (vector int) {
- 0x7ff00000, 0x00000000, 0x00000000, 0x00000000
- };
+ union {
+ int i;
+ float f;
+ } mask = { .i = 0x7f800000 };
__asm__ (
- "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask)
+ "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f)
);
#else
int32_t inum;
@@ -88,17 +91,18 @@ float __float_and8 (float num)
}
/* Returns ((int32_t)(num & 0x7F800000) >> 23) */
-static inline
-int32_t __float_get_exp (float num)
+static inline int32_t
+__float_get_exp (float num)
{
int32_t inum;
#ifdef _ARCH_PWR7
float ret;
- vector int mask = (vector int) {
- 0x7ff00000, 0x00000000, 0x00000000, 0x00000000
- };
+ union {
+ int i;
+ float f;
+ } mask = { .i = 0x7f800000 };
__asm__ (
- "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask)
+ "xxland %x0,%x1,%x2" : "=f" (ret) : "f" (num), "f" (mask.f)
);
GET_FLOAT_WORD(inum, ret);
#else
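
Editor's note: a minimal sketch, not part of this patch, of the mask technique the _ARCH_PWR7 paths now use: build the 32-bit mask as an integer, view it as a float through a union, and AND the raw bit images. The patched code keeps both operands in floating-point registers and uses the VSX xxland instruction; the sketch does the AND in plain integer code.

#include <stdint.h>

static inline float
float_and_sketch (float num, uint32_t maskbits)
{
  union { uint32_t i; float f; } in, out, mask;
  mask.i = maskbits;            /* e.g. 0x7f800000 keeps only the exponent */
  in.f = num;
  out.i = in.i & mask.i;        /* mask the raw single-precision image */
  return out.f;
}
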
diff --git a/libc/sysdeps/powerpc/fpu/s_llround.c b/libc/sysdeps/powerpc/fpu/s_llround.c
index 9a0182653..995d0a724 100644
--- a/libc/sysdeps/powerpc/fpu/s_llround.c
+++ b/libc/sysdeps/powerpc/fpu/s_llround.c
@@ -19,29 +19,28 @@
#include <math.h>
#include <math_ldbl_opt.h>
-/* I think that what this routine is supposed to do is round a value
- to the nearest integer, with values exactly on the boundary rounded
- away from zero. */
-/* This routine relies on (long long)x, when x is out of range of a long long,
- clipping to MAX_LLONG or MIN_LLONG. */
+/* Round to the nearest integer, with values exactly on a 0.5 boundary
+ rounded away from zero, regardless of the current rounding mode.
+ If (long long)x, when x is out of range of a long long, clips at
+ LLONG_MAX or LLONG_MIN, then this implementation also clips. */
long long int
__llround (double x)
{
- double xrf;
- long long int xr;
- xr = (long long int) x;
- xrf = (double) xr;
+ long long xr = (long long) x;
+ double xrf = (double) xr;
+
if (x >= 0.0)
- if (x - xrf >= 0.5 && x - xrf < 1.0 && x+1 > 0)
- return x+1;
- else
- return x;
+ {
+ if (x - xrf >= 0.5)
+ xr += (long long) ((unsigned long long) xr + 1) > 0;
+ }
else
- if (xrf - x >= 0.5 && xrf - x < 1.0 && x-1 < 0)
- return x-1;
- else
- return x;
+ {
+ if (xrf - x >= 0.5)
+ xr -= (long long) ((unsigned long long) xr - 1) < 0;
+ }
+ return xr;
}
weak_alias (__llround, llround)
#ifdef NO_LONG_DOUBLE
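
Editor's note: a usage sketch, not part of this patch, showing the behaviour the rewritten __llround documents: halfway cases are rounded away from zero regardless of the current rounding mode, unlike llrint, which honours the mode. The values in the comments assume default glibc behaviour.

#include <fenv.h>
#include <math.h>
#include <stdio.h>

int main (void)
{
  fesetround (FE_DOWNWARD);                                /* ignored by llround */
  printf ("%lld %lld\n", llround (2.5), llround (-2.5));   /* 3 -3 */
  printf ("%lld %lld\n", llrint (2.5), llrint (-2.5));     /* 2 -3 */
  fesetround (FE_TONEAREST);
  return 0;
}
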
diff --git a/libc/sysdeps/powerpc/fpu/s_llroundf.c b/libc/sysdeps/powerpc/fpu/s_llroundf.c
index 07d12adbf..0935de662 100644
--- a/libc/sysdeps/powerpc/fpu/s_llroundf.c
+++ b/libc/sysdeps/powerpc/fpu/s_llroundf.c
@@ -18,28 +18,27 @@
#include <math.h>
-/* I think that what this routine is supposed to do is round a value
- to the nearest integer, with values exactly on the boundary rounded
- away from zero. */
-/* This routine relies on (long long)x, when x is out of range of a long long,
- clipping to MAX_LLONG or MIN_LLONG. */
+/* Round to the nearest integer, with values exactly on a 0.5 boundary
+ rounded away from zero, regardless of the current rounding mode.
+ If (long long)x, when x is out of range of a long long, clips at
+ LLONG_MAX or LLONG_MIN, then this implementation also clips. */
long long int
__llroundf (float x)
{
- float xrf;
- long long int xr;
- xr = (long long int) x;
- xrf = (float) xr;
+ long long xr = (long long) x;
+ float xrf = (float) xr;
+
if (x >= 0.0)
- if (x - xrf >= 0.5 && x - xrf < 1.0 && x+1 > 0)
- return x+1;
- else
- return x;
+ {
+ if (x - xrf >= 0.5)
+ xr += (long long) ((unsigned long long) xr + 1) > 0;
+ }
else
- if (xrf - x >= 0.5 && xrf - x < 1.0 && x-1 < 0)
- return x-1;
- else
- return x;
+ {
+ if (xrf - x >= 0.5)
+ xr -= (long long) ((unsigned long long) xr - 1) < 0;
+ }
+ return xr;
}
weak_alias (__llroundf, llroundf)
diff --git a/libc/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c b/libc/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c
index feffa6b4f..cc9b320bf 100644
--- a/libc/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c
+++ b/libc/sysdeps/powerpc/fpu/tst-setcontext-fpscr.c
@@ -83,7 +83,7 @@ ElfW(Addr) query_auxv(int type)
return 0;
}
-typedef unsigned long long di_fpscr_t __attribute__ ((__mode__ (__DI__)));
+typedef unsigned int di_fpscr_t __attribute__ ((__mode__ (__DI__)));
typedef unsigned int si_fpscr_t __attribute__ ((__mode__ (__SI__)));
#define _FPSCR_RESERVED 0xfffffff8ffffff04ULL
@@ -95,50 +95,51 @@ typedef unsigned int si_fpscr_t __attribute__ ((__mode__ (__SI__)));
#define _FPSCR_TEST1_RN 0x0000000000000002ULL
/* Macros for accessing the hardware control word on Power6[x]. */
-# define _GET_DI_FPSCR(__fpscr) ({ \
- union { double d; \
- di_fpscr_t fpscr; } \
- tmp __attribute__ ((__aligned__(8))); \
- __asm__ ("mffs 0; stfd%U0 0,%0" : "=m" (tmp.d) : : "fr0"); \
- (__fpscr)=tmp.fpscr; \
- tmp.fpscr; })
-
-/* We make sure to zero fp0 after we use it in order to prevent stale data
+#define _GET_DI_FPSCR(__fpscr) \
+ ({union { double d; di_fpscr_t fpscr; } u; \
+ register double fr; \
+ __asm__ ("mffs %0" : "=f" (fr)); \
+ u.d = fr; \
+ (__fpscr) = u.fpscr; \
+ u.fpscr; \
+ })
+
+/* We make sure to zero fp after we use it in order to prevent stale data
in an fp register from making a test-case pass erroneously. */
-# define _SET_DI_FPSCR(__fpscr) { \
- union { double d; di_fpscr_t fpscr; } \
- tmp __attribute__ ((__aligned__(8))); \
- tmp.fpscr = __fpscr; \
- /* Set the entire 64-bit FPSCR. */ \
- __asm__ ("lfd%U0 0,%0; " \
- ".machine push; " \
- ".machine \"power6\"; " \
- "mtfsf 255,0,1,0; " \
- ".machine pop" : : "m" (tmp.d) : "fr0"); \
- tmp.d = 0; \
- __asm__("lfd%U0 0,%0" : : "m" (tmp.d) : "fr0"); \
-}
-
-# define _GET_SI_FPSCR(__fpscr) ({ \
- union { double d; \
- si_fpscr_t cw[2]; } \
- tmp __attribute__ ((__aligned__(8))); \
- __asm__ ("mffs 0; stfd%U0 0,%0" : "=m" (tmp.d) : : "fr0"); \
- (__fpscr)=tmp.cw[1]; \
- tmp.cw[0]; })
-
-/* We make sure to zero fp0 after we use it in order to prevent stale data
+# define _SET_DI_FPSCR(__fpscr) \
+ { union { double d; di_fpscr_t fpscr; } u; \
+ register double fr; \
+ u.fpscr = __fpscr; \
+ fr = u.d; \
+ /* Set the entire 64-bit FPSCR. */ \
+ __asm__ (".machine push; " \
+ ".machine \"power6\"; " \
+ "mtfsf 255,%0,1,0; " \
+ ".machine pop" : : "f" (fr)); \
+ fr = 0.0; \
+ }
+
+# define _GET_SI_FPSCR(__fpscr) \
+ ({union { double d; di_fpscr_t fpscr; } u; \
+ register double fr; \
+ __asm__ ("mffs %0" : "=f" (fr)); \
+ u.d = fr; \
+ (__fpscr) = (si_fpscr_t) u.fpscr; \
+ (si_fpscr_t) u.fpscr; \
+ })
+
+/* We make sure to zero fp after we use it in order to prevent stale data
in an fp register from making a test-case pass erroneously. */
-# define _SET_SI_FPSCR(__fpscr) { \
- union { double d; si_fpscr_t fpscr[2]; } \
- tmp __attribute__ ((__aligned__(8))); \
- /* More-or-less arbitrary; this is a QNaN. */ \
- tmp.fpscr[0] = 0xFFF80000; \
- tmp.fpscr[1] = __fpscr; \
- __asm__ ("lfd%U0 0,%0; mtfsf 255,0" : : "m" (tmp.d) : "fr0"); \
- tmp.d = 0; \
- __asm__("lfd%U0 0,%0" : : "m" (tmp.d) : "fr0"); \
-}
+# define _SET_SI_FPSCR(__fpscr) \
+ { union { double d; di_fpscr_t fpscr; } u; \
+ register double fr; \
+ /* More-or-less arbitrary; this is a QNaN. */ \
+ u.fpscr = 0xfff80000ULL << 32; \
+ u.fpscr |= __fpscr & 0xffffffffULL; \
+ fr = u.d; \
+ __asm__ ("mtfsf 255,%0" : : "f" (fr)); \
+ fr = 0.0; \
+ }
void prime_special_regs(int which)
{
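
Editor's note: a sketch, not part of this patch, of the packing done by the new _SET_SI_FPSCR above: the high word of the 64-bit image is a QNaN pattern, so the double moved through the floating-point register is a valid value, and the low word carries the FPSCR bits handed to mtfsf.

#include <stdint.h>

static inline uint64_t
pack_fpscr_image (uint32_t fpscr)
{
  uint64_t image = 0xfff80000ULL << 32;   /* more-or-less arbitrary QNaN */
  image |= fpscr;                         /* status/control bits, low word */
  return image;
}
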
diff --git a/libc/sysdeps/powerpc/fpu_control.h b/libc/sysdeps/powerpc/fpu_control.h
index d03b8eb80..e82e7913c 100644
--- a/libc/sysdeps/powerpc/fpu_control.h
+++ b/libc/sysdeps/powerpc/fpu_control.h
@@ -28,7 +28,7 @@ typedef unsigned int fpu_control_t;
# define _FPU_SETCW(cw) (void) (cw)
extern fpu_control_t __fpu_control;
-#elif defined __NO_FPRS__ /* E500 */
+#elif defined __NO_FPRS__ /* e500 */
/* rounding control */
# define _FPU_RC_NEAREST 0x00 /* RECOMMENDED */
@@ -37,33 +37,28 @@ extern fpu_control_t __fpu_control;
# define _FPU_RC_ZERO 0x01
/* masking of interrupts */
-#define _FPU_MASK_ZM 0x10 /* zero divide */
-#define _FPU_MASK_OM 0x40 /* overflow */
-#define _FPU_MASK_UM 0x80 /* underflow */
-#define _FPU_MASK_XM 0x40 /* inexact */
-#define _FPU_MASK_IM 0x20 /* invalid operation */
+# define _FPU_MASK_ZM 0x10 /* zero divide */
+# define _FPU_MASK_OM 0x04 /* overflow */
+# define _FPU_MASK_UM 0x08 /* underflow */
+# define _FPU_MASK_XM 0x40 /* inexact */
+# define _FPU_MASK_IM 0x20 /* invalid operation */
-#define _FPU_RESERVED 0xff3fff7f /* These bits are reserved are not changed. */
+# define _FPU_RESERVED 0x00c10080 /* These bits are reserved and not changed. */
-/* The fdlibm code requires no interrupts for exceptions. */
-#define _FPU_DEFAULT 0x00000000 /* Default value. */
-
-/* IEEE: same as above, but (some) exceptions;
- we leave the 'inexact' exception off.
- */
-#define _FPU_IEEE 0x000003c0
+/* Correct IEEE semantics require traps to be enabled at the hardware
+ level; the kernel then does the emulation and determines whether
+ generation of signals from those traps was enabled using prctl. */
+# define _FPU_DEFAULT 0x0000003c /* Default value. */
+# define _FPU_IEEE _FPU_DEFAULT
/* Type of the control word. */
typedef unsigned int fpu_control_t;
/* Macros for accessing the hardware control word. */
-#define _FPU_GETCW(__cw) ({ \
- unsigned int env; \
- asm volatile ("mfspefscr %0" : "=r" (env)); \
- (__cw) = env; })
-#define _FPU_SETCW(__cw) ({ \
- unsigned int env = __cw; \
- asm volatile ("mtspefscr %0" : : "r" (env)); })
+# define _FPU_GETCW(cw) \
+ __asm__ volatile ("mfspefscr %0" : "=r" (cw))
+# define _FPU_SETCW(cw) \
+ __asm__ volatile ("mtspefscr %0" : : "r" (cw))
/* Default control word set at startup. */
extern fpu_control_t __fpu_control;
@@ -96,22 +91,26 @@ extern fpu_control_t __fpu_control;
# define _FPU_IEEE 0x000000f0
/* Type of the control word. */
-typedef unsigned int fpu_control_t __attribute__ ((__mode__ (__SI__)));
+typedef unsigned int fpu_control_t;
/* Macros for accessing the hardware control word. */
-# define _FPU_GETCW(__cw) ( { \
- union { double d; fpu_control_t cw[2]; } \
- tmp __attribute__ ((__aligned__(8))); \
- __asm__ ("mffs 0; stfd%U0 0,%0" : "=m" (tmp.d) : : "fr0"); \
- (__cw)=tmp.cw[1]; \
- tmp.cw[1]; } )
-# define _FPU_SETCW(__cw) { \
- union { double d; fpu_control_t cw[2]; } \
- tmp __attribute__ ((__aligned__(8))); \
- tmp.cw[0] = 0xFFF80000; /* More-or-less arbitrary; this is a QNaN. */ \
- tmp.cw[1] = __cw; \
- __asm__ ("lfd%U0 0,%0; mtfsf 255,0" : : "m" (tmp.d) : "fr0"); \
-}
+# define _FPU_GETCW(cw) \
+ ({union { double __d; unsigned long long __ll; } __u; \
+ register double __fr; \
+ __asm__ ("mffs %0" : "=f" (__fr)); \
+ __u.__d = __fr; \
+ (cw) = (fpu_control_t) __u.__ll; \
+ (fpu_control_t) __u.__ll; \
+ })
+
+# define _FPU_SETCW(cw) \
+ { union { double __d; unsigned long long __ll; } __u; \
+ register double __fr; \
+ __u.__ll = 0xfff80000LL << 32; /* This is a QNaN. */ \
+ __u.__ll |= (cw) & 0xffffffffLL; \
+ __fr = __u.__d; \
+ __asm__ ("mtfsf 255,%0" : : "f" (__fr)); \
+ }
/* Default control word set at startup. */
extern fpu_control_t __fpu_control;
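
Editor's note: a usage sketch, not part of this patch, for the <fpu_control.h> macros rewritten above: read the control word, keep only the reserved bits, merge in the default settings, and write it back. _FPU_GETCW, _FPU_SETCW, _FPU_RESERVED and _FPU_DEFAULT all come from the header; the function name is illustrative.

#include <fpu_control.h>

void
reset_fpu_control (void)
{
  fpu_control_t cw;
  _FPU_GETCW (cw);
  cw = (cw & _FPU_RESERVED) | _FPU_DEFAULT;
  _FPU_SETCW (cw);
}
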
diff --git a/libc/sysdeps/powerpc/jmpbuf-offsets.h b/libc/sysdeps/powerpc/jmpbuf-offsets.h
index 64c658a58..f2116bd70 100644
--- a/libc/sysdeps/powerpc/jmpbuf-offsets.h
+++ b/libc/sysdeps/powerpc/jmpbuf-offsets.h
@@ -21,12 +21,10 @@
#define JB_LR 2 /* The address we will return to */
#if __WORDSIZE == 64
# define JB_GPRS 3 /* GPRs 14 through 31 are saved, 18*2 words total. */
-# define JB_CR 21 /* Condition code registers with the VRSAVE at */
- /* offset 172 (low half of the double word. */
+# define JB_CR 21 /* Shared dword with VRSAVE. CR word at offset 172. */
# define JB_FPRS 22 /* FPRs 14 through 31 are saved, 18*2 words total. */
# define JB_SIZE (64 * 8) /* As per PPC64-VMX ABI. */
-# define JB_VRSAVE 21 /* VRSAVE shares a double word with the CR at offset */
- /* 168 (high half of the double word). */
+# define JB_VRSAVE 21 /* Shared dword with CR. VRSAVE word at offset 168. */
# define JB_VRS 40 /* VRs 20 through 31 are saved, 12*4 words total. */
#else
# define JB_GPRS 3 /* GPRs 14 through 31 are saved, 18 in total. */
diff --git a/libc/sysdeps/powerpc/longjmp.c b/libc/sysdeps/powerpc/longjmp.c
index 198c89420..189fc03ab 100644
--- a/libc/sysdeps/powerpc/longjmp.c
+++ b/libc/sysdeps/powerpc/longjmp.c
@@ -55,6 +55,6 @@ weak_alias (__vmx__libc_siglongjmp, __vmxsiglongjmp)
default_symbol_version (__vmx__libc_longjmp, __libc_longjmp, GLIBC_PRIVATE);
default_symbol_version (__vmx__libc_siglongjmp, __libc_siglongjmp, GLIBC_PRIVATE);
-default_symbol_version (__vmx_longjmp, _longjmp, GLIBC_2.3.4);
-default_symbol_version (__vmxlongjmp, longjmp, GLIBC_2.3.4);
-default_symbol_version (__vmxsiglongjmp, siglongjmp, GLIBC_2.3.4);
+versioned_symbol (libc, __vmx_longjmp, _longjmp, GLIBC_2_3_4);
+versioned_symbol (libc, __vmxlongjmp, longjmp, GLIBC_2_3_4);
+versioned_symbol (libc, __vmxsiglongjmp, siglongjmp, GLIBC_2_3_4);
diff --git a/libc/sysdeps/powerpc/nofpu/Makefile b/libc/sysdeps/powerpc/nofpu/Makefile
new file mode 100644
index 000000000..b9cbf8023
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/Makefile
@@ -0,0 +1,30 @@
+# Makefile fragment for PowerPC with no FPU.
+
+ifeq ($(subdir),soft-fp)
+sysdep_routines += $(gcc-single-routines) $(gcc-double-routines) \
+ sim-full
+endif
+
+ifeq ($(subdir),math)
+libm-support += fenv_const
+CPPFLAGS += -I../soft-fp/
+# The following CFLAGS are a workaround for GCC Bugzilla Bug 29253
+# "expand_abs wrong default code for floating point".
+# As this is not a regression, a fix is not likely to go into
+# gcc-4.1.1 and may be too late for gcc-4.2.  So we need these flags
+# until the fix is in a gcc release and glibc drops support for
+# earlier versions of gcc.
+CFLAGS-e_hypotl.c += -fno-builtin-fabsl
+CFLAGS-e_powl.c += -fno-builtin-fabsl
+CFLAGS-s_ccoshl.c += -fno-builtin-fabsl
+CFLAGS-s_csinhl.c += -fno-builtin-fabsl
+CFLAGS-s_clogl.c += -fno-builtin-fabsl
+CFLAGS-s_clog10l.c += -fno-builtin-fabsl
+CFLAGS-s_csinl.c += -fno-builtin-fabsl
+CFLAGS-s_csqrtl.c += -fno-builtin-fabsl
+CFLAGS-w_acosl.c += -fno-builtin-fabsl
+CFLAGS-w_asinl.c += -fno-builtin-fabsl
+CFLAGS-w_atanhl.c += -fno-builtin-fabsl
+CFLAGS-w_j0l.c += -fno-builtin-fabsl
+CFLAGS-w_j1l.c += -fno-builtin-fabsl
+endif
diff --git a/libc/sysdeps/powerpc/nofpu/Subdirs b/libc/sysdeps/powerpc/nofpu/Subdirs
new file mode 100644
index 000000000..87eadf302
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/Subdirs
@@ -0,0 +1 @@
+soft-fp
diff --git a/libc/sysdeps/powerpc/nofpu/Versions b/libc/sysdeps/powerpc/nofpu/Versions
new file mode 100644
index 000000000..1a29319d5
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/Versions
@@ -0,0 +1,20 @@
+libc {
+ GLIBC_2.3.2 {
+ __sim_exceptions; __sim_disabled_exceptions; __sim_round_mode;
+ __adddf3; __addsf3; __divdf3; __divsf3; __eqdf2; __eqsf2;
+ __extendsfdf2; __fixdfdi; __fixdfsi; __fixsfdi; __fixsfsi;
+ __fixunsdfdi; __fixunsdfsi; __fixunssfdi; __fixunssfsi;
+ __floatdidf; __floatdisf; __floatsidf; __floatsisf;
+ __gedf2; __gesf2; __ledf2; __lesf2; __muldf3; __mulsf3;
+ __negdf2; __negsf2; __sqrtdf2; __sqrtsf2; __subdf3;
+ __subsf3; __truncdfsf2;
+ }
+ GLIBC_2.4 {
+ __floatundidf; __floatundisf;
+ __floatunsidf; __floatunsisf;
+ __unorddf2; __unordsf2;
+ __nedf2; __nesf2;
+ __gtdf2; __gtsf2;
+ __ltdf2; __ltsf2;
+ }
+}
diff --git a/libc/sysdeps/powerpc/nofpu/fclrexcpt.c b/libc/sysdeps/powerpc/nofpu/fclrexcpt.c
new file mode 100644
index 000000000..fabda0ab9
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fclrexcpt.c
@@ -0,0 +1,37 @@
+/* Clear floating-point exceptions (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+int
+__feclearexcept (int x)
+{
+ __sim_exceptions &= ~x;
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__feclearexcept, __old_feclearexcept)
+compat_symbol (libm, __old_feclearexcept, feclearexcept, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__feclearexcept, feclearexcept)
+versioned_symbol (libm, __feclearexcept, feclearexcept, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/nofpu/fedisblxcpt.c b/libc/sysdeps/powerpc/nofpu/fedisblxcpt.c
new file mode 100644
index 000000000..e06c8f767
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fedisblxcpt.c
@@ -0,0 +1,32 @@
+/* Disable exceptions (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+#include <fenv.h>
+
+int
+fedisableexcept (int x)
+{
+ int old_exceptions = ~__sim_disabled_exceptions & FE_ALL_EXCEPT;
+
+ __sim_disabled_exceptions |= x;
+
+ return old_exceptions;
+}
diff --git a/libc/sysdeps/powerpc/nofpu/feenablxcpt.c b/libc/sysdeps/powerpc/nofpu/feenablxcpt.c
new file mode 100644
index 000000000..93249abf6
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/feenablxcpt.c
@@ -0,0 +1,32 @@
+/* Enable exceptions (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv.h>
+
+extern int __sim_disabled_exceptions;
+
+int
+feenableexcept (int exceptions)
+{
+ int old_exceptions = ~__sim_disabled_exceptions & FE_ALL_EXCEPT;
+
+ __sim_disabled_exceptions &= ~exceptions;
+
+ return old_exceptions;
+}
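
Editor's note: a usage sketch, not part of this patch. fedisableexcept and feenableexcept are GNU extensions declared in <fenv.h>; both implementations above return the set of exceptions that were enabled before the call, so a caller can disable trapping around a region and then restore the previous state.

#define _GNU_SOURCE
#include <fenv.h>

void
run_with_invalid_trap_disabled (void (*fn) (void))
{
  int previously_enabled = fedisableexcept (FE_INVALID);
  fn ();
  if (previously_enabled & FE_INVALID)
    feenableexcept (FE_INVALID);    /* re-enable only if it was on before */
}
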
diff --git a/libc/sysdeps/powerpc/nofpu/fegetenv.c b/libc/sysdeps/powerpc/nofpu/fegetenv.c
new file mode 100644
index 000000000..51bcef30a
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fegetenv.c
@@ -0,0 +1,48 @@
+/* Store current floating-point environment (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002, 2010.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+extern int __sim_exceptions;
+extern int __sim_disabled_exceptions;
+extern int __sim_round_mode;
+
+int
+__fegetenv (fenv_t *envp)
+{
+ fenv_union_t u;
+
+ u.l[0] = __sim_exceptions;
+ u.l[0] |= __sim_round_mode;
+ u.l[1] = __sim_disabled_exceptions;
+
+ *envp = u.fenv;
+
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fegetenv, __old_fegetenv)
+compat_symbol (libm, __old_fegetenv, fegetenv, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__fegetenv, fegetenv)
+versioned_symbol (libm, __fegetenv, fegetenv, GLIBC_2_2);
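
Editor's note: a standalone sketch, not part of this patch, of the soft-float environment layout __fegetenv packs above: exception flags and the two-bit rounding mode share the first word, and the disabled-exception mask occupies the second. The struct and names here are illustrative only.

#include <fenv.h>

struct sim_env_sketch
{
  unsigned int flags_and_round;   /* FE_* flags | rounding mode (0..3) */
  unsigned int disabled;          /* exceptions that do not raise SIGFPE */
};

static struct sim_env_sketch
pack_sim_env (int exceptions, int round_mode, int disabled)
{
  struct sim_env_sketch e;
  e.flags_and_round = (exceptions & FE_ALL_EXCEPT) | (round_mode & 0x3);
  e.disabled = disabled;
  return e;
}
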
diff --git a/libc/sysdeps/powerpc/nofpu/fegetexcept.c b/libc/sysdeps/powerpc/nofpu/fegetexcept.c
new file mode 100644
index 000000000..ea39a82b7
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fegetexcept.c
@@ -0,0 +1,27 @@
+/* Get floating-point exceptions (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+int
+fegetexcept (void)
+{
+ return (__sim_disabled_exceptions ^ FE_ALL_EXCEPT) & FE_ALL_EXCEPT;
+}
diff --git a/libc/sysdeps/powerpc/nofpu/fegetround.c b/libc/sysdeps/powerpc/nofpu/fegetround.c
new file mode 100644
index 000000000..c232ae379
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fegetround.c
@@ -0,0 +1,28 @@
+/* Return current rounding mode (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+#undef fegetround
+int
+fegetround (void)
+{
+ return __sim_round_mode;
+}
diff --git a/libc/sysdeps/powerpc/nofpu/feholdexcpt.c b/libc/sysdeps/powerpc/nofpu/feholdexcpt.c
new file mode 100644
index 000000000..ba6a53acc
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/feholdexcpt.c
@@ -0,0 +1,43 @@
+/* Store current floating-point environment and clear exceptions
+ (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+int
+feholdexcept (fenv_t *envp)
+{
+ fenv_union_t u;
+
+ /* Get the current state. */
+ __fegetenv (envp);
+
+ u.fenv = *envp;
+ /* Clear everything except the rounding mode. */
+ u.l[0] &= 0x3;
+ /* Disable exceptions */
+ u.l[1] = FE_ALL_EXCEPT;
+
+ /* Put the new state in effect. */
+ fesetenv (&u.fenv);
+
+ return 0;
+}
+libm_hidden_def (feholdexcept)
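
Editor's note: a usage sketch, not part of this patch, of the feholdexcept/feupdateenv pairing these soft-float files implement: run a computation in non-stop mode, then merge any exceptions it accrued back into the caller's environment.

#include <fenv.h>

double
guarded_divide (double a, double b)
{
  fenv_t env;
  feholdexcept (&env);     /* save env, clear flags, disable trapping */
  double r = a / b;        /* may accrue FE_DIVBYZERO without a SIGFPE */
  feupdateenv (&env);      /* restore env, re-raise accrued exceptions */
  return r;
}
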
diff --git a/libc/sysdeps/powerpc/nofpu/fenv_const.c b/libc/sysdeps/powerpc/nofpu/fenv_const.c
new file mode 100644
index 000000000..291b1accc
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fenv_const.c
@@ -0,0 +1,34 @@
+/* Constants for fenv_bits.h (soft float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* We want to specify the bit pattern of the __fe_*_env constants, so
+ pretend they're really `long long' instead of `double'. */
+
+/* If the default argument is used we use this value. Disable all
+ signalling exceptions as default. */
+const unsigned long long __fe_dfl_env __attribute__ ((aligned (8))) =
+0x000000003e000000ULL;
+
+/* Floating-point environment where none of the exceptions are masked. */
+const unsigned long long __fe_enabled_env __attribute__ ((aligned (8))) =
+0xfff80000000000f8ULL;
+
+/* Floating-point environment with the NI bit set. */
+const unsigned long long __fe_nonieee_env __attribute__ ((aligned (8))) =
+0xfff8000000000004ULL;
diff --git a/libc/sysdeps/powerpc/nofpu/fenv_libc.h b/libc/sysdeps/powerpc/nofpu/fenv_libc.h
new file mode 100644
index 000000000..14a2d04a2
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fenv_libc.h
@@ -0,0 +1,28 @@
+/* Internal libc stuff for floating point environment routines.
+ Copyright (C) 2007-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _FENV_LIBC_H
+#define _FENV_LIBC_H 1
+
+/* fenv_libc.h is used in the libm implementations for ldbl-128ibm, so this
+   soft-fp version needs to at least include fenv.h to get the fegetround
+   definition. */
+
+#include <fenv.h>
+
+#endif /* fenv_libc.h */
diff --git a/libc/sysdeps/powerpc/nofpu/fesetenv.c b/libc/sysdeps/powerpc/nofpu/fesetenv.c
new file mode 100644
index 000000000..3f35909b6
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fesetenv.c
@@ -0,0 +1,42 @@
+/* Set floating point environment (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+int
+__fesetenv (const fenv_t *envp)
+{
+ fenv_union_t u;
+
+ u.fenv = *envp;
+ __sim_exceptions = u.l[0] & FE_ALL_EXCEPT;
+ __sim_round_mode = u.l[0] & 0x3;
+ __sim_disabled_exceptions = u.l[1];
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fesetenv, __old_fesetenv)
+compat_symbol (libm, __old_fesetenv, fesetenv, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__fesetenv, fesetenv)
+versioned_symbol (libm, __fesetenv, fesetenv, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/nofpu/fesetround.c b/libc/sysdeps/powerpc/nofpu/fesetround.c
new file mode 100644
index 000000000..028c1300c
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fesetround.c
@@ -0,0 +1,33 @@
+/* Set rounding mode (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+int
+fesetround (int round)
+{
+ if ((unsigned int) round > FE_DOWNWARD)
+ return 1;
+
+ __sim_round_mode = round;
+
+ return 0;
+}
+libm_hidden_def (fesetround)
diff --git a/libc/sysdeps/powerpc/nofpu/feupdateenv.c b/libc/sysdeps/powerpc/nofpu/feupdateenv.c
new file mode 100644
index 000000000..163f67310
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/feupdateenv.c
@@ -0,0 +1,51 @@
+/* Install given floating-point environment and raise exceptions
+ (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+#include <signal.h>
+
+int
+__feupdateenv (const fenv_t *envp)
+{
+ int saved_exceptions;
+
+ /* Save currently set exceptions. */
+ saved_exceptions = __sim_exceptions;
+
+ /* Set environment. */
+ fesetenv (envp);
+
+ /* Raise old exceptions. */
+ __sim_exceptions |= saved_exceptions;
+ if (saved_exceptions & ~__sim_disabled_exceptions)
+ raise (SIGFPE);
+
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__feupdateenv, __old_feupdateenv)
+compat_symbol (libm, __old_feupdateenv, feupdateenv, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__feupdateenv, feupdateenv)
+versioned_symbol (libm, __feupdateenv, feupdateenv, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/nofpu/fgetexcptflg.c b/libc/sysdeps/powerpc/nofpu/fgetexcptflg.c
new file mode 100644
index 000000000..2373fa400
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fgetexcptflg.c
@@ -0,0 +1,37 @@
+/* Store current representation for exceptions (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+int
+__fegetexceptflag (fexcept_t *flagp, int excepts)
+{
+ *flagp = (fexcept_t) __sim_exceptions & excepts & FE_ALL_EXCEPT;
+
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fegetexceptflag, __old_fegetexceptflag)
+compat_symbol (libm, __old_fegetexceptflag, fegetexceptflag, GLIBC_2_1);
+#endif
+
+versioned_symbol (libm, __fegetexceptflag, fegetexceptflag, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/nofpu/fraiseexcpt.c b/libc/sysdeps/powerpc/nofpu/fraiseexcpt.c
new file mode 100644
index 000000000..cd142b60b
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fraiseexcpt.c
@@ -0,0 +1,41 @@
+/* Raise given exceptions (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+#include <signal.h>
+
+#undef feraiseexcept
+int
+__feraiseexcept (int x)
+{
+ __sim_exceptions |= x;
+ if (x & ~__sim_disabled_exceptions)
+ raise (SIGFPE);
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__feraiseexcept, __old_feraiseexcept)
+compat_symbol (libm, __old_feraiseexcept, feraiseexcept, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__feraiseexcept, feraiseexcept)
+versioned_symbol (libm, __feraiseexcept, feraiseexcept, GLIBC_2_2);
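
Editor's note: a usage sketch, not part of this patch. With the soft-float __feraiseexcept above, raising a flag also delivers SIGFPE unless that exception is in __sim_disabled_exceptions; testing the flag afterwards goes through fetestexcept as usual.

#include <fenv.h>

int
flag_overflow_if (int condition)
{
  if (condition)
    feraiseexcept (FE_OVERFLOW);
  return fetestexcept (FE_OVERFLOW) != 0;
}
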
diff --git a/libc/sysdeps/powerpc/nofpu/fsetexcptflg.c b/libc/sysdeps/powerpc/nofpu/fsetexcptflg.c
new file mode 100644
index 000000000..3dc368fdd
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/fsetexcptflg.c
@@ -0,0 +1,38 @@
+/* Set floating-point environment exception handling (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+int
+__fesetexceptflag (const fexcept_t *flagp, int excepts)
+{
+ /* Ignore exceptions not listed in 'excepts'. */
+ __sim_exceptions = (__sim_exceptions & ~excepts) | (*flagp & excepts);
+
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fesetexceptflag, __old_fesetexceptflag)
+compat_symbol (libm, __old_fesetexceptflag, fesetexceptflag, GLIBC_2_1);
+#endif
+
+versioned_symbol (libm, __fesetexceptflag, fesetexceptflag, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/nofpu/ftestexcept.c b/libc/sysdeps/powerpc/nofpu/ftestexcept.c
new file mode 100644
index 000000000..f5d01e881
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/ftestexcept.c
@@ -0,0 +1,28 @@
+/* Test floating-point exceptions (soft-float edition).
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+int
+fetestexcept (int x)
+{
+ return __sim_exceptions & x;
+}
+libm_hidden_def (fetestexcept)
diff --git a/libc/sysdeps/powerpc/nofpu/get-rounding-mode.h b/libc/sysdeps/powerpc/nofpu/get-rounding-mode.h
new file mode 100644
index 000000000..20eb81030
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/get-rounding-mode.h
@@ -0,0 +1,35 @@
+/* Determine floating-point rounding mode within libc. PowerPC
+ soft-float version.
+ Copyright (C) 2012-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _POWERPC_NOFPU_GET_ROUNDING_MODE_H
+#define _POWERPC_NOFPU_GET_ROUNDING_MODE_H 1
+
+#include <fenv.h>
+
+#include "soft-supp.h"
+
+/* Return the floating-point rounding mode. */
+
+static inline int
+get_rounding_mode (void)
+{
+ return __sim_round_mode;
+}
+
+#endif /* get-rounding-mode.h */
diff --git a/libc/sysdeps/powerpc/nofpu/libm-test-ulps b/libc/sysdeps/powerpc/nofpu/libm-test-ulps
new file mode 100644
index 000000000..ad5a9cd42
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/libm-test-ulps
@@ -0,0 +1,7297 @@
+# Begin of automatic generation
+
+# acos
+Test "acos (-0x0.ffffffff8p0)":
+ildouble: 1
+ldouble: 1
+Test "acos (-0x0.ffffffp0)":
+ildouble: 1
+ldouble: 1
+Test "acos (2e-17)":
+ildouble: 1
+ldouble: 1
+
+# acos_downward
+Test "acos_downward (-0)":
+float: 1
+ifloat: 1
+Test "acos_downward (-0.5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "acos_downward (-1)":
+float: 1
+ifloat: 1
+Test "acos_downward (0)":
+float: 1
+ifloat: 1
+Test "acos_downward (0.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# acos_towardzero
+Test "acos_towardzero (-0)":
+float: 1
+ifloat: 1
+Test "acos_towardzero (-0.5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "acos_towardzero (-1)":
+float: 1
+ifloat: 1
+Test "acos_towardzero (0)":
+float: 1
+ifloat: 1
+Test "acos_towardzero (0.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# acos_upward
+Test "acos_upward (-0)":
+ildouble: 2
+ldouble: 2
+Test "acos_upward (-1)":
+ildouble: 2
+ldouble: 2
+Test "acos_upward (0)":
+ildouble: 2
+ldouble: 2
+
+# asin
+Test "asin (-0x0.ffffffff8p0)":
+ildouble: 1
+ldouble: 1
+Test "asin (-0x0.ffffffp0)":
+ildouble: 1
+ldouble: 1
+Test "asin (0.75)":
+ildouble: 2
+ldouble: 2
+Test "asin (0x0.ffffffff8p0)":
+ildouble: 1
+ldouble: 1
+Test "asin (0x0.ffffffp0)":
+ildouble: 1
+ldouble: 1
+
+# asin_downward
+Test "asin_downward (-0.5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "asin_downward (-1.0)":
+ildouble: 1
+ldouble: 1
+Test "asin_downward (0.5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "asin_downward (1.0)":
+float: 1
+ifloat: 1
+
+# asin_towardzero
+Test "asin_towardzero (-0.5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "asin_towardzero (-1.0)":
+float: 1
+ifloat: 1
+Test "asin_towardzero (0.5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "asin_towardzero (1.0)":
+float: 1
+ifloat: 1
+
+# asin_upward
+Test "asin_upward (-1.0)":
+float: 1
+ifloat: 1
+Test "asin_upward (1.0)":
+ildouble: 1
+ldouble: 1
+
+# atan2
+Test "atan2 (-0.00756827042671106339, -.001792735857538728036)":
+ildouble: 1
+ldouble: 1
+Test "atan2 (-0.75, -1.0)":
+float: 1
+ifloat: 1
+Test "atan2 (-inf, -inf)":
+ildouble: 1
+ldouble: 1
+Test "atan2 (-max_value, -min_value)":
+float: 1
+ifloat: 1
+Test "atan2 (0.75, -1.0)":
+float: 1
+ifloat: 1
+Test "atan2 (1.390625, 0.9296875)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "atan2 (inf, -inf)":
+ildouble: 1
+ldouble: 1
+
+# atanh
+Test "atanh (0.75)":
+float: 1
+ifloat: 1
+
+# cabs
+Test "cabs (0.75 + 1.25 i)":
+ildouble: 1
+ldouble: 1
+
+# cacos
+Test "Imaginary part of: cacos (+0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (+0 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (+0 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (+0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (+0 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (+0 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0.0 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.0 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.25 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0.25 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.25 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0.25 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 + +0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 + 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 + 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0.5 + 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 + 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0.5 + 0x1p-112 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 + 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0.5 + 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 + 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0.5 + 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 - 0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 - 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 - 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0.5 - 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 - 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0.5 - 0x1p-112 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 - 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0.5 - 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 - 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0.5 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0.5 - 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x0.fffffffffffff8p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x0.fffffffffffff8p0 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0x0.fffffffffffff8p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x0.fffffffffffff8p0 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0x0.ffffffffffffffffp0 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x0.ffffffffffffffffp0 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x0.ffffffp0 + 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x0.ffffffp0 + 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x0.ffffffp0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Real part of: cacos (-0x0.ffffffp0 - 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x0.ffffffp0 - 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x0.ffffffp0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Real part of: cacos (-0x1.0000000000000002p0 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x1.0000000000000002p0 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x1.0000000000001p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1.0000000000001p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x1.0000000000001p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1.0000000000001p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x1.000002p0 + 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1.000002p0 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: cacos (-0x1.000002p0 - 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1.000002p0 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: cacos (-0x1.fp-10 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-10 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0x1.fp-100 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1.fp-100 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-100 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1.fp-100 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1.fp-1000 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-1000 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-1025 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-1025 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-1025 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-1025 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-1025 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-1025 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0x1.fp-129 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1.fp-129 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 + 0x0.ffffffp0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 + 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1.fp-129 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 + 1.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1.fp-129 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0x1.fp-129 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1.fp-129 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 - 0x0.ffffffp0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 - 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1.fp-129 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-129 - 1.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1.fp-129 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (-0x1.fp-30 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-30 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1.fp-30 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1.fp-30 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-105 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-105 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-105 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-105 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x1p-105 + 0x1p-105 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-105 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-105 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-105 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-105 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x1p-105 - 0x1p-105 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-112 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-112 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-112 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-112 + 0x1p-112 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-112 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-112 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-112 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-112 - 0x1p-112 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-23 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-23 + 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1p-23 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-23 + 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0x1p-23 + 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x1p-23 + 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-23 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-23 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-23 - 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1p-23 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-23 - 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (-0x1p-23 - 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-0x1p-23 - 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-23 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-52 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-52 + 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1p-52 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-52 + 0x1p-52 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-52 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-52 - 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-0x1p-52 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-52 - 0x1p-52 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-63 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-63 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-63 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-63 + 0x1p-63 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-63 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-63 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-0x1p-63 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-0x1p-63 - 0x1p-63 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-1.0 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-1.0 + 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-1.0 + 0x1.fp-100 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-1.0 + 0x1p50 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-1.0 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (-1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (-1.0 - 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-1.0 - 0x1.fp-100 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-1.0 - 0x1p50 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-2 - 3 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (-inf + inf i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (-inf - inf i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0.0 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0.0 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0.25 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0.25 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0.5 + +0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 + 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 + 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 + 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 + 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 + 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 + 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0.5 + 0x1p-63 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0.5 + 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0.5 - 0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 - 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 - 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 - 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 - 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 - 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 - 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0.5 - 0x1p-63 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0.5 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0.5 - 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x0.fffffffffffff8p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x0.fffffffffffff8p0 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0x0.fffffffffffff8p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x0.fffffffffffff8p0 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0x0.ffffffffffffffffffffffffffcp0 + 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x0.ffffffffffffffffffffffffffcp0 - 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x0.ffffffp0 + 0.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x0.ffffffp0 + 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x0.ffffffp0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x0.ffffffp0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Real part of: cacos (0x0.ffffffp0 - 0.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x0.ffffffp0 - 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x0.ffffffp0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x0.ffffffp0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Real part of: cacos (0x1.0000000000001p0 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.0000000000001p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x1.0000000000001p0 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.0000000000001p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x1.000002p0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: cacos (0x1.000002p0 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: cacos (0x1.000002p0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: cacos (0x1.000002p0 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: cacos (0x1.fp-10 + 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1.fp-10 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0x1.fp-10 - 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1.fp-10 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (0x1.fp-100 + 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1.fp-100 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacos (0x1.fp-100 - 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1.fp-100 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1.fp-1000 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.fp-1000 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.fp-1025 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1.fp-1025 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.fp-1025 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.fp-1025 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1.fp-1025 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.fp-1025 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.fp-129 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1.fp-129 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1.fp-129 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.fp-129 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1.fp-129 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1.fp-129 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1.fp-30 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1.fp-30 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1.fp1023 + 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1.fp127 + 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x1p-105 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1p-105 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1p-105 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x1p-105 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1p-105 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1p-105 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1p-112 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1p-112 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1p-23 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1p-23 + 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1p-23 + 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1p-23 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1p-23 - 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacos (0x1p-23 - 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1p-52 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (0x1p-52 + 0x0.fffffffffffff8p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x1p-52 + 0x1.0000000000001p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1p-52 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (0x1p-52 - 0x0.fffffffffffff8p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (0x1p-52 - 0x1.0000000000001p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (0x1p-63 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (0x1p-63 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (1.0 + 0.25 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (1.0 + 0x1.fp-10 i)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (1.0 + 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (1.0 + 0x1.fp-100 i)":
+ildouble: 2
+ldouble: 2
+Test "Real part of: cacos (1.0 - 0.25 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacos (1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacos (1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacos (1.0 - 0x1.fp-10 i)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacos (1.0 - 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacos (1.0 - 0x1.fp-100 i)":
+ildouble: 2
+ldouble: 2
+
+# cacosh
+Test "Real part of: cacosh (+0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (+0 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacosh (+0 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (+0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (+0 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacosh (+0 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacosh (-0 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacosh (-0 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0.0 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.0 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.25 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.25 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacosh (-0.25 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.25 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0.5 + +0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.5 + 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0.5 + 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 + 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0.5 + 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 + 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0.5 + 0x1p-112 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 + 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0.5 + 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.5 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 + 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.5 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0.5 - 0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.5 - 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0.5 - 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 - 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0.5 - 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 - 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0.5 - 0x1p-112 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 - 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0.5 - 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.5 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0.5 - 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0.5 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x0.fffffffffffff8p0 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0x0.fffffffffffff8p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x0.fffffffffffff8p0 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0x0.fffffffffffff8p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x0.ffffffffffffffffp0 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x0.ffffffffffffffffp0 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x0.ffffffp0 + 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x0.ffffffp0 + 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x0.ffffffp0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: cacosh (-0x0.ffffffp0 - 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x0.ffffffp0 - 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x0.ffffffp0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: cacosh (-0x1.0000000000000002p0 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1.0000000000000002p0 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1.0000000000001p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1.0000000000001p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1.0000000000001p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1.0000000000001p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1.000002p0 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: cacosh (-0x1.000002p0 + 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1.000002p0 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: cacosh (-0x1.000002p0 - 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1.fp-10 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1.fp-10 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1.fp-100 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-100 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1.fp-100 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-100 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1.fp-1000 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1.fp-1000 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1.fp-1025 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1.fp-1025 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1.fp-1025 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1.fp-1025 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1.fp-1025 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1.fp-1025 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1.fp-129 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 + 0x0.ffffffp0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 + 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1.fp-129 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1.fp-129 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 + 1.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1.fp-129 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 - 0x0.ffffffp0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 - 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1.fp-129 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1.fp-129 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (-0x1.fp-129 - 1.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1.fp-30 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-30 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1.fp-30 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1.fp-30 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1p-105 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-105 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-105 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-105 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1p-105 + 0x1p-105 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-105 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-105 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-105 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-105 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1p-105 - 0x1p-105 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-112 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-112 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-112 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-112 + 0x1p-112 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-112 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-112 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-112 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-112 - 0x1p-112 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-23 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-23 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-23 + 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1p-23 + 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1p-23 + 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1p-23 + 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-23 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-23 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-23 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-23 - 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-0x1p-23 - 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (-0x1p-23 - 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1p-23 - 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-23 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-52 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-52 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-52 + 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1p-52 + 0x1p-52 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-52 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-52 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-52 - 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-0x1p-52 - 0x1p-52 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-63 + 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-63 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-63 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-63 + 0x1p-63 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-63 - 0.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-0x1p-63 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-63 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-0x1p-63 - 0x1p-63 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-1.0 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-1.0 + 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-1.0 + 0x1.fp-100 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-1.0 + 0x1p50 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (-1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-1.0 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (-1.0 - 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-1.0 - 0x1.fp-100 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-1.0 - 0x1p50 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-2 - 3 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (-inf + inf i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (-inf - inf i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0.0 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0.0 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0.25 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0.25 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0.5 + +0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 + 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 + 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 + 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 + 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 + 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 + 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0.5 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 + 0x1p-63 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 + 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0.5 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (0.5 - 0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 - 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 - 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 - 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 - 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 - 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 - 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0.5 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0.5 - 0x1p-63 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0.5 - 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0.5 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (0x0.fffffffffffff8p0 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0x0.fffffffffffff8p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x0.fffffffffffff8p0 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0x0.fffffffffffff8p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x0.ffffffffffffffffffffffffffcp0 + 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x0.ffffffffffffffffffffffffffcp0 - 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x0.ffffffp0 + 0.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x0.ffffffp0 + 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x0.ffffffp0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: cacosh (0x0.ffffffp0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x0.ffffffp0 - 0.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x0.ffffffp0 - 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x0.ffffffp0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: cacosh (0x0.ffffffp0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1.0000000000001p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x1.0000000000001p0 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.0000000000001p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x1.0000000000001p0 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.000002p0 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: cacosh (0x1.000002p0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Real part of: cacosh (0x1.000002p0 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: cacosh (0x1.000002p0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Real part of: cacosh (0x1.fp-10 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0x1.fp-10 + 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1.fp-10 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cacosh (0x1.fp-10 - 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1.fp-100 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (0x1.fp-100 + 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1.fp-100 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (0x1.fp-100 - 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1.fp-1000 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.fp-1000 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.fp-1025 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1.fp-1025 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.fp-1025 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.fp-1025 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1.fp-1025 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.fp-1025 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.fp-129 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1.fp-129 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1.fp-129 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.fp-129 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1.fp-129 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1.fp-129 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1.fp-30 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1.fp-30 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1.fp1023 + 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1.fp127 + 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1p-105 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (0x1p-105 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1p-105 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1p-105 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (0x1p-105 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1p-105 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1p-112 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1p-112 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1p-23 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1p-23 + 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1p-23 + 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1p-23 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1p-23 - 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (0x1p-23 - 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1p-52 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (0x1p-52 + 0x0.fffffffffffff8p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x1p-52 + 0x1.0000000000001p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1p-52 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (0x1p-52 - 0x0.fffffffffffff8p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (0x1p-52 - 0x1.0000000000001p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cacosh (0x1p-63 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (0x1p-63 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (1.0 + 0.25 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (1.0 + 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (1.0 + 0x1.fp-10 i)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (1.0 + 0x1.fp-100 i)":
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: cacosh (1.0 - 0.25 i)":
+double: 1
+idouble: 1
+Test "Real part of: cacosh (1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cacosh (1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: cacosh (1.0 - 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (1.0 - 0x1.fp-10 i)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cacosh (1.0 - 0x1.fp-100 i)":
+ildouble: 2
+ldouble: 2
+
+# carg
+Test "carg (-inf + inf i)":
+ildouble: 1
+ldouble: 1
+Test "carg (-inf - inf i)":
+ildouble: 1
+ldouble: 1
+
+# casin
+Test "Imaginary part of: casin (+0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (+0 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (+0 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (+0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (+0 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (+0 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0.0 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0.0 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0.25 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0.25 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0.5 + 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0.5 + 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0.5 + 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (-0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0.5 + 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0.5 - 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0.5 - 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0.5 - 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (-0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0.5 - 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x0.fffffffffffff8p0 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x0.fffffffffffff8p0 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (-0x0.ffffffffffffffffffffffffffcp0 + 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffffffffffffffffffffffcp0 + 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffffffffffffffffffffffcp0 - 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffffffffffffffffffffffcp0 - 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffffffffffffp0 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffffffffffffp0 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffp0 + 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffp0 + 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffp0 + 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x0.ffffffp0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Real part of: casin (-0x0.ffffffp0 - 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffp0 - 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x0.ffffffp0 - 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x0.ffffffp0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: casin (-0x1.0000000000001p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1.0000000000001p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1.000002p0 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: casin (-0x1.000002p0 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: casin (-0x1.fp-10 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1.fp-10 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (-0x1.fp-10 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1.fp-10 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x1.fp-100 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1.fp-100 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1.fp-1000 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x1.fp-1000 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x1.fp-1025 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1.fp-1025 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x1.fp-1025 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x1.fp-1025 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1.fp-1025 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x1.fp-1025 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x1.fp-129 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1.fp-129 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1.fp-129 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x1.fp-129 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1.fp-129 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1.fp-129 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (-0x1.fp-30 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1.fp-30 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casin (-0x1p-105 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1p-105 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1p-105 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x1p-105 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1p-105 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1p-105 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x1p-106 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x1p-106 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1p-112 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1p-112 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (-0x1p-23 + 0.5 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1p-23 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (-0x1p-23 + 0x0.ffffffp0 i)":
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: casin (-0x1p-23 + 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (-0x1p-23 + 0x1.000002p0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1p-23 + 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-0x1p-23 - 0.5 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1p-23 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (-0x1p-23 - 0x0.ffffffp0 i)":
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: casin (-0x1p-23 - 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (-0x1p-23 - 0x1.000002p0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1p-23 - 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1p-52 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (-0x1p-52 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (-0x1p-63 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1p-63 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (-0x1p-63 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-0x1p-63 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (-1.0 + 0.25 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (-1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (-1.0 + 0x1.fp-10 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-1.0 + 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-1.0 - 0.25 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (-1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (-1.0 - 0x1.fp-10 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (-1.0 - 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (-2 - 3 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.0 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.0 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.25 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.25 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0.5 + 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0.5 + 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0.5 + 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.5 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.5 + 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0.5 - 0x1p-105 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0.5 - 0x1p-112 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0.5 - 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.5 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0.5 - 1.0 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0.75 + 1.25 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x0.fffffffffffff8p0 + 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x0.fffffffffffff8p0 - 0x1p-52 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (0x0.ffffffffffffffffffffffffffcp0 + 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffffffffffffffffffffffcp0 + 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffffffffffffffffffffffcp0 - 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffffffffffffffffffffffcp0 - 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffffffffffffp0 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffffffffffffp0 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffp0 + 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffp0 + 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffp0 + 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x0.ffffffp0 + 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Real part of: casin (0x0.ffffffp0 - 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffp0 - 0x1.fp-129 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x0.ffffffp0 - 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x0.ffffffp0 - 0x1p-23 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: casin (0x1.0000000000001p0 + 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1.0000000000001p0 - 0x1p-52 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1.000002p0 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: casin (0x1.000002p0 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: casin (0x1.fp-10 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp-10 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (0x1.fp-10 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp-10 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x1.fp-100 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp-100 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp-1000 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x1.fp-1000 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x1.fp-1025 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1.fp-1025 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x1.fp-1025 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x1.fp-1025 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1.fp-1025 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x1.fp-1025 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x1.fp-129 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp-129 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp-129 + 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x1.fp-129 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp-129 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp-129 - 1.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casin (0x1.fp-30 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp-30 - 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1.fp1023 + 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1.fp127 + 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x1p-105 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1p-105 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1p-105 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x1p-105 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1p-105 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1p-105 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x1p-106 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x1p-106 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1p-112 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1p-112 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (0x1p-23 + 0.5 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1p-23 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (0x1p-23 + 0x0.ffffffp0 i)":
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: casin (0x1p-23 + 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (0x1p-23 + 0x1.000002p0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1p-23 + 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (0x1p-23 - 0.5 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1p-23 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (0x1p-23 - 0x0.ffffffp0 i)":
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: casin (0x1p-23 - 0x0.ffffffp0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (0x1p-23 - 0x1.000002p0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1p-23 - 0x1.000002p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1p-52 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casin (0x1p-52 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (0x1p-63 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1p-63 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (0x1p-63 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (0x1p-63 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (1.0 + 0.25 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (1.0 + 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (1.0 + 0x1.fp-10 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (1.0 + 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casin (1.0 - 0.25 i)":
+double: 1
+idouble: 1
+Test "Real part of: casin (1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (1.0 - 0.5 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casin (1.0 - 0x1.fp-10 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casin (1.0 - 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# casinh
+Test "Imaginary part of: casinh (-0.0 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0.0 + 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0.0 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0.0 - 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0.25 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (-0.25 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-0.5 + +0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 + 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 + 0x1p-105 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (-0.5 + 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0.5 + 0x1p-112 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (-0.5 + 0x1p-23 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 + 0x1p-52 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 + 0x1p-63 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (-0.5 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0.5 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (-0.5 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0.5 - 0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 - 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 - 0x1p-105 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (-0.5 - 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0.5 - 0x1p-112 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (-0.5 - 0x1p-23 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 - 0x1p-52 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-0.5 - 0x1p-63 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (-0.5 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0.5 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (-0.5 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x0.ffffffffffffffffffffffffffcp0 + 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x0.ffffffffffffffffffffffffffcp0 - 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x0.ffffffp0 + 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (-0x0.ffffffp0 + 0x1p-23 i)":
+ildouble: 2
+ldouble: 2
+Test "Real part of: casinh (-0x0.ffffffp0 - 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (-0x0.ffffffp0 - 0x1p-23 i)":
+ildouble: 2
+ldouble: 2
+Test "Real part of: casinh (-0x1.000000000000000000000000008p0 + 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1.000000000000000000000000008p0 + 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1.000000000000000000000000008p0 + 0x1p-106 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1.000000000000000000000000008p0 - 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1.000000000000000000000000008p0 - 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1.000000000000000000000000008p0 - 0x1p-106 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1.000002p0 + 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1.000002p0 + 0x1p-23 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-0x1.000002p0 - 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1.000002p0 - 0x1p-23 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-0x1.fp-10 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1.fp-10 + 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1.fp-10 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1.fp-10 - 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1.fp-1025 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1.fp-1025 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1.fp-129 + 0.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (-0x1.fp-129 + 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1.fp-129 - 0.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (-0x1.fp-129 - 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1p-105 + 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-0x1p-105 - 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-0x1p-112 + 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-0x1p-112 - 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-0x1p-23 + 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-0x1p-23 + 0x0.ffffffp0 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: casinh (-0x1p-23 + 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1p-23 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: casinh (-0x1p-23 - 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-0x1p-23 - 0x0.ffffffp0 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: casinh (-0x1p-23 - 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1p-23 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: casinh (-0x1p-52 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1p-52 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1p-52 + 0x0.fffffffffffff8p0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-0x1p-52 + 0x1.0000000000001p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1p-52 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1p-52 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-0x1p-52 - 0x0.fffffffffffff8p0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-0x1p-52 - 0x1.0000000000001p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1p-63 + 0x0.ffffffffffffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (-0x1p-63 - 0x0.ffffffffffffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-1.0 + +0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-1.0 + 0.25 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-1.0 + 0.5 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-1.0 + 0x1.fp-10 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (-1.0 + 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-1.0 + 0x1.fp-100 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-1.0 + 0x1.fp-1000 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-1.0 + 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-1.0 + 0x1.fp-129 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-1.0 + 0x1.fp-30 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-1.0 - 0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-1.0 - 0.25 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-1.0 - 0.5 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (-1.0 - 0x1.fp-10 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (-1.0 - 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (-1.0 - 0x1.fp-100 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-1.0 - 0x1.fp-1000 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-1.0 - 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-1.0 - 0x1.fp-129 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-1.0 - 0x1.fp-30 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (-1.5 + +0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-1.5 + 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-1.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-1.5 - 0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-1.5 - 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (-1.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (0.0 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0.0 + 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0.0 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0.0 - 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0.25 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (0.25 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (0.5 + +0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 + 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 + 0x1p-105 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (0.5 + 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0.5 + 0x1p-112 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 + 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (0.5 + 0x1p-23 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 + 0x1p-52 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 + 0x1p-63 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (0.5 + 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0.5 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (0.5 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0.5 - 0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 - 0x1.fp-129 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 - 0x1p-105 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (0.5 - 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0.5 - 0x1p-112 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 - 0x1p-23 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (0.5 - 0x1p-23 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 - 0x1p-52 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (0.5 - 0x1p-63 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (0.5 - 0x1p-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0.5 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (0.5 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0.75 + 1.25 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: casinh (0.75 + 1.25 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (0x0.ffffffffffffffffffffffffffcp0 + 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x0.ffffffffffffffffffffffffffcp0 - 0x1p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x0.ffffffp0 + 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (0x0.ffffffp0 + 0x1p-23 i)":
+ildouble: 2
+ldouble: 2
+Test "Real part of: casinh (0x0.ffffffp0 - 0x1p-23 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (0x0.ffffffp0 - 0x1p-23 i)":
+ildouble: 2
+ldouble: 2
+Test "Real part of: casinh (0x1.000000000000000000000000008p0 + 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1.000000000000000000000000008p0 + 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1.000000000000000000000000008p0 + 0x1p-106 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1.000000000000000000000000008p0 - 0.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1.000000000000000000000000008p0 - 0x1.fp-1025 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1.000000000000000000000000008p0 - 0x1p-106 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1.000002p0 + 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1.000002p0 + 0x1p-23 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (0x1.000002p0 - 0x1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1.000002p0 - 0x1p-23 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (0x1.fp-10 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1.fp-10 + 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1.fp-10 - 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1.fp-10 - 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1.fp-1025 + 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1.fp-1025 - 0x0.ffffffffffffffffffffffffffcp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1.fp-129 + 0.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (0x1.fp-129 + 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1.fp-129 - 0.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (0x1.fp-129 - 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1.fp1023 + 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1.fp127 + 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1p-105 + 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (0x1p-105 - 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (0x1p-112 + 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (0x1p-112 - 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (0x1p-23 + 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (0x1p-23 + 0x0.ffffffp0 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: casinh (0x1p-23 + 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1p-23 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: casinh (0x1p-23 - 0.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (0x1p-23 - 0x0.ffffffp0 i)":
+float: 2
+ifloat: 2
+Test "Imaginary part of: casinh (0x1p-23 - 0x0.ffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1p-23 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: casinh (0x1p-52 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1p-52 + 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1p-52 + 0x0.fffffffffffff8p0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (0x1p-52 + 0x1.0000000000001p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1p-52 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1p-52 - 0.5 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (0x1p-52 - 0x0.fffffffffffff8p0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (0x1p-52 - 0x1.0000000000001p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1p-63 + 0x0.ffffffffffffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: casinh (0x1p-63 - 0x0.ffffffffffffffffp0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (1.0 + +0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (1.0 + 0.25 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (1.0 + 0.5 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (1.0 + 0x1.fp-10 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (1.0 + 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (1.0 + 0x1.fp-100 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (1.0 + 0x1.fp-1000 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (1.0 + 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (1.0 + 0x1.fp-129 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (1.0 + 0x1.fp-30 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (1.0 - 0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (1.0 - 0.25 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (1.0 - 0.5 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: casinh (1.0 - 0x1.fp-10 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: casinh (1.0 - 0x1.fp-10 i)":
+float: 1
+ifloat: 1
+Test "Real part of: casinh (1.0 - 0x1.fp-100 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (1.0 - 0x1.fp-1000 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (1.0 - 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (1.0 - 0x1.fp-129 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (1.0 - 0x1.fp-30 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: casinh (1.5 + +0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (1.5 + 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (1.5 + 0x1.fp-129 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (1.5 - 0 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (1.5 - 0x1.fp-1025 i)":
+double: 1
+idouble: 1
+Test "Real part of: casinh (1.5 - 0x1.fp-129 i)":
+double: 1
+idouble: 1
+
+# catan
+Test "Imaginary part of: catan (-0x0.fffffffffffff8p0 + 0x1p-27 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catan (-0x0.fffffffffffff8p0 - 0x1p-27 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (-0x0.ffffffp0 + 0x1p-13 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: catan (-0x1.000000000000000000000000008p0 + 0x1p-54 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: catan (-0x1.000000000000000000000000008p0 - 0x1p-54 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (-0x1.0000000000001p0 - 0x1p-27 i)":
+double: 1
+idouble: 1
+Test "Real part of: catan (-0x1.000002p0 + 0x1p-126 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (-0x1.000002p0 + 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (-0x1.000002p0 - 0x1p-126 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (-0x1.000002p0 - 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (-0x1.000002p0 - 0x1p-13 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catan (-0x1.fp1023 + 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (-0x1.fp1023 - 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (-0x1.fp127 + 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (-0x1.fp127 - 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (-0x1p-1020 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catan (-0x1p-1020 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: catan (-0x1p-13 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (-0x1p-13 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (-0x1p-13 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (-0x1p-13 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (-0x1p-13 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (-0x1p-54 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (-0x1p-54 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (-0x1p-57 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (-0x1p-57 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (-1.0 + 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (-1.0 + 0x1p-54 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: catan (-1.0 - 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (-1.0 - 0x1p-54 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: catan (-2 - 3 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (-2 - 3 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: catan (0x0.fffffffffffff8p0 + 0x1p-27 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catan (0x0.fffffffffffff8p0 - 0x1p-27 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (0x0.ffffffp0 + 0x1p-13 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: catan (0x1.000000000000000000000000008p0 + 0x1p-54 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: catan (0x1.000000000000000000000000008p0 - 0x1p-54 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (0x1.0000000000001p0 - 0x1p-27 i)":
+double: 1
+idouble: 1
+Test "Real part of: catan (0x1.000002p0 + 0x1p-126 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (0x1.000002p0 + 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (0x1.000002p0 - 0x1p-126 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (0x1.000002p0 - 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (0x1.000002p0 - 0x1p-13 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catan (0x1.fp1023 + 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (0x1.fp1023 - 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (0x1.fp127 + 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (0x1.fp127 - 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catan (0x1p-1020 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catan (0x1p-1020 - 1.0 i)":
+double: 1
+idouble: 1
+Test "Real part of: catan (0x1p-13 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (0x1p-13 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (0x1p-13 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (0x1p-13 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (0x1p-13 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (0x1p-54 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (0x1p-54 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (0x1p-57 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catan (0x1p-57 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (1.0 + 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (1.0 + 0x1p-54 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: catan (1.0 - 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catan (1.0 - 0x1p-54 i)":
+ildouble: 1
+ldouble: 1
+
+# catanh
+Test "Real part of: catanh (-0x1.000002p0 + 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catanh (-0x1.000002p0 + 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-0x1.000002p0 - 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catanh (-0x1.000002p0 - 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-0x1.fp1023 + 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (-0x1.fp1023 - 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (-0x1.fp127 + 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (-0x1.fp127 - 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catanh (-0x1p-126 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catanh (-0x1p-126 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-0x1p-13 + 0x1.000002p0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catanh (-0x1p-13 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catanh (-0x1p-13 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-0x1p-13 - 0x1.000002p0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catanh (-0x1p-13 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catanh (-0x1p-13 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-0x1p-27 + 0x0.fffffffffffff8p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (-0x1p-27 + 0x1.0000000000001p0 i)":
+double: 1
+idouble: 1
+Test "Real part of: catanh (-0x1p-27 - 0x0.fffffffffffff8p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (-0x1p-27 - 0x1.0000000000001p0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catanh (-0x1p-54 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catanh (-0x1p-54 + 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catanh (-0x1p-54 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catanh (-0x1p-54 - 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (-1.0 + 0x1p-1020 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catanh (-1.0 + 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-1.0 + 0x1p-54 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-1.0 + 0x1p-57 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-1.0 - 0x1p-1020 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catanh (-1.0 - 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-1.0 - 0x1p-54 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-1.0 - 0x1p-57 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (-2 - 3 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catanh (0x1.000002p0 + 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catanh (0x1.000002p0 - 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (0x1.fp1023 + 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (0x1.fp1023 - 0x1.fp1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (0x1.fp127 + 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (0x1.fp127 - 0x1.fp127 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catanh (0x1p-126 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catanh (0x1p-126 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (0x1p-13 + 0x0.ffffffp0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: catanh (0x1p-13 + 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catanh (0x1p-13 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (0x1p-13 - 0x0.ffffffp0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: catanh (0x1p-13 - 0x1.000002p0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: catanh (0x1p-13 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (0x1p-27 + 0x0.fffffffffffff8p0 i)":
+double: 1
+idouble: 1
+Test "Real part of: catanh (0x1p-27 - 0x0.fffffffffffff8p0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catanh (0x1p-54 + 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catanh (0x1p-54 + 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catanh (0x1p-54 - 0x1.000000000000000000000000008p0 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: catanh (0x1p-54 - 1.0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: catanh (1.0 + 0x1p-1020 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catanh (1.0 + 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (1.0 + 0x1p-54 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (1.0 + 0x1p-57 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (1.0 - 0x1p-1020 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: catanh (1.0 - 0x1p-13 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (1.0 - 0x1p-54 i)":
+float: 1
+ifloat: 1
+Test "Real part of: catanh (1.0 - 0x1p-57 i)":
+float: 1
+ifloat: 1
+
+# cbrt
+Test "cbrt (-27.0)":
+double: 1
+idouble: 1
+Test "cbrt (0.75)":
+double: 1
+idouble: 1
+Test "cbrt (0.9921875)":
+double: 1
+idouble: 1
+
+# ccos
+Test "Imaginary part of: ccos (-0.75 + 710.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ccos (-0.75 + 89.5 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ccos (-0.75 - 710.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ccos (-0.75 - 89.5 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ccos (-2 - 3 i)":
+float: 1
+ifloat: 1
+Test "Real part of: ccos (0.75 + 1.25 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: ccos (0.75 + 1.25 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: ccos (0.75 + 710.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ccos (0.75 + 89.5 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ccos (0.75 - 710.5 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ccos (0.75 - 89.5 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ccos (0x1p-1074 + 1440 i)":
+double: 1
+idouble: 1
+
+# ccosh
+Test "Real part of: ccosh (-2 - 3 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: ccosh (-2 - 3 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: ccosh (-710.5 + 0.75 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ccosh (-710.5 - 0.75 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ccosh (-89.5 + 0.75 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ccosh (-89.5 - 0.75 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: ccosh (0.75 + 1.25 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: ccosh (0.75 + 1.25 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ccosh (1440 + 0x1p-1074 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ccosh (710.5 + 0.75 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ccosh (710.5 - 0.75 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ccosh (89.5 + 0.75 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ccosh (89.5 - 0.75 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+# cexp
+Test "Imaginary part of: cexp (-2.0 - 3.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cexp (-95 + 0.75 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cexp (0.75 + 1.25 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: cexp (0.75 + 1.25 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cexp (1440 + 0x1p-1074 i)":
+double: 1
+idouble: 1
+Test "Real part of: cexp (50 + 0x1p127 i)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cexp (50 + 0x1p127 i)":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: cexp (500 + 0x1p1023 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: cexp (500 + 0x1p1023 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cexp (709.8125 + 0.75 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: cexp (709.8125 + 0.75 i)":
+double: 1
+idouble: 1
+Test "Real part of: cexp (88.75 + 0.75 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: cexp (88.75 + 0.75 i)":
+float: 2
+ifloat: 2
+
+# clog
+Test "Real part of: clog (-0x1.0000000123456p0 + 0x1.2345678p-1000 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: clog (-0x1.0000000123456p0 + 0x1.2345678p-30 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog (-0x1.234566p-40 - 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: clog (-0x1.fp+127 + 0x1p-149 i)":
+float: 1
+ifloat: 1
+Test "Real part of: clog (-0x1.fp+127 - 0x1p-149 i)":
+float: 1
+ifloat: 1
+Test "Real part of: clog (-0x1p-149 + 0x1.fp+127 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog (-0x1p-149 + 0x1.fp+127 i)":
+float: 1
+ifloat: 1
+Test "Real part of: clog (-0x1p-149 - 0x1.fp+127 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog (-0x1p-149 - 0x1.fp+127 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog (-2 - 3 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog (-inf + inf i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog (-inf - inf i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog (0.75 + 1.25 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: clog (0x0.ffffffp0 + 0x0.ffffffp-100 i)":
+float: 1
+ifloat: 1
+Test "Real part of: clog (0x1.000566p0 + 0x1.234p-10 i)":
+float: 1
+ifloat: 1
+Test "Real part of: clog (0x1.fp+127 + 0x1p-149 i)":
+float: 1
+ifloat: 1
+Test "Real part of: clog (0x1.fp+127 - 0x1p-149 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog (0x11682p-23 + 0x7ffed1p-23 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog (0x13836d58a13448d750b4b9p-85 + 0x195ca7bc3ab4f9161edbe6p-85 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog (0x155f8afc4c48685bf63610p-85 + 0x17d0cf2652cdbeb1294e19p-85 i)":
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: clog (0x15cfbd1990d1ffp-53 + 0x176a3973e09a9ap-53 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: clog (0x1p-1074 + 0x1p-1074 i)":
+double: 1
+idouble: 1
+Test "Real part of: clog (0x1p-147 + 0x1p-147 i)":
+float: 1
+ifloat: 1
+Test "Real part of: clog (0x1p-149 + 0x1.fp+127 i)":
+float: 1
+ifloat: 1
+Test "Real part of: clog (0x1p-149 - 0x1.fp+127 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog (0x2818p-15 + 0x798fp-15 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog (0x4d9c37e2b5cb4533p-63 + 0x65c98be2385a042ep-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog (0x6241ef0da53f539f02fad67dabp-106 + 0x3fb46641182f7efd9caa769dac0p-106 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog (0xa1f2c1p-24 + 0xc643aep-24 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog (0xa4722f19346cp-51 + 0x7f9631c5e7f07p-51 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog (0xf2p-10 + 0x3e3p-10 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: clog (1.0 + 0x1.234566p-10 i)":
+float: 1
+ifloat: 1
+
+# clog10
+Test "Imaginary part of: clog10 (-0 + inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (-0 - inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: clog10 (-0x1.0000000123456p0 + 0x1.2345678p-1000 i)":
+double: 2
+idouble: 2
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: clog10 (-0x1.0000000123456p0 + 0x1.2345678p-1000 i)":
+double: 1
+idouble: 1
+Test "Real part of: clog10 (-0x1.0000000123456p0 + 0x1.2345678p-30 i)":
+double: 2
+idouble: 2
+Test "Imaginary part of: clog10 (-0x1.0000000123456p0 + 0x1.2345678p-30 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (-0x1.fp+1023 + 0x1p-1074 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (-0x1.fp+1023 - 0x1p-1074 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (-0x1.fp+127 + 0x1p-149 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (-0x1.fp+127 - 0x1p-149 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (-0x1p-1074 + 0x1.fp+1023 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (-0x1p-1074 - 0x1.fp+1023 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (-0x1p-149 + 0x1.fp+127 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (-0x1p-149 - 0x1.fp+127 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (-1.0 + 0x1.234566p-20 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (-2 - 3 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (-3 + inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (-3 - inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (-inf + 0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (-inf + 1 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (-inf + inf i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (-inf - 0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (-inf - 1 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0 + inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0 - inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: clog10 (0x0.fffffffffffff8p0 + 0x0.fffffffffffff8p-1000 i)":
+double: 1
+idouble: 1
+Test "Real part of: clog10 (0x0.ffffffp0 + 0x0.ffffffp-100 i)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+Test "Real part of: clog10 (0x1.000566p0 + 0x1.234p-10 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1.000566p0 + 0x1.234p-10 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: clog10 (0x1.000566p0 + 0x1.234p-100 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1.234566p-30 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1.234566p-50 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1.234566p-60 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0x1.fffffep+127 + 0x1.fffffep+127 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: clog10 (0x1.fffffep+127 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1.fffffffffffffp+1023 + 0x1.fffffffffffffp+1023 i)":
+double: 1
+idouble: 1
+Test "Real part of: clog10 (0x10673dd0f2481p-51 + 0x7ef1d17cefbd2p-51 i)":
+double: 1
+idouble: 1
+Test "Real part of: clog10 (0x1367a310575591p-54 + 0x3cfcc0a0541f60p-54 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1367a310575591p-54 + 0x3cfcc0a0541f60p-54 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0x13836d58a13448d750b4b9p-85 + 0x195ca7bc3ab4f9161edbe6p-85 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0x155f8afc4c48685bf63610p-85 + 0x17d0cf2652cdbeb1294e19p-85 i)":
+ildouble: 2
+ldouble: 2
+Test "Real part of: clog10 (0x15d8ab6ed05ca514086ac3a1e84p-105 + 0x1761e480aa094c0b10b34b09ce9p-105 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0x164c74eea876p-45 + 0x16f393482f77p-45 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1a6p-10 + 0x3a5p-10 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1df515eb171a808b9e400266p-95 + 0x7c71eb0cd4688dfe98581c77p-95 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0x1p-1073 + 0x1p-1073 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1p-1074 + 0x1.fp+1023 i)":
+double: 1
+idouble: 1
+Test "Real part of: clog10 (0x1p-1074 + 0x1p-1074 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1p-1074 + 0x1p-1074 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1p-1074 - 0x1.fp+1023 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1p-147 + 0x1p-147 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1p-149 + 0x1.fp+127 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1p-149 + 0x1p-149 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1p-149 - 0x1.fp+127 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1p-509 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1p-510 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1p-511 + 1.0 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x1p-61 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0x1p-62 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x1p-63 + 1.0 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: clog10 (0x2818p-15 + 0x798fp-15 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0x2818p-15 + 0x798fp-15 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x298c62cb546588a7p-63 + 0x7911b1dfcc4ecdaep-63 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: clog10 (0x2dd46725bp-35 + 0x7783a1284p-35 i)":
+double: 1
+idouble: 1
+Test "Real part of: clog10 (0x2ede88p-23 + 0x771c3fp-23 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (0x2ede88p-23 + 0x771c3fp-23 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: clog10 (0x4447d7175p-35 + 0x6c445e00ap-35 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x4d4ep-15 + 0x6605p-15 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0x5b06b680ea2ccp-52 + 0xef452b965da9fp-52 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0x81b7efa81fc35ad1p-65 + 0x1ef4b835f1c79d812p-65 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0x9b57bp-20 + 0xcb7b4p-20 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0xe33f66c9542ca25cc43c867p-95 + 0x7f35a68ebd3704a43c465864p-95 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: clog10 (0xf2p-10 + 0x3e3p-10 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (0xf2p-10 + 0x3e3p-10 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (0xfe961079616p-45 + 0x1bc37e09e6d1p-45 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: clog10 (1.0 + 0x1.234566p-10 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: clog10 (3 + inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (3 - inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (inf + inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: clog10 (inf - inf i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+
+# cos
+Test "cos (0x1p+120)":
+float: 1
+ifloat: 1
+Test "cos (0x1p+127)":
+float: 1
+ifloat: 1
+Test "cos (M_PI_6l * 2.0)":
+double: 1
+idouble: 1
+Test "cos (M_PI_6l * 4.0)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+# cos_downward
+Test "cos_downward (1)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "cos_downward (10)":
+ildouble: 1
+ldouble: 1
+Test "cos_downward (2)":
+float: 1
+ifloat: 1
+Test "cos_downward (3)":
+float: 1
+ifloat: 1
+Test "cos_downward (4)":
+float: 1
+ifloat: 1
+Test "cos_downward (5)":
+float: 1
+ifloat: 1
+Test "cos_downward (6)":
+ildouble: 1
+ldouble: 1
+Test "cos_downward (7)":
+float: 1
+ifloat: 1
+Test "cos_downward (8)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "cos_downward (9)":
+ildouble: 1
+ldouble: 1
+
+# cos_tonearest
+Test "cos_tonearest (7)":
+float: 1
+ifloat: 1
+
+# cos_towardzero
+Test "cos_towardzero (1)":
+ildouble: 1
+ldouble: 1
+Test "cos_towardzero (10)":
+ildouble: 1
+ldouble: 1
+Test "cos_towardzero (2)":
+float: 1
+ifloat: 1
+Test "cos_towardzero (3)":
+float: 1
+ifloat: 1
+Test "cos_towardzero (5)":
+float: 1
+ifloat: 1
+Test "cos_towardzero (7)":
+float: 1
+ifloat: 1
+Test "cos_towardzero (8)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+# cos_upward
+Test "cos_upward (10)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "cos_upward (4)":
+ildouble: 1
+ldouble: 1
+Test "cos_upward (5)":
+ildouble: 1
+ldouble: 1
+Test "cos_upward (6)":
+float: 1
+ifloat: 1
+Test "cos_upward (7)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "cos_upward (9)":
+float: 2
+ifloat: 2
+
+# cosh_downward
+Test "cosh_downward (22)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "cosh_downward (23)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "cosh_downward (24)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# cosh_tonearest
+Test "cosh_tonearest (24)":
+ildouble: 1
+ldouble: 1
+
+# cosh_towardzero
+Test "cosh_towardzero (22)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "cosh_towardzero (23)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "cosh_towardzero (24)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# cosh_upward
+Test "cosh_upward (22)":
+ildouble: 2
+ldouble: 2
+Test "cosh_upward (23)":
+ildouble: 2
+ldouble: 2
+Test "cosh_upward (24)":
+ildouble: 2
+ldouble: 2
+
+# cpow
+Test "Real part of: cpow (0.75 + 1.25 i, 0.0 + 1.0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: cpow (0.75 + 1.25 i, 0.75 + 1.25 i)":
+double: 1
+float: 4
+idouble: 1
+ifloat: 4
+ildouble: 2
+ldouble: 2
+Test "Real part of: cpow (0.75 + 1.25 i, 1.0 + 0.0 i)":
+ildouble: 2
+ldouble: 2
+Test "Real part of: cpow (0.75 + 1.25 i, 1.0 + 1.0 i)":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 4
+ldouble: 4
+Test "Real part of: cpow (2 + 0 i, 10 + 0 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: cpow (2 + 3 i, 4 + 0 i)":
+double: 1
+float: 4
+idouble: 1
+ifloat: 4
+Test "Imaginary part of: cpow (2 + 3 i, 4 + 0 i)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+# csin
+Test "Real part of: csin (-0.75 + 710.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: csin (-0.75 + 89.5 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: csin (-0.75 - 710.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: csin (-0.75 - 89.5 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: csin (0.75 + 710.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: csin (0.75 + 89.5 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: csin (0.75 - 710.5 i)":
+double: 1
+idouble: 1
+Test "Real part of: csin (0.75 - 89.5 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: csin (0x1p-1074 + 1440 i)":
+double: 1
+idouble: 1
+
+# csinh
+Test "Imaginary part of: csinh (-2 - 3 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: csinh (-710.5 + 0.75 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: csinh (-710.5 - 0.75 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: csinh (-89.5 + 0.75 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: csinh (-89.5 - 0.75 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: csinh (0.75 + 1.25 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: csinh (0.75 + 1.25 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: csinh (1440 + 0x1p-1074 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: csinh (710.5 + 0.75 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: csinh (710.5 - 0.75 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: csinh (89.5 + 0.75 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: csinh (89.5 - 0.75 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+# csqrt
+Test "Real part of: csqrt (-0x1.000002p-126 - 0x1.000002p-126 i)":
+double: 1
+idouble: 1
+Test "Real part of: csqrt (-2 + 3 i)":
+float: 1
+ifloat: 1
+Test "Real part of: csqrt (-2 - 3 i)":
+float: 1
+ifloat: 1
+Test "Imaginary part of: csqrt (0x1.000002p-126 + 0x1.000002p-126 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: csqrt (0x1.fffffep+127 + 1.0 i)":
+float: 1
+ifloat: 1
+Test "Real part of: csqrt (0x1.fffffffffffffp+1023 + 0x1.fffffffffffffp+1023 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: csqrt (0x1.fffffffffffffp+1023 + 0x1.fffffffffffffp+1023 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: csqrt (0x1.fffffffffffffp+1023 + 0x1p+1023 i)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: csqrt (0x1p-1074 + 0x1p-1074 i)":
+ildouble: 1
+ldouble: 1
+
+# ctan
+Test "Real part of: ctan (-2 - 3 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: ctan (-2 - 3 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ctan (0.75 + 1.25 i)":
+double: 1
+idouble: 1
+Test "Real part of: ctan (0x1p1023 + 1 i)":
+double: 1
+idouble: 1
+Test "Imaginary part of: ctan (0x1p1023 + 1 i)":
+ildouble: 1
+ldouble: 1
+Test "Real part of: ctan (0x1p127 + 1 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: ctan (0x1p127 + 1 i)":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+Test "Real part of: ctan (0x3.243f6cp-1 + 0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Real part of: ctan (1 + 47 i)":
+ildouble: 2
+ldouble: 2
+
+# ctan_downward
+Test "Real part of: ctan_downward (0x1.921fb54442d18p+0 + 0x1p-1074 i)":
+ildouble: 3
+ldouble: 3
+Test "Real part of: ctan_downward (0x1.921fb6p+0 + 0x1p-149 i)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ctan_downward (0x1.921fb6p+0 + 0x1p-149 i)":
+float: 1
+ifloat: 1
+ildouble: 6
+ldouble: 6
+
+# ctan_tonearest
+Test "Real part of: ctan_tonearest (0x1.921fb6p+0 + 0x1p-149 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: ctan_tonearest (0x1.921fb6p+0 + 0x1p-149 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# ctan_towardzero
+Test "Real part of: ctan_towardzero (0x1.921fb54442d18p+0 + 0x1p-1074 i)":
+ildouble: 4
+ldouble: 4
+Test "Imaginary part of: ctan_towardzero (0x1.921fb54442d18p+0 + 0x1p-1074 i)":
+ildouble: 13
+ldouble: 13
+Test "Real part of: ctan_towardzero (0x1.921fb6p+0 + 0x1p-149 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ctan_towardzero (0x1.921fb6p+0 + 0x1p-149 i)":
+float: 1
+ifloat: 1
+ildouble: 8
+ldouble: 8
+
+# ctan_upward
+Test "Real part of: ctan_upward (0x1.921fb54442d18p+0 + 0x1p-1074 i)":
+double: 1
+idouble: 1
+ildouble: 6
+ldouble: 6
+Test "Imaginary part of: ctan_upward (0x1.921fb54442d18p+0 + 0x1p-1074 i)":
+ildouble: 10
+ldouble: 10
+Test "Real part of: ctan_upward (0x1.921fb6p+0 + 0x1p-149 i)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 5
+ldouble: 5
+Test "Imaginary part of: ctan_upward (0x1.921fb6p+0 + 0x1p-149 i)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 5
+ldouble: 5
+
+# ctanh
+Test "Real part of: ctanh (-2 - 3 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ctanh (-2 - 3 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Imaginary part of: ctanh (0 + 0x3.243f6cp-1 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: ctanh (0 + pi/4 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "Real part of: ctanh (0.75 + 1.25 i)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ctanh (0.75 + 1.25 i)":
+float: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+Test "Real part of: ctanh (1 + 0x1p1023 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: ctanh (1 + 0x1p1023 i)":
+double: 1
+idouble: 1
+Test "Real part of: ctanh (1 + 0x1p127 i)":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+Test "Imaginary part of: ctanh (1 + 0x1p127 i)":
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: ctanh (47 + 1 i)":
+ildouble: 2
+ldouble: 2
+
+# ctanh_downward
+Test "Imaginary part of: ctanh_downward (0x1p-1074 + 0x1.921fb54442d18p+0 i)":
+ildouble: 3
+ldouble: 3
+Test "Real part of: ctanh_downward (0x1p-149 + 0x1.921fb6p+0 i)":
+float: 1
+ifloat: 1
+ildouble: 6
+ldouble: 6
+Test "Imaginary part of: ctanh_downward (0x1p-149 + 0x1.921fb6p+0 i)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+# ctanh_tonearest
+Test "Real part of: ctanh_tonearest (0x1p-149 + 0x1.921fb6p+0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "Imaginary part of: ctanh_tonearest (0x1p-149 + 0x1.921fb6p+0 i)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# ctanh_towardzero
+Test "Real part of: ctanh_towardzero (0x1p-1074 + 0x1.921fb54442d18p+0 i)":
+ildouble: 13
+ldouble: 13
+Test "Imaginary part of: ctanh_towardzero (0x1p-1074 + 0x1.921fb54442d18p+0 i)":
+ildouble: 4
+ldouble: 4
+Test "Real part of: ctanh_towardzero (0x1p-149 + 0x1.921fb6p+0 i)":
+float: 1
+ifloat: 1
+ildouble: 8
+ldouble: 8
+Test "Imaginary part of: ctanh_towardzero (0x1p-149 + 0x1.921fb6p+0 i)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+# ctanh_upward
+Test "Real part of: ctanh_upward (0x1p-1074 + 0x1.921fb54442d18p+0 i)":
+ildouble: 10
+ldouble: 10
+Test "Imaginary part of: ctanh_upward (0x1p-1074 + 0x1.921fb54442d18p+0 i)":
+double: 1
+idouble: 1
+ildouble: 6
+ldouble: 6
+Test "Real part of: ctanh_upward (0x1p-149 + 0x1.921fb6p+0 i)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 5
+ldouble: 5
+Test "Imaginary part of: ctanh_upward (0x1p-149 + 0x1.921fb6p+0 i)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 5
+ldouble: 5
+
+# erf
+Test "erf (1.25)":
+double: 1
+idouble: 1
+
+# erfc
+Test "erfc (0x1.f7303cp+1)":
+double: 1
+idouble: 1
+Test "erfc (0x1.ffa002p+2)":
+float: 1
+ifloat: 1
+Test "erfc (0x1.ffff56789abcdef0123456789a8p+2)":
+ildouble: 1
+ldouble: 1
+Test "erfc (2.0)":
+double: 1
+idouble: 1
+Test "erfc (4.125)":
+double: 1
+idouble: 1
+
+# exp
+Test "exp (0.75)":
+ildouble: 1
+ldouble: 1
+Test "exp (50.0)":
+ildouble: 1
+ldouble: 1
+
+# exp10
+Test "exp10 (-1)":
+double: 1
+idouble: 1
+Test "exp10 (-305)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "exp10 (-36)":
+double: 1
+idouble: 1
+Test "exp10 (3)":
+double: 1
+idouble: 1
+Test "exp10 (36)":
+double: 1
+idouble: 1
+
+# exp_downward
+Test "exp_downward (2)":
+float: 1
+ifloat: 1
+Test "exp_downward (3)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# exp_towardzero
+Test "exp_towardzero (2)":
+float: 1
+ifloat: 1
+Test "exp_towardzero (3)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# exp_upward
+Test "exp_upward (1)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# expm1
+Test "expm1 (0.75)":
+double: 1
+idouble: 1
+Test "expm1 (1)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "expm1 (500.0)":
+double: 1
+idouble: 1
+
+# gamma
+Test "gamma (0.7)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "gamma (1.2)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+# hypot
+Test "hypot (-0.7, -12.4)":
+float: 1
+ifloat: 1
+Test "hypot (-0.7, 12.4)":
+float: 1
+ifloat: 1
+Test "hypot (-12.4, -0.7)":
+float: 1
+ifloat: 1
+Test "hypot (-12.4, 0.7)":
+float: 1
+ifloat: 1
+Test "hypot (0.7, -12.4)":
+float: 1
+ifloat: 1
+Test "hypot (0.7, 12.4)":
+float: 1
+ifloat: 1
+Test "hypot (0.75, 1.25)":
+ildouble: 1
+ldouble: 1
+Test "hypot (12.4, -0.7)":
+float: 1
+ifloat: 1
+Test "hypot (12.4, 0.7)":
+float: 1
+ifloat: 1
+
+# j0
+Test "j0 (-0x1.001000001p+593)":
+ildouble: 2
+ldouble: 2
+Test "j0 (-4.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "j0 (0.75)":
+float: 1
+ifloat: 1
+Test "j0 (0x1.d7ce3ap+107)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "j0 (0x1p1023)":
+ildouble: 1
+ldouble: 1
+Test "j0 (10.0)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+Test "j0 (2.0)":
+float: 2
+ifloat: 2
+Test "j0 (4.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "j0 (8.0)":
+float: 1
+ifloat: 1
+
+# j1
+Test "j1 (0x1.3ffp+74)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "j1 (0x1.ff00000000002p+840)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "j1 (0x1p1023)":
+ildouble: 1
+ldouble: 1
+Test "j1 (10.0)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "j1 (2.0)":
+double: 1
+idouble: 1
+Test "j1 (8.0)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+# jn
+Test "jn (0, -4.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "jn (0, 0.75)":
+float: 1
+ifloat: 1
+Test "jn (0, 10.0)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+Test "jn (0, 2.0)":
+float: 2
+ifloat: 2
+Test "jn (0, 4.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "jn (0, 8.0)":
+float: 1
+ifloat: 1
+Test "jn (1, 10.0)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "jn (1, 2.0)":
+double: 1
+idouble: 1
+Test "jn (1, 8.0)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "jn (10, -1.0)":
+ildouble: 1
+ldouble: 1
+Test "jn (10, 0.125)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "jn (10, 0.75)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "jn (10, 1.0)":
+ildouble: 1
+ldouble: 1
+Test "jn (10, 10.0)":
+double: 4
+float: 3
+idouble: 4
+ifloat: 3
+ildouble: 4
+ldouble: 4
+Test "jn (10, 2.0)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+Test "jn (2, 0x1.ffff62p+99)":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+Test "jn (2, 2.4048255576957729)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+Test "jn (3, 0.125)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "jn (3, 0.75)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "jn (3, 10.0)":
+double: 3
+float: 1
+idouble: 3
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "jn (3, 2.0)":
+float: 1
+ifloat: 1
+Test "jn (3, 2.4048255576957729)":
+double: 3
+idouble: 3
+ildouble: 1
+ldouble: 1
+Test "jn (4, 2.4048255576957729)":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+Test "jn (5, 2.4048255576957729)":
+double: 3
+float: 1
+idouble: 3
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "jn (6, 2.4048255576957729)":
+double: 4
+float: 3
+idouble: 4
+ifloat: 3
+ildouble: 4
+ldouble: 4
+Test "jn (7, 2.4048255576957729)":
+double: 3
+float: 5
+idouble: 3
+ifloat: 5
+ildouble: 2
+ldouble: 2
+Test "jn (8, 2.4048255576957729)":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 4
+ldouble: 4
+Test "jn (9, 2.4048255576957729)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 7
+ldouble: 7
+
+# lgamma
+Test "lgamma (0.7)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "lgamma (1.2)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+# log10
+Test "log10 (0.75)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+Test "log10 (e)":
+float: 1
+ifloat: 1
+
+# log1p
+Test "log1p (-0.25)":
+float: 1
+ifloat: 1
+
+# log2
+Test "log2 (e)":
+ildouble: 1
+ldouble: 1
+
+# pow
+Test "pow (0x0.ffffffp0, -0x1p24)":
+float: 1
+ifloat: 1
+Test "pow (0x0.ffffffp0, 0x1p24)":
+float: 1
+ifloat: 1
+Test "pow (0x1.000002p0, 0x1p24)":
+float: 1
+ifloat: 1
+
+# pow10
+Test "pow10 (-1)":
+double: 1
+idouble: 1
+Test "pow10 (-305)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "pow10 (-36)":
+double: 1
+idouble: 1
+Test "pow10 (3)":
+double: 1
+idouble: 1
+Test "pow10 (36)":
+double: 1
+idouble: 1
+
+# pow_downward
+Test "pow_downward (1.0625, 1.125)":
+ildouble: 1
+ldouble: 1
+Test "pow_downward (1.5, 1.03125)":
+float: 1
+ifloat: 1
+
+# pow_towardzero
+Test "pow_towardzero (1.0625, 1.125)":
+ildouble: 1
+ldouble: 1
+Test "pow_towardzero (1.5, 1.03125)":
+float: 1
+ifloat: 1
+
+# pow_upward
+Test "pow_upward (1.0625, 1.125)":
+float: 1
+ifloat: 1
+
+# sin_downward
+Test "sin_downward (1)":
+ildouble: 1
+ldouble: 1
+Test "sin_downward (10)":
+float: 1
+ifloat: 1
+Test "sin_downward (2)":
+ildouble: 1
+ldouble: 1
+Test "sin_downward (3)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "sin_downward (4)":
+ildouble: 1
+ldouble: 1
+Test "sin_downward (5)":
+float: 1
+ifloat: 1
+Test "sin_downward (6)":
+float: 1
+ifloat: 1
+Test "sin_downward (8)":
+ildouble: 1
+ldouble: 1
+
+# sin_tonearest
+Test "sin_tonearest (1)":
+float: 1
+ifloat: 1
+
+# sin_towardzero
+Test "sin_towardzero (1)":
+float: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+Test "sin_towardzero (10)":
+float: 1
+ifloat: 1
+Test "sin_towardzero (2)":
+ildouble: 1
+ldouble: 1
+Test "sin_towardzero (3)":
+ildouble: 1
+ldouble: 1
+Test "sin_towardzero (4)":
+float: 1
+ifloat: 1
+Test "sin_towardzero (5)":
+float: 1
+ifloat: 1
+Test "sin_towardzero (8)":
+ildouble: 1
+ldouble: 1
+Test "sin_towardzero (9)":
+float: 1
+ifloat: 1
+
+# sin_upward
+Test "sin_upward (1)":
+float: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+Test "sin_upward (2)":
+float: 2
+ifloat: 2
+Test "sin_upward (3)":
+ildouble: 1
+ldouble: 1
+Test "sin_upward (4)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "sin_upward (6)":
+ildouble: 1
+ldouble: 1
+Test "sin_upward (9)":
+float: 1
+ifloat: 1
+
+# sincos
+Test "sincos (0x1p+120) extra output 2":
+float: 1
+ifloat: 1
+Test "sincos (0x1p+127) extra output 2":
+float: 1
+ifloat: 1
+Test "sincos (M_PI_6l*2.0) extra output 1":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "sincos (M_PI_6l*2.0) extra output 2":
+double: 1
+idouble: 1
+Test "sincos (pi/6) extra output 2":
+float: 1
+ifloat: 1
+
+# sinh
+Test "sinh (0.75)":
+ildouble: 1
+ldouble: 1
+
+# sinh_downward
+Test "sinh_downward (22)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "sinh_downward (23)":
+float: 1
+ifloat: 1
+Test "sinh_downward (24)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# sinh_towardzero
+Test "sinh_towardzero (22)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "sinh_towardzero (23)":
+float: 1
+ifloat: 1
+Test "sinh_towardzero (24)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# sinh_upward
+Test "sinh_upward (23)":
+ildouble: 1
+ldouble: 1
+Test "sinh_upward (24)":
+ildouble: 1
+ldouble: 1
+
+# tan
+Test "tan (-0xc.908p-4)":
+ildouble: 2
+ldouble: 2
+Test "tan (-0xc.90cp-4)":
+ildouble: 2
+ldouble: 2
+Test "tan (-0xc.90ep-4)":
+ildouble: 2
+ldouble: 2
+Test "tan (-0xc.90f8p-4)":
+ildouble: 2
+ldouble: 2
+Test "tan (-0xc.90fcp-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (-0xc.90fd8p-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (-0xc.90fdap-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (-0xc.92p-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (-0xc.9p-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (0xc.908p-4)":
+ildouble: 2
+ldouble: 2
+Test "tan (0xc.90cp-4)":
+ildouble: 2
+ldouble: 2
+Test "tan (0xc.90ep-4)":
+ildouble: 2
+ldouble: 2
+Test "tan (0xc.90f8p-4)":
+ildouble: 2
+ldouble: 2
+Test "tan (0xc.90fcp-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (0xc.90fd8p-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (0xc.90fdap-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (0xc.92p-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (0xc.9p-4)":
+ildouble: 1
+ldouble: 1
+Test "tan (pi/4)":
+ildouble: 1
+ldouble: 1
+
+# tan_downward
+Test "tan_downward (1)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tan_downward (10)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "tan_downward (2)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tan_downward (6)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tan_downward (8)":
+float: 1
+ifloat: 1
+Test "tan_downward (9)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# tan_tonearest
+Test "tan_tonearest (10)":
+ildouble: 1
+ldouble: 1
+Test "tan_tonearest (4)":
+ildouble: 1
+ldouble: 1
+Test "tan_tonearest (7)":
+ildouble: 1
+ldouble: 1
+
+# tan_towardzero
+Test "tan_towardzero (10)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "tan_towardzero (3)":
+float: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+Test "tan_towardzero (4)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tan_towardzero (5)":
+float: 1
+ifloat: 1
+Test "tan_towardzero (6)":
+ildouble: 1
+ldouble: 1
+Test "tan_towardzero (7)":
+ildouble: 2
+ldouble: 2
+Test "tan_towardzero (9)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# tan_upward
+Test "tan_upward (1)":
+float: 1
+ifloat: 1
+Test "tan_upward (10)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tan_upward (3)":
+float: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+Test "tan_upward (5)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tan_upward (6)":
+ildouble: 1
+ldouble: 1
+Test "tan_upward (7)":
+ildouble: 1
+ldouble: 1
+Test "tan_upward (9)":
+ildouble: 1
+ldouble: 1
+
+# tanh
+Test "tanh (-0.75)":
+ildouble: 1
+ldouble: 1
+Test "tanh (0.75)":
+ildouble: 1
+ldouble: 1
+
+# tgamma
+Test "tgamma (-0.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (-0x0.fffffffffffff8p0)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x0.ffffffp0)":
+float: 1
+ifloat: 1
+Test "tgamma (-0x1.000002p0)":
+double: 2
+idouble: 2
+Test "tgamma (-0x1.0a32a2p+5)":
+float: 2
+ifloat: 2
+Test "tgamma (-0x1.fffffffffffffp0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x13.ffffep0)":
+float: 2
+ifloat: 2
+Test "tgamma (-0x13.ffffffffffffp0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x14.000000000001p0)":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+Test "tgamma (-0x14.00002p0)":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "tgamma (-0x1d.ffffep0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x1d.fffffffffffffffffffffffff8p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x1d.ffffffffffffp0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x1e.00000000000000000000000008p0)":
+ildouble: 2
+ldouble: 2
+Test "tgamma (-0x1e.000000000001p0)":
+double: 3
+idouble: 3
+ildouble: 3
+ldouble: 3
+Test "tgamma (-0x1e.00002p0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (-0x2.0000000000002p0)":
+double: 1
+idouble: 1
+Test "tgamma (-0x2.000004p0)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x2.fffffcp0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x27.fffffffffffep0)":
+double: 1
+idouble: 1
+Test "tgamma (-0x27.fffffffffffffffffffffffffp0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x28.0000000000000000000000001p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x28.000000000002p0)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x28.00004p0)":
+double: 2
+idouble: 2
+Test "tgamma (-0x29.0000000000000000000000001p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x29.000000000002p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x29.00004p0)":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+Test "tgamma (-0x29.ffffcp0)":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+Test "tgamma (-0x29.fffffffffffep0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x2a.0000000000000000000000001p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x3.000004p0)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+Test "tgamma (-0x3.fffffcp0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (-0x3.ffffffffffffep0)":
+double: 2
+idouble: 2
+Test "tgamma (-0x31.fffffffffffep0)":
+double: 3
+idouble: 3
+Test "tgamma (-0x32.0000000000000000000000001p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x32.000000000002p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x4.000008p0)":
+float: 1
+ifloat: 1
+Test "tgamma (-0x4.fffff8p0)":
+double: 1
+idouble: 1
+Test "tgamma (-0x4.ffffffffffffcp0)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x5.0000000000004p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x5.000008p0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (-0x5.ffffffffffffcp0)":
+double: 1
+idouble: 1
+Test "tgamma (-0x6.000008p0)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x6.fffff8p0)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x6.ffffffffffffcp0)":
+double: 4
+idouble: 4
+ildouble: 2
+ldouble: 2
+Test "tgamma (-0x63.fffffffffffcp0)":
+double: 2
+idouble: 2
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x63.ffffffffffffffffffffffffep0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x64.0000000000000000000000002p0)":
+ildouble: 2
+ldouble: 2
+Test "tgamma (-0x64.000000000004p0)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x7.00000000000000000000000002p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x7.0000000000004p0)":
+double: 3
+idouble: 3
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x7.000008p0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (-0x7.fffff8p0)":
+double: 3
+float: 1
+idouble: 3
+ifloat: 1
+Test "tgamma (-0x7.ffffffffffffcp0)":
+double: 3
+idouble: 3
+ildouble: 3
+ldouble: 3
+Test "tgamma (-0x8.00000000000000000000000004p0)":
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x8.00001p0)":
+double: 2
+idouble: 2
+Test "tgamma (-0x9.ffffffffffff8p0)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x9.fffffp0)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-0x96.000000000008p0)":
+double: 1
+idouble: 1
+Test "tgamma (-0xa.00001p0)":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+Test "tgamma (-2.5)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "tgamma (-3.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (-4.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-5.5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (-6.5)":
+float: 1
+ifloat: 1
+Test "tgamma (-7.5)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+Test "tgamma (-8.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (-9.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (0.5)":
+float: 1
+ifloat: 1
+Test "tgamma (0.7)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (0x1.fffffep0)":
+float: 1
+ifloat: 1
+Test "tgamma (0x1.fffffffffffffp0)":
+double: 1
+idouble: 1
+Test "tgamma (0x1p-24)":
+float: 1
+ifloat: 1
+Test "tgamma (0x1p-53)":
+double: 1
+idouble: 1
+Test "tgamma (0x2.30a43cp+4)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+Test "tgamma (0x2.fffffcp0)":
+float: 3
+ifloat: 3
+Test "tgamma (0x3.fffffcp0)":
+float: 1
+ifloat: 1
+Test "tgamma (0x3.ffffffffffffep0)":
+double: 1
+idouble: 1
+Test "tgamma (0x4.0000000000004p0)":
+double: 1
+idouble: 1
+Test "tgamma (0x4.ffffffffffffcp0)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (0x5.0000000000004p0)":
+double: 1
+idouble: 1
+Test "tgamma (0x5.000008p0)":
+float: 2
+ifloat: 2
+Test "tgamma (0x5.fffff8p0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (0x6.0000000000004p0)":
+double: 1
+idouble: 1
+Test "tgamma (0x6.000008p0)":
+float: 2
+ifloat: 2
+Test "tgamma (0x6.fffff8p0)":
+double: 1
+idouble: 1
+Test "tgamma (0x6.ffffffffffffcp0)":
+double: 4
+idouble: 4
+ildouble: 1
+ldouble: 1
+Test "tgamma (0x7.0000000000004p0)":
+double: 4
+idouble: 4
+Test "tgamma (0x7.000008p0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (0x7.fffff8p0)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+Test "tgamma (0x7.ffffffffffffcp0)":
+double: 2
+idouble: 2
+ildouble: 1
+ldouble: 1
+Test "tgamma (0x8.00001p0)":
+double: 2
+idouble: 2
+Test "tgamma (0xa.b9fd72b0fb238p+4)":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+Test "tgamma (0xa.b9fd72b0fb23a9ddbf0d3804f4p+4)":
+ildouble: 2
+ldouble: 2
+Test "tgamma (10)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (18.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (19.5)":
+double: 2
+idouble: 2
+ildouble: 1
+ldouble: 1
+Test "tgamma (2.5)":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+Test "tgamma (23.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (29.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (3)":
+float: 1
+ifloat: 1
+Test "tgamma (3.5)":
+float: 2
+ifloat: 2
+Test "tgamma (30.5)":
+float: 1
+ifloat: 1
+Test "tgamma (33.5)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (34.5)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+Test "tgamma (4)":
+float: 1
+ifloat: 1
+Test "tgamma (4.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (6)":
+float: 1
+ifloat: 1
+Test "tgamma (6.5)":
+float: 1
+ifloat: 1
+Test "tgamma (7)":
+double: 1
+idouble: 1
+Test "tgamma (7.5)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "tgamma (8)":
+double: 1
+idouble: 1
+Test "tgamma (8.5)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "tgamma (9)":
+double: 1
+idouble: 1
+Test "tgamma (9.5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+# y0
+Test "y0 (0.125)":
+ildouble: 1
+ldouble: 1
+Test "y0 (0x1.3ffp+74)":
+double: 1
+idouble: 1
+Test "y0 (0x1.ff00000000002p+840)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "y0 (0x1p-10)":
+double: 1
+idouble: 1
+Test "y0 (0x1p-100)":
+ildouble: 1
+ldouble: 1
+Test "y0 (0x1p-110)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "y0 (0x1p-20)":
+float: 1
+ifloat: 1
+Test "y0 (0x1p-30)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "y0 (0x1p-40)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "y0 (0x1p-50)":
+float: 1
+ifloat: 1
+Test "y0 (0x1p-70)":
+double: 1
+idouble: 1
+Test "y0 (0x1p-80)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "y0 (0x1p1023)":
+ildouble: 1
+ldouble: 1
+Test "y0 (1.0)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "y0 (1.5)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+Test "y0 (10.0)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "y0 (8.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+# y1
+Test "y1 (0.125)":
+double: 1
+idouble: 1
+Test "y1 (0x1.001000001p+593)":
+ildouble: 2
+ldouble: 2
+Test "y1 (0x1.27e204p+99)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "y1 (0x1p-10)":
+double: 1
+idouble: 1
+Test "y1 (0x1p-20)":
+ildouble: 1
+ldouble: 1
+Test "y1 (0x1p1023)":
+ildouble: 1
+ldouble: 1
+Test "y1 (1.5)":
+float: 1
+ifloat: 1
+Test "y1 (10.0)":
+double: 3
+float: 1
+idouble: 3
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "y1 (2.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "y1 (8.0)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+
+# yn
+Test "yn (0, 0.125)":
+ildouble: 1
+ldouble: 1
+Test "yn (0, 1.0)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "yn (0, 1.5)":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+Test "yn (0, 10.0)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "yn (0, 8.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "yn (1, 0.125)":
+double: 1
+idouble: 1
+Test "yn (1, 1.5)":
+float: 1
+ifloat: 1
+Test "yn (1, 10.0)":
+double: 3
+float: 1
+idouble: 3
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "yn (1, 2.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "yn (1, 8.0)":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+Test "yn (10, 0.125)":
+double: 1
+idouble: 1
+Test "yn (10, 0.75)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "yn (10, 1.0)":
+double: 1
+idouble: 1
+Test "yn (10, 10.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "yn (10, 2.0)":
+double: 2
+idouble: 2
+ildouble: 1
+ldouble: 1
+Test "yn (3, 0.125)":
+double: 1
+idouble: 1
+Test "yn (3, 0.75)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "yn (3, 10.0)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "yn (3, 2.0)":
+double: 1
+idouble: 1
+
+# Maximal error of functions:
+Function: "acos":
+ildouble: 1
+ldouble: 1
+
+Function: "acos_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "acos_tonearest":
+ildouble: 1
+ldouble: 1
+
+Function: "acos_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "acos_upward":
+ildouble: 2
+ldouble: 2
+
+Function: "acosh":
+ildouble: 1
+ldouble: 1
+
+Function: "asin":
+ildouble: 2
+ldouble: 2
+
+Function: "asin_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "asin_tonearest":
+ildouble: 1
+ldouble: 1
+
+Function: "asin_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "asin_upward":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "asinh":
+ildouble: 1
+ldouble: 1
+
+Function: "atan2":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "atanh":
+float: 1
+ifloat: 1
+
+Function: "cabs":
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "cacos":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "cacos":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "cacosh":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "cacosh":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "carg":
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "casin":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "casin":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "casinh":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "casinh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "catan":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "catan":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "catanh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "catanh":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cbrt":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "ccos":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "ccos":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "ccosh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "ccosh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "cexp":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "cexp":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "clog":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "clog":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "clog10":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "clog10":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "cos":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "cos_downward":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "cos_tonearest":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cos_towardzero":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "cos_upward":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: "cosh":
+ildouble: 1
+ldouble: 1
+
+Function: "cosh_downward":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cosh_tonearest":
+ildouble: 1
+ldouble: 1
+
+Function: "cosh_towardzero":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cosh_upward":
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "cpow":
+double: 2
+float: 4
+idouble: 2
+ifloat: 4
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "cpow":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "csin":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "csin":
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "csinh":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "csinh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "csqrt":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "csqrt":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "ctan":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "ctan":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "ctan_downward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "ctan_downward":
+float: 1
+ifloat: 1
+ildouble: 6
+ldouble: 6
+
+Function: Real part of "ctan_tonearest":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "ctan_tonearest":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "ctan_towardzero":
+float: 1
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "ctan_towardzero":
+float: 1
+ifloat: 1
+ildouble: 13
+ldouble: 13
+
+Function: Real part of "ctan_upward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 6
+ldouble: 6
+
+Function: Imaginary part of "ctan_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 10
+ldouble: 10
+
+Function: Real part of "ctanh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "ctanh":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "ctanh_downward":
+float: 1
+ifloat: 1
+ildouble: 6
+ldouble: 6
+
+Function: Imaginary part of "ctanh_downward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "ctanh_tonearest":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "ctanh_tonearest":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "ctanh_towardzero":
+float: 1
+ifloat: 1
+ildouble: 13
+ldouble: 13
+
+Function: Imaginary part of "ctanh_towardzero":
+float: 1
+ifloat: 1
+ildouble: 4
+ldouble: 4
+
+Function: Real part of "ctanh_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 10
+ldouble: 10
+
+Function: Imaginary part of "ctanh_upward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 6
+ldouble: 6
+
+Function: "erf":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "erfc":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "exp":
+ildouble: 1
+ldouble: 1
+
+Function: "exp10":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "exp_downward":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "exp_tonearest":
+ildouble: 1
+ldouble: 1
+
+Function: "exp_towardzero":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "exp_upward":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "expm1":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "gamma":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: "hypot":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "j0":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "j1":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: "jn":
+double: 4
+float: 5
+idouble: 4
+ifloat: 5
+ildouble: 7
+ldouble: 7
+
+Function: "lgamma":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: "log":
+ildouble: 1
+ldouble: 1
+
+Function: "log10":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: "log1p":
+float: 1
+ifloat: 1
+
+Function: "log2":
+ildouble: 1
+ldouble: 1
+
+Function: "pow":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "pow10":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "pow_downward":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "pow_towardzero":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "pow_upward":
+float: 1
+ifloat: 1
+
+Function: "sin":
+ildouble: 1
+ldouble: 1
+
+Function: "sin_downward":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "sin_tonearest":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "sin_towardzero":
+float: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "sin_upward":
+float: 2
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: "sincos":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "sinh":
+ildouble: 1
+ldouble: 1
+
+Function: "sinh_downward":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "sinh_tonearest":
+ildouble: 1
+ldouble: 1
+
+Function: "sinh_towardzero":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "sinh_upward":
+ildouble: 1
+ldouble: 1
+
+Function: "tan":
+double: 1
+idouble: 1
+ildouble: 2
+ldouble: 2
+
+Function: "tan_downward":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "tan_tonearest":
+ildouble: 1
+ldouble: 1
+
+Function: "tan_towardzero":
+float: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "tan_upward":
+float: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "tanh":
+ildouble: 1
+ldouble: 1
+
+Function: "tgamma":
+double: 4
+float: 3
+idouble: 4
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: "y0":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "y1":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "yn":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+# end of automatic generation
diff --git a/libc/sysdeps/powerpc/nofpu/shlib-versions b/libc/sysdeps/powerpc/nofpu/shlib-versions
new file mode 100644
index 000000000..72085ddf4
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/shlib-versions
@@ -0,0 +1 @@
+powerpc.*-.*-.* ABI powerpcsoft-@OS@
diff --git a/libc/sysdeps/powerpc/nofpu/sim-full.c b/libc/sysdeps/powerpc/nofpu/sim-full.c
new file mode 100644
index 000000000..e16703323
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/sim-full.c
@@ -0,0 +1,46 @@
+/* Software floating-point exception handling emulation.
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <signal.h>
+#include "soft-fp.h"
+#include "soft-supp.h"
+
+/* FIXME: these variables should be thread specific (see bugzilla bug
+ 15483) and ideally preserved across signal handlers, like hardware
+ FP status words, but the latter is quite difficult to accomplish in
+ userland. */
+
+/* Global to store sticky exceptions. */
+int __sim_exceptions __attribute__ ((nocommon));
+libc_hidden_data_def (__sim_exceptions);
+
+/* By default, no exceptions should trap. */
+int __sim_disabled_exceptions = 0xffffffff;
+libc_hidden_data_def (__sim_disabled_exceptions);
+
+int __sim_round_mode __attribute__ ((nocommon));
+libc_hidden_data_def (__sim_round_mode);
+
+void
+__simulate_exceptions (int x)
+{
+ __sim_exceptions |= x;
+ if (x & ~__sim_disabled_exceptions)
+ raise (SIGFPE);
+}
diff --git a/libc/sysdeps/powerpc/nofpu/soft-supp.h b/libc/sysdeps/powerpc/nofpu/soft-supp.h
new file mode 100644
index 000000000..18b4550e3
--- /dev/null
+++ b/libc/sysdeps/powerpc/nofpu/soft-supp.h
@@ -0,0 +1,48 @@
+/* Internal support stuff for complete soft float.
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2002.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if defined __NO_FPRS__ && !defined _SOFT_FLOAT
+
+# include <fenv_libc.h>
+
+#else
+
+# include <fenv.h>
+
+typedef union
+{
+ fenv_t fenv;
+ unsigned int l[2];
+} fenv_union_t;
+
+#endif
+
+/* FIXME: these variables should be thread specific (see bugzilla bug
+ 15483) and ideally preserved across signal handlers, like hardware
+ FP status words, but the latter is quite difficult to accomplish in
+ userland. */
+
+extern int __sim_exceptions;
+libc_hidden_proto (__sim_exceptions);
+extern int __sim_disabled_exceptions;
+libc_hidden_proto (__sim_disabled_exceptions);
+extern int __sim_round_mode;
+libc_hidden_proto (__sim_round_mode);
+
+extern void __simulate_exceptions (int x) attribute_hidden;
diff --git a/libc/sysdeps/powerpc/novmx-longjmp.c b/libc/sysdeps/powerpc/novmx-longjmp.c
index 8f6ea357d..b2c0e4cf5 100644
--- a/libc/sysdeps/powerpc/novmx-longjmp.c
+++ b/libc/sysdeps/powerpc/novmx-longjmp.c
@@ -50,13 +50,7 @@ weak_alias (__novmx__libc_siglongjmp, __novmx_longjmp)
weak_alias (__novmx__libc_siglongjmp, __novmxlongjmp)
weak_alias (__novmx__libc_siglongjmp, __novmxsiglongjmp)
-# if __WORDSIZE == 64
-symbol_version (__novmx_longjmp,_longjmp,GLIBC_2.3);
-symbol_version (__novmxlongjmp,longjmp,GLIBC_2.3);
-symbol_version (__novmxsiglongjmp,siglongjmp,GLIBC_2.3);
-# else
-symbol_version (__novmx_longjmp,_longjmp,GLIBC_2.0);
-symbol_version (__novmxlongjmp,longjmp,GLIBC_2.0);
-symbol_version (__novmxsiglongjmp,siglongjmp,GLIBC_2.0);
-# endif
+compat_symbol (libc, __novmx_longjmp, _longjmp, GLIBC_2_0);
+compat_symbol (libc, __novmxlongjmp, longjmp, GLIBC_2_0);
+compat_symbol (libc, __novmxsiglongjmp, siglongjmp, GLIBC_2_0);
#endif /* defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)) */
diff --git a/libc/sysdeps/powerpc/powerpc32/405/memcmp.S b/libc/sysdeps/powerpc/powerpc32/405/memcmp.S
new file mode 100644
index 000000000..2849461cd
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/405/memcmp.S
@@ -0,0 +1,128 @@
+/* Optimized memcmp implementation for PowerPC476.
+ Copyright (C) 2010-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* memcmp
+
+ r3:source1 address, return equality
+ r4:source2 address
+ r5:byte count
+
+   Compare two words at a time from src1 and src2.  If they differ, jump
+   to the end and return src1 > src2 or src1 < src2.
+   When the count runs out, compare any remaining bytes, then jump to the
+   end and return src1 > src2, src1 < src2 or src1 = src2.
+   If the words are equal and the count is not exhausted, repeat.  */
+
+EALIGN (memcmp, 5, 0)
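+/* Split the byte count into 32-byte blocks (loaded into CTR) and a 0-31
+   byte remainder kept in r5.  */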
+ srwi. r6,r5,5
+ beq L(preword2_count_loop)
+ mtctr r6
+ clrlwi r5,r5,27
+
+L(word8_compare_loop):
+ lwz r10,0(r3)
+ lwz r6,4(r3)
+ lwz r8,0(r4)
+ lwz r9,4(r4)
+ cmplw cr5,r8,r10
+ cmplw cr1,r9,r6
+ bne cr5,L(st2)
+ bne cr1,L(st1)
+ lwz r10,8(r3)
+ lwz r6,12(r3)
+ lwz r8,8(r4)
+ lwz r9,12(r4)
+ cmplw cr5,r8,r10
+ cmplw cr1,r9,r6
+ bne cr5,L(st2)
+ bne cr1,L(st1)
+ lwz r10,16(r3)
+ lwz r6,20(r3)
+ lwz r8,16(r4)
+ lwz r9,20(r4)
+ cmplw cr5,r8,r10
+ cmplw cr1,r9,r6
+ bne cr5,L(st2)
+ bne cr1,L(st1)
+ lwz r10,24(r3)
+ lwz r6,28(r3)
+ addi r3,r3,0x20
+ lwz r8,24(r4)
+ lwz r9,28(r4)
+ addi r4,r4,0x20
+ cmplw cr5,r8,r10
+ cmplw cr1,r9,r6
+ bne cr5,L(st2)
+ bne cr1,L(st1)
+ bdnz L(word8_compare_loop)
+
+L(preword2_count_loop):
+ srwi. r6,r5,3
+ beq L(prebyte_count_loop)
+ mtctr r6
+ clrlwi r5,r5,29
+
+L(word2_count_loop):
+ lwz r10,0(r3)
+ lwz r6,4(r3)
+ addi r3,r3,0x08
+ lwz r8,0(r4)
+ lwz r9,4(r4)
+ addi r4,r4,0x08
+ cmplw cr5,r8,r10
+ cmplw cr1,r9,r6
+ bne cr5,L(st2)
+ bne cr1,L(st1)
+ bdnz L(word2_count_loop)
+
+L(prebyte_count_loop):
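+/* Compare the remaining 0-7 bytes one at a time; the count is incremented
+   first so that bdz also handles a zero remainder.  */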
+ addi r5,r5,1
+ mtctr r5
+ bdz L(end_memcmp)
+
+L(byte_count_loop):
+ lbz r6,0(r3)
+ addi r3,r3,0x01
+ lbz r8,0(r4)
+ addi r4,r4,0x01
+ cmplw cr5,r8,r6
+ bne cr5,L(st2)
+ bdnz L(byte_count_loop)
+
+L(end_memcmp):
+ addi r3,r0,0
+ blr
+
+L(l_r):
+ addi r3,r0,1
+ blr
+
+L(st1):
+ blt cr1,L(l_r)
+ addi r3,r0,-1
+ blr
+
+L(st2):
+ blt cr5,L(l_r)
+ addi r3,r0,-1
+ blr
+END (memcmp)
+libc_hidden_builtin_def (memcmp)
+weak_alias (memcmp,bcmp)
diff --git a/libc/sysdeps/powerpc/powerpc32/405/memcpy.S b/libc/sysdeps/powerpc/powerpc32/405/memcpy.S
new file mode 100644
index 000000000..b01d53920
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/405/memcpy.S
@@ -0,0 +1,130 @@
+/* Optimized memcpy implementation for PowerPC476.
+ Copyright (C) 2010-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* memcpy
+
+   r0:saved destination address (return value)
+ r3:destination address
+ r4:source address
+ r5:byte count
+
+   Save the destination address (the return value) in r0.
+   If the destination and source are both unaligned and the copy count is
+   greater than 256, copy 0-3 bytes to make the destination word aligned.
+   If there are 32 or more bytes to copy we use a 32-byte copy loop.
+   Finally we copy the remaining 0-31 bytes.  */
+
+EALIGN (memcpy, 5, 0)
+/* Check if bytes to copy are greater than 256 and if
+ source and destination are unaligned */
+ cmpwi r5,0x0100
+ addi r0,r3,0
+ ble L(string_count_loop)
+ neg r6,r3
+ clrlwi. r6,r6,30
+ beq L(string_count_loop)
+ neg r6,r4
+ clrlwi. r6,r6,30
+ beq L(string_count_loop)
+ mtctr r6
+ subf r5,r6,r5
+
+L(unaligned_bytecopy_loop): /* Align destination by copying 0-3 bytes */
+ lbz r8,0x0(r4)
+ addi r4,r4,1
+ stb r8,0x0(r3)
+ addi r3,r3,1
+ bdnz L(unaligned_bytecopy_loop)
+ srwi. r7,r5,5
+ beq L(preword2_count_loop)
+ mtctr r7
+
+L(word8_count_loop_no_dcbt): /* Copy 32 bytes at a time */
+ lwz r6,0(r4)
+ lwz r7,4(r4)
+ lwz r8,8(r4)
+ lwz r9,12(r4)
+ subi r5,r5,0x20
+ stw r6,0(r3)
+ stw r7,4(r3)
+ stw r8,8(r3)
+ stw r9,12(r3)
+ lwz r6,16(r4)
+ lwz r7,20(r4)
+ lwz r8,24(r4)
+ lwz r9,28(r4)
+ addi r4,r4,0x20
+ stw r6,16(r3)
+ stw r7,20(r3)
+ stw r8,24(r3)
+ stw r9,28(r3)
+ addi r3,r3,0x20
+ bdnz L(word8_count_loop_no_dcbt)
+
+L(preword2_count_loop): /* Copy remaining 0-31 bytes */
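+/* mtxer sets the transfer length, so one lswx/stswx pair moves all of the
+   remaining bytes in a single string operation.  */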
+ clrlwi. r12,r5,27
+ beq L(end_memcpy)
+ mtxer r12
+ lswx r5,0,r4
+ stswx r5,0,r3
+ mr r3,r0
+ blr
+
+L(string_count_loop): /* Copy the odd 0-15 tail bytes first */
+ clrlwi. r12,r5,28
+ add r3,r3,r5
+ add r4,r4,r5
+ beq L(pre_string_copy)
+ mtxer r12
+ subf r4,r12,r4
+ subf r3,r12,r3
+ lswx r6,0,r4
+ stswx r6,0,r3
+
+L(pre_string_copy): /* Check how many 16 byte chunks to copy */
+ srwi. r7,r5,4
+ beq L(end_memcpy)
+ mtctr r7
+
+L(word4_count_loop_no_dcbt): /* Copy 32 bytes at a time */
+ lwz r6,-4(r4)
+ lwz r7,-8(r4)
+ lwz r8,-12(r4)
+ lwzu r9,-16(r4)
+ stw r6,-4(r3)
+ stw r7,-8(r3)
+ stw r8,-12(r3)
+ stwu r9,-16(r3)
+ bdz L(end_memcpy)
+ lwz r6,-4(r4)
+ lwz r7,-8(r4)
+ lwz r8,-12(r4)
+ lwzu r9,-16(r4)
+ stw r6,-4(r3)
+ stw r7,-8(r3)
+ stw r8,-12(r3)
+ stwu r9,-16(r3)
+ bdnz L(word4_count_loop_no_dcbt)
+
+L(end_memcpy):
+ mr r3,r0
+ blr
+END (memcpy)
+libc_hidden_builtin_def (memcpy)
diff --git a/libc/sysdeps/powerpc/powerpc32/405/memset.S b/libc/sysdeps/powerpc/powerpc32/405/memset.S
new file mode 100644
index 000000000..b73dba887
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/405/memset.S
@@ -0,0 +1,152 @@
+/* Optimized memset for PowerPC405,440,464 (32-byte cacheline).
+ Copyright (C) 2012-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* memset
+
+   r3:destination address and return value
+   r4:source integer to copy
+   r5:byte count
+   r11:fill byte from r4 replicated into all 32 bits of the register
+   r12:saved copy of the destination address (the return value)
+
+   Save the destination address (the return value) in r12.
+   If the destination is unaligned and the count is greater than 255 bytes,
+   set 0-3 bytes to make the destination word aligned.
+   If the count is greater than 255 bytes and the fill value is zero,
+   use dcbz to clear whole cache lines where we can;
+   otherwise do the following:
+   If 16 or more bytes remain we use a loop that stores four words at a time.
+   Finally we set the remaining 0-15 bytes with a string store.  */
+
+EALIGN (memset, 5, 0)
+ rlwinm r11,r4,0,24,31
+ rlwimi r11,r4,8,16,23
+ rlwimi r11,r11,16,0,15
+ addi r12,r3,0
+ cmpwi r5,0x00FF
+ ble L(preword8_count_loop)
+ cmpwi r4,0x00
+ beq L(use_dcbz)
+ neg r6,r3
+ clrlwi. r6,r6,30
+ beq L(preword8_count_loop)
+ addi r8,0,1
+ mtctr r6
+ subi r3,r3,1
+
+L(unaligned_bytecopy_loop):
+ stbu r11,0x1(r3)
+ subf. r5,r8,r5
+ beq L(end_memset)
+ bdnz L(unaligned_bytecopy_loop)
+ addi r3,r3,1
+
+L(preword8_count_loop):
+ srwi. r6,r5,4
+ beq L(preword2_count_loop)
+ mtctr r6
+ addi r3,r3,-4
+ mr r8,r11
+ mr r9,r11
+ mr r10,r11
+
+L(word8_count_loop_no_dcbt):
+ stwu r8,4(r3)
+ stwu r9,4(r3)
+ subi r5,r5,0x10
+ stwu r10,4(r3)
+ stwu r11,4(r3)
+ bdnz L(word8_count_loop_no_dcbt)
+ addi r3,r3,4
+
+L(preword2_count_loop):
+ clrlwi. r7,r5,28
+ beq L(end_memset)
+ mr r8,r11
+ mr r9,r11
+ mr r10,r11
+ mtxer r7
+ stswx r8,0,r3
+
+L(end_memset):
+ addi r3,r12,0
+ blr
+
+L(use_dcbz):
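+/* Zero fill of a large block: first align the destination to a cache-line
+   boundary, then clear whole 32-byte lines with dcbz.  */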
+ neg r6,r3
+ clrlwi. r7,r6,28
+ beq L(skip_string_loop)
+ mr r8,r11
+ mr r9,r11
+ mr r10,r11
+ subf r5,r7,r5
+ mtxer r7
+ stswx r8,0,r3
+ add r3,r3,r7
+
+L(skip_string_loop):
+ clrlwi r8,r6,27
+ srwi. r8,r8,4
+ beq L(dcbz_pre_loop)
+ mtctr r8
+
+L(word_loop):
+ stw r11,0(r3)
+ subi r5,r5,0x10
+ stw r11,4(r3)
+ stw r11,8(r3)
+ stw r11,12(r3)
+ addi r3,r3,0x10
+ bdnz L(word_loop)
+
+L(dcbz_pre_loop):
+ srwi r6,r5,5
+ mtctr r6
+ addi r7,0,0
+
+L(dcbz_loop):
+ dcbz r3,r7
+ addi r3,r3,0x20
+ subi r5,r5,0x20
+ bdnz L(dcbz_loop)
+ srwi. r6,r5,4
+ beq L(postword2_count_loop)
+ mtctr r6
+
+L(postword8_count_loop):
+ stw r11,0(r3)
+ subi r5,r5,0x10
+ stw r11,4(r3)
+ stw r11,8(r3)
+ stw r11,12(r3)
+ addi r3,r3,0x10
+ bdnz L(postword8_count_loop)
+
+L(postword2_count_loop):
+ clrlwi. r7,r5,28
+ beq L(end_memset)
+ mr r8,r11
+ mr r9,r11
+ mr r10,r11
+ mtxer r7
+ stswx r8,0,r3
+ b L(end_memset)
+END (memset)
+libc_hidden_builtin_def (memset)
diff --git a/libc/sysdeps/powerpc/powerpc32/405/strcmp.S b/libc/sysdeps/powerpc/powerpc32/405/strcmp.S
new file mode 100644
index 000000000..c0b21907b
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/405/strcmp.S
@@ -0,0 +1,134 @@
+/* Optimized strcmp implementation for PowerPC476.
+ Copyright (C) 2010-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* strcmp
+
+ Register Use
+ r0:temp return equality
+ r3:source1 address, return equality
+ r4:source2 address
+
+ Implementation description
+   Compare two words at a time from src1 and src2.  If they differ, jump
+   to the end and return src1 > src2 or src1 < src2.
+   If a null is found, compare only the bytes up to the null, then jump to
+   the end and return src1 > src2, src1 < src2 or src1 = src2.
+   If src1 = src2 and no null has been seen, repeat.  */
+
+EALIGN (strcmp,5,0)
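+/* Limit the 32-byte compare loop so that neither string is read across a
+   4K page boundary; any leftover is handled by the byte loop.  */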
+ neg r7,r3
+ clrlwi r7,r7,20
+ neg r8,r4
+ clrlwi r8,r8,20
+ srwi. r7,r7,5
+ beq L(byte_loop)
+ srwi. r8,r8,5
+ beq L(byte_loop)
+ cmplw r7,r8
+ mtctr r7
+ ble L(big_loop)
+ mtctr r8
+
+L(big_loop):
+ lwz r5,0(r3)
+ lwz r6,4(r3)
+ lwz r8,0(r4)
+ lwz r9,4(r4)
+ dlmzb. r12,r5,r6
+ bne L(end_check)
+ cmplw r5,r8
+ bne L(st1)
+ cmplw r6,r9
+ bne L(st1)
+ lwz r5,8(r3)
+ lwz r6,12(r3)
+ lwz r8,8(r4)
+ lwz r9,12(r4)
+ dlmzb. r12,r5,r6
+ bne L(end_check)
+ cmplw r5,r8
+ bne L(st1)
+ cmplw r6,r9
+ bne L(st1)
+ lwz r5,16(r3)
+ lwz r6,20(r3)
+ lwz r8,16(r4)
+ lwz r9,20(r4)
+ dlmzb. r12,r5,r6
+ bne L(end_check)
+ cmplw r5,r8
+ bne L(st1)
+ cmplw r6,r9
+ bne L(st1)
+ lwz r5,24(r3)
+ lwz r6,28(r3)
+ addi r3,r3,0x20
+ lwz r8,24(r4)
+ lwz r9,28(r4)
+ addi r4,r4,0x20
+ dlmzb. r12,r5,r6
+ bne L(end_check)
+ cmplw r5,r8
+ bne L(st1)
+ cmplw r6,r9
+ bne L(st1)
+ bdnz L(big_loop)
+ b L(byte_loop)
+
+L(end_check):
+ subfic r12,r12,4
+ blt L(end_check2)
+ rlwinm r12,r12,3,0,31
+ srw r5,r5,r12
+ srw r8,r8,r12
+ cmplw r5,r8
+ bne L(st1)
+ b L(end_strcmp)
+
+L(end_check2):
+ addi r12,r12,4
+ cmplw r5,r8
+ rlwinm r12,r12,3,0,31
+ bne L(st1)
+ srw r6,r6,r12
+ srw r9,r9,r12
+ cmplw r6,r9
+ bne L(st1)
+
+L(end_strcmp):
+ addi r3,r0,0
+ blr
+
+L(st1):
+ mfcr r3
+ blr
+
+L(byte_loop):
+ lbz r5,0(r3)
+ addi r3,r3,1
+ lbz r6,0(r4)
+ addi r4,r4,1
+ cmplw r5,r6
+ bne L(st1)
+ cmpwi r5,0
+ beq L(end_strcmp)
+ b L(byte_loop)
+END (strcmp)
+libc_hidden_builtin_def (strcmp)
diff --git a/libc/sysdeps/powerpc/powerpc32/405/strcpy.S b/libc/sysdeps/powerpc/powerpc32/405/strcpy.S
new file mode 100644
index 000000000..d7c84569d
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/405/strcpy.S
@@ -0,0 +1,107 @@
+/* Optimized strcpy implementation for PowerPC476.
+ Copyright (C) 2010-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* strcpy
+
+ Register Use
+ r3:destination and return address
+ r4:source address
+ r10:temp destination address
+
+ Implementation description
+   Loop by checking two words at a time, using dlmzb to test for a null in
+   either word.  If a null is found, jump to the end checks to determine
+   where in the last 8 bytes it lies, then copy the appropriate bytes of
+   those last 8 according to the null position.  */
+
+EALIGN (strcpy, 5, 0)
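+/* Copy single bytes until the source is 8-byte aligned, then copy two
+   words at a time, with dlmzb detecting the terminating null.  */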
+ neg r7,r4
+ subi r4,r4,1
+ clrlwi. r8,r7,29
+ subi r10,r3,1
+ beq L(pre_word8_loop)
+ mtctr r8
+
+L(loop):
+ lbzu r5,0x01(r4)
+ cmpi cr5,r5,0x0
+ stbu r5,0x01(r10)
+ beq cr5,L(end_strcpy)
+ bdnz L(loop)
+
+L(pre_word8_loop):
+ subi r4,r4,3
+ subi r10,r10,3
+
+L(word8_loop):
+ lwzu r5,0x04(r4)
+ lwzu r6,0x04(r4)
+ dlmzb. r11,r5,r6
+ bne L(byte_copy)
+ stwu r5,0x04(r10)
+ stwu r6,0x04(r10)
+ lwzu r5,0x04(r4)
+ lwzu r6,0x04(r4)
+ dlmzb. r11,r5,r6
+ bne L(byte_copy)
+ stwu r5,0x04(r10)
+ stwu r6,0x04(r10)
+ lwzu r5,0x04(r4)
+ lwzu r6,0x04(r4)
+ dlmzb. r11,r5,r6
+ bne L(byte_copy)
+ stwu r5,0x04(r10)
+ stwu r6,0x04(r10)
+ lwzu r5,0x04(r4)
+ lwzu r6,0x04(r4)
+ dlmzb. r11,r5,r6
+ bne L(byte_copy)
+ stwu r5,0x04(r10)
+ stwu r6,0x04(r10)
+ b L(word8_loop)
+
+L(last_bytes_copy):
+ stwu r5,0x04(r10)
+ subi r11,r11,4
+ mtctr r11
+ addi r10,r10,3
+ subi r4,r4,1
+
+L(last_bytes_copy_loop):
+ lbzu r5,0x01(r4)
+ stbu r5,0x01(r10)
+ bdnz L(last_bytes_copy_loop)
+ blr
+
+L(byte_copy):
+ blt L(last_bytes_copy)
+ mtctr r11
+ addi r10,r10,3
+ subi r4,r4,5
+
+L(last_bytes_copy_loop2):
+ lbzu r5,0x01(r4)
+ stbu r5,0x01(r10)
+ bdnz L(last_bytes_copy_loop2)
+
+L(end_strcpy):
+ blr
+END (strcpy)
+libc_hidden_builtin_def (strcpy)
diff --git a/libc/sysdeps/powerpc/powerpc32/405/strlen.S b/libc/sysdeps/powerpc/powerpc32/405/strlen.S
new file mode 100644
index 000000000..77d22ea67
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/405/strlen.S
@@ -0,0 +1,75 @@
+/* Optimized strlen implementation for PowerPC476.
+ Copyright (C) 2010-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* strlen
+
+ Register Use
+ r3:source address and return length of string
+ r4:byte counter
+
+ Implementation description
+   Load two words at a time and count bytes; if we find a null we subtract
+   one from the count and return it.  We need to subtract one because the
+   null terminator is not counted as a byte of the string.  */
+
+EALIGN (strlen,5,0)
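+/* Count single bytes until the pointer is 8-byte aligned, then scan two
+   words per iteration with dlmzb, which also spots the terminating null.  */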
+ neg r7,r3
+ clrlwi. r8,r7,29
+ addi r4,0,0
+ beq L(byte_count_loop)
+ mtctr r8
+
+L(loop):
+ lbz r5,0(r3)
+ cmpi cr5,r5,0x0
+ addi r3,r3,0x1
+ addi r4,r4,0x1
+ beq cr5,L(end_strlen)
+ bdnz L(loop)
+
+L(byte_count_loop):
+ lwz r5,0(r3)
+ lwz r6,4(r3)
+ dlmzb. r12,r5,r6
+ add r4,r4,r12
+ bne L(end_strlen)
+ lwz r5,8(r3)
+ lwz r6,12(r3)
+ dlmzb. r12,r5,r6
+ add r4,r4,r12
+ bne L(end_strlen)
+ lwz r5,16(r3)
+ lwz r6,20(r3)
+ dlmzb. r12,r5,r6
+ add r4,r4,r12
+ bne L(end_strlen)
+ lwz r5,24(r3)
+ lwz r6,28(r3)
+ addi r3,r3,0x20
+ dlmzb. r12,r5,r6
+ add r4,r4,r12
+ bne L(end_strlen)
+ b L(byte_count_loop)
+
+L(end_strlen):
+ addi r3,r4,-1
+ blr
+END (strlen)
+libc_hidden_builtin_def (strlen)
diff --git a/libc/sysdeps/powerpc/powerpc32/405/strncmp.S b/libc/sysdeps/powerpc/powerpc32/405/strncmp.S
new file mode 100644
index 000000000..3e2ba5f85
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/405/strncmp.S
@@ -0,0 +1,128 @@
+/* Optimized strncmp implementation for PowerPC476.
+ Copyright (C) 2010-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* strncmp
+
+ Register Use
+ r0:temp return equality
+ r3:source1 address, return equality
+ r4:source2 address
+ r5:byte count
+
+ Implementation description
+   Limit the word loop so that neither string is read past the end of its
+   4K page.
+   Compare two words at a time from src1 and src2.  If they differ, jump
+   to the end and return src1 > src2 or src1 < src2.
+   If a null is found, compare only the bytes up to the null, then jump to
+   the end and return src1 > src2, src1 < src2 or src1 = src2.
+   If the count runs out, compare only the bytes within the count, then
+   jump to the end and return src1 > src2, src1 < src2 or src1 = src2.
+   If src1 = src2 with no null found and count remaining, repeat.  */
+
+EALIGN (strncmp,5,0)
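+/* CTR is limited to the number of 8-byte blocks before the nearer 4K page
+   boundary, so neither string is read across a page end.  */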
+ neg r7,r3
+ clrlwi r7,r7,20
+ neg r8,r4
+ clrlwi r8,r8,20
+ srwi. r7,r7,3
+ beq L(prebyte_count_loop)
+ srwi. r8,r8,3
+ beq L(prebyte_count_loop)
+ cmplw r7,r8
+ mtctr r7
+ ble L(preword2_count_loop)
+ mtctr r8
+
+L(preword2_count_loop):
+ srwi. r6,r5,3
+ beq L(prebyte_count_loop)
+ mfctr r7
+ cmplw r6,r7
+ bgt L(set_count_loop)
+ mtctr r6
+ clrlwi r5,r5,29
+
+L(word2_count_loop):
+ lwz r10,0(r3)
+ lwz r6,4(r3)
+ addi r3,r3,0x08
+ lwz r8,0(r4)
+ lwz r9,4(r4)
+ addi r4,r4,0x08
+ dlmzb. r12,r10,r6
+ bne L(end_check)
+ cmplw r10,r8
+ bne L(st1)
+ cmplw r6,r9
+ bne L(st1)
+ bdnz L(word2_count_loop)
+
+L(prebyte_count_loop):
+ addi r5,r5,1
+ mtctr r5
+ bdz L(end_strncmp)
+
+L(byte_count_loop):
+ lbz r6,0(r3)
+ addi r3,r3,1
+ lbz r7,0(r4)
+ addi r4,r4,1
+ cmplw r6,r7
+ bne L(st1)
+ cmpwi r6,0
+ beq L(end_strncmp)
+ bdnz L(byte_count_loop)
+ b L(end_strncmp)
+
+L(set_count_loop):
+ slwi r7,r7,3
+ subf r5,r7,r5
+ b L(word2_count_loop)
+
+L(end_check):
+ subfic r12,r12,4
+ blt L(end_check2)
+ rlwinm r12,r12,3,0,31
+ srw r10,r10,r12
+ srw r8,r8,r12
+ cmplw r10,r8
+ bne L(st1)
+ b L(end_strncmp)
+
+L(end_check2):
+ addi r12,r12,4
+ cmplw r10,r8
+ rlwinm r12,r12,3,0,31
+ bne L(st1)
+ srw r6,r6,r12
+ srw r9,r9,r12
+ cmplw r6,r9
+ bne L(st1)
+
+L(end_strncmp):
+ addi r3,r0,0
+ blr
+
+L(st1):
+ mfcr r3
+ blr
+END (strncmp)
+libc_hidden_builtin_def (strncmp)
diff --git a/libc/sysdeps/powerpc/powerpc32/440/Implies b/libc/sysdeps/powerpc/powerpc32/440/Implies
new file mode 100644
index 000000000..70c0d2eda
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/440/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/405/fpu
+powerpc/powerpc32/405
diff --git a/libc/sysdeps/powerpc/powerpc32/464/Implies b/libc/sysdeps/powerpc/powerpc32/464/Implies
new file mode 100644
index 000000000..c3e52c550
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/464/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/440/fpu
+powerpc/powerpc32/440
diff --git a/libc/sysdeps/powerpc/powerpc32/476/Implies b/libc/sysdeps/powerpc/powerpc32/476/Implies
new file mode 100644
index 000000000..2829f9cca
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/476/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/464/fpu
+powerpc/powerpc32/464
diff --git a/libc/sysdeps/powerpc/powerpc32/476/memset.S b/libc/sysdeps/powerpc/powerpc32/476/memset.S
new file mode 100644
index 000000000..48c21d620
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/476/memset.S
@@ -0,0 +1,152 @@
+/* Optimized memset for PowerPC476 (128-byte cacheline).
+ Copyright (C) 2010-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+/* memset
+
+   r3:destination address and return value
+   r4:source integer to copy
+   r5:byte count
+   r11:fill byte from r4 replicated into all 32 bits of the register
+   r12:saved copy of the destination address (the return value)
+
+   Save the destination address (the return value) in r12.
+   If the destination is unaligned and the count is greater than 255 bytes,
+   set 0-3 bytes to make the destination word aligned.
+   If the count is greater than 255 bytes and the fill value is zero,
+   use dcbz to clear whole cache lines where we can;
+   otherwise do the following:
+   If 16 or more bytes remain we use a loop that stores four words at a time.
+   Finally we set the remaining 0-15 bytes with a string store.  */
+
+EALIGN (memset, 5, 0)
+ rlwinm r11,r4,0,24,31
+ rlwimi r11,r4,8,16,23
+ rlwimi r11,r11,16,0,15
+ addi r12,r3,0
+ cmpwi r5,0x00FF
+ ble L(preword8_count_loop)
+ cmpwi r4,0x00
+ beq L(use_dcbz)
+ neg r6,r3
+ clrlwi. r6,r6,30
+ beq L(preword8_count_loop)
+ addi r8,0,1
+ mtctr r6
+ subi r3,r3,1
+
+L(unaligned_bytecopy_loop):
+ stbu r11,0x1(r3)
+ subf. r5,r8,r5
+ beq L(end_memset)
+ bdnz L(unaligned_bytecopy_loop)
+ addi r3,r3,1
+
+L(preword8_count_loop):
+ srwi. r6,r5,4
+ beq L(preword2_count_loop)
+ mtctr r6
+ addi r3,r3,-4
+ mr r8,r11
+ mr r9,r11
+ mr r10,r11
+
+L(word8_count_loop_no_dcbt):
+ stwu r8,4(r3)
+ stwu r9,4(r3)
+ subi r5,r5,0x10
+ stwu r10,4(r3)
+ stwu r11,4(r3)
+ bdnz L(word8_count_loop_no_dcbt)
+ addi r3,r3,4
+
+L(preword2_count_loop):
+ clrlwi. r7,r5,28
+ beq L(end_memset)
+ mr r8,r11
+ mr r9,r11
+ mr r10,r11
+ mtxer r7
+ stswx r8,0,r3
+
+L(end_memset):
+ addi r3,r12,0
+ blr
+
+L(use_dcbz):
+ neg r6,r3
+ clrlwi. r7,r6,28
+ beq L(skip_string_loop)
+ mr r8,r11
+ mr r9,r11
+ mr r10,r11
+ subf r5,r7,r5
+ mtxer r7
+ stswx r8,0,r3
+ add r3,r3,r7
+
+L(skip_string_loop):
+ clrlwi r8,r6,25
+ srwi. r8,r8,4
+ beq L(dcbz_pre_loop)
+ mtctr r8
+
+L(word_loop):
+ stw r11,0(r3)
+ subi r5,r5,0x10
+ stw r11,4(r3)
+ stw r11,8(r3)
+ stw r11,12(r3)
+ addi r3,r3,0x10
+ bdnz L(word_loop)
+
+L(dcbz_pre_loop):
+ srwi r6,r5,7
+ mtctr r6
+ addi r7,0,0
+
+L(dcbz_loop):
+ dcbz r3,r7
+ addi r3,r3,0x80
+ subi r5,r5,0x80
+ bdnz L(dcbz_loop)
+ srwi. r6,r5,4
+ beq L(postword2_count_loop)
+ mtctr r6
+
+L(postword8_count_loop):
+ stw r11,0(r3)
+ subi r5,r5,0x10
+ stw r11,4(r3)
+ stw r11,8(r3)
+ stw r11,12(r3)
+ addi r3,r3,0x10
+ bdnz L(postword8_count_loop)
+
+L(postword2_count_loop):
+ clrlwi. r7,r5,28
+ beq L(end_memset)
+ mr r8,r11
+ mr r9,r11
+ mr r10,r11
+ mtxer r7
+ stswx r8,0,r3
+ b L(end_memset)
+END (memset)
+libc_hidden_builtin_def (memset)
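[Editorial note: a minimal C-level sketch of the strategy the memset.S header comment above describes — replicate the fill byte into a word, align for large counts, clear whole 128-byte cache lines in the zero-fill case, run a 16-byte store loop, then finish the byte tail. It is illustrative only: the name sketch_memset, the CACHE_LINE constant and the use of memcpy in place of the word and dcbz stores are assumptions of this sketch, not part of the patch.]

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define CACHE_LINE 128          /* assumed PPC476 data cache line size */

void *
sketch_memset (void *dst, int c, size_t n)
{
  unsigned char *p = dst;
  uint32_t word = (unsigned char) c;
  word |= word << 8;
  word |= word << 16;           /* replicate the fill byte into a word */

  if (n > 255)
    {
      /* Set 0-3 leading bytes so the pointer is word aligned.  */
      while (((uintptr_t) p & 3) != 0 && n > 0)
        {
          *p++ = (unsigned char) c;
          n--;
        }

      if (c == 0)
        {
          /* Align further to a cache line, then clear whole 128-byte
             lines; the assembly issues one dcbz per line instead.  */
          while (((uintptr_t) p & (CACHE_LINE - 1)) != 0 && n > 0)
            {
              *p++ = 0;
              n--;
            }
          while (n >= CACHE_LINE)
            {
              for (size_t i = 0; i < CACHE_LINE; i += 4)
                memcpy (p + i, &word, 4);       /* word is 0 on this path */
              p += CACHE_LINE;
              n -= CACHE_LINE;
            }
        }
    }

  /* Store 16 bytes (four words) per iteration while possible.  */
  while (n >= 16)
    {
      memcpy (p, &word, 4);
      memcpy (p + 4, &word, 4);
      memcpy (p + 8, &word, 4);
      memcpy (p + 12, &word, 4);
      p += 16;
      n -= 16;
    }

  /* Final 0-15 bytes (the assembly uses the string store stswx).  */
  while (n > 0)
    {
      *p++ = (unsigned char) c;
      n--;
    }

  return dst;
}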
diff --git a/libc/sysdeps/powerpc/powerpc32/Makefile b/libc/sysdeps/powerpc/powerpc32/Makefile
index 64f79003a..cf620c826 100644
--- a/libc/sysdeps/powerpc/powerpc32/Makefile
+++ b/libc/sysdeps/powerpc/powerpc32/Makefile
@@ -1,8 +1,12 @@
# Powerpc32 specific build options.
-ifeq ($(with-fp),no)
-+cflags += -msoft-float
-sysdep-LDFLAGS += -msoft-float
+# Some Powerpc32 variants assume soft-fp is the default even though there is
+# an fp variant, so provide -mhard-float if --with-fp is explicitly passed.
+
+ifeq ($(with-fp),yes)
++cflags += -mhard-float
+ASFLAGS += -mhard-float
+sysdep-LDFLAGS += -mhard-float
endif
ifeq ($(subdir),gmon)
diff --git a/libc/sysdeps/powerpc/powerpc32/__longjmp-common.S b/libc/sysdeps/powerpc/powerpc32/__longjmp-common.S
index 787447363..df1d5195f 100644
--- a/libc/sysdeps/powerpc/powerpc32/__longjmp-common.S
+++ b/libc/sysdeps/powerpc/powerpc32/__longjmp-common.S
@@ -24,6 +24,12 @@
# include <jmpbuf-offsets.h>
#endif
+#if defined __SPE__ || (defined __NO_FPRS__ && !defined _SOFT_FLOAT)
+# define LOAD_GP(N) evldd r##N,((JB_FPRS+((N)-14)*2)*4)(r3)
+#else
+# define LOAD_GP(N) lwz r##N,((JB_GPRS+(N)-14)*4)(r3)
+#endif
+
ENTRY (__longjmp)
#if defined PTR_DEMANGLE || defined CHECK_SP
@@ -39,13 +45,13 @@ ENTRY (__longjmp)
lwz r1,(JB_GPR1*4)(r3)
#endif
lwz r0,(JB_LR*4)(r3)
- lwz r14,((JB_GPRS+0)*4)(r3)
- lwz r15,((JB_GPRS+1)*4)(r3)
- lwz r16,((JB_GPRS+2)*4)(r3)
- lwz r17,((JB_GPRS+3)*4)(r3)
- lwz r18,((JB_GPRS+4)*4)(r3)
- lwz r19,((JB_GPRS+5)*4)(r3)
- lwz r20,((JB_GPRS+6)*4)(r3)
+ LOAD_GP (14)
+ LOAD_GP (15)
+ LOAD_GP (16)
+ LOAD_GP (17)
+ LOAD_GP (18)
+ LOAD_GP (19)
+ LOAD_GP (20)
#ifdef PTR_DEMANGLE
# ifndef CHECK_SP
PTR_DEMANGLE3 (r1, r24, r25)
@@ -53,19 +59,19 @@ ENTRY (__longjmp)
PTR_DEMANGLE2 (r0, r25)
#endif
mtlr r0
- lwz r21,((JB_GPRS+7)*4)(r3)
- lwz r22,((JB_GPRS+8)*4)(r3)
+ LOAD_GP (21)
+ LOAD_GP (22)
lwz r0,(JB_CR*4)(r3)
- lwz r23,((JB_GPRS+9)*4)(r3)
- lwz r24,((JB_GPRS+10)*4)(r3)
- lwz r25,((JB_GPRS+11)*4)(r3)
+ LOAD_GP (23)
+ LOAD_GP (24)
+ LOAD_GP (25)
mtcrf 0xFF,r0
- lwz r26,((JB_GPRS+12)*4)(r3)
- lwz r27,((JB_GPRS+13)*4)(r3)
- lwz r28,((JB_GPRS+14)*4)(r3)
- lwz r29,((JB_GPRS+15)*4)(r3)
- lwz r30,((JB_GPRS+16)*4)(r3)
- lwz r31,((JB_GPRS+17)*4)(r3)
+ LOAD_GP (26)
+ LOAD_GP (27)
+ LOAD_GP (28)
+ LOAD_GP (29)
+ LOAD_GP (30)
+ LOAD_GP (31)
mr r3,r4
blr
END (__longjmp)
diff --git a/libc/sysdeps/powerpc/powerpc32/bsd-_setjmp.S b/libc/sysdeps/powerpc/powerpc32/bsd-_setjmp.S
index 95e8a5aa1..ad2b5ffdb 100644
--- a/libc/sysdeps/powerpc/powerpc32/bsd-_setjmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/bsd-_setjmp.S
@@ -30,7 +30,7 @@ libc_hidden_def (_setjmp)
/* Build a versioned object for libc. */
# if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
-symbol_version (__novmx_setjmp,_setjmp,GLIBC_2.0);
+compat_symbol (libc, __novmx_setjmp, _setjmp, GLIBC_2_0);
ENTRY (__novmx_setjmp)
li r4,0 /* Set second argument to 0. */
@@ -39,7 +39,7 @@ END (__novmx_setjmp)
libc_hidden_def (__novmx_setjmp)
# endif /* defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) */
-default_symbol_version (__vmx_setjmp,_setjmp,GLIBC_2.3.4)
+versioned_symbol (libc, __vmx_setjmp, _setjmp, GLIBC_2_3_4)
/* __GI__setjmp prototype is needed for ntpl i.e. _setjmp is defined
as a libc_hidden_proto & is used in sysdeps/generic/libc-start.c
if HAVE_CLEANUP_JMP_BUF is defined */
diff --git a/libc/sysdeps/powerpc/powerpc32/bsd-setjmp.S b/libc/sysdeps/powerpc/powerpc32/bsd-setjmp.S
index 1113ea533..5e1e860d8 100644
--- a/libc/sysdeps/powerpc/powerpc32/bsd-setjmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/bsd-setjmp.S
@@ -26,7 +26,7 @@ ENTRY (__novmxsetjmp)
b __novmx__sigsetjmp@local
END (__novmxsetjmp)
strong_alias (__novmxsetjmp, __novmx__setjmp)
-symbol_version (__novmxsetjmp, setjmp, GLIBC_2.0)
+compat_symbol (libc, __novmxsetjmp, setjmp, GLIBC_2_0)
#endif /* defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) ) */
@@ -36,4 +36,4 @@ ENTRY (__vmxsetjmp)
END (__vmxsetjmp)
strong_alias (__vmxsetjmp, __vmx__setjmp)
strong_alias (__vmx__setjmp, __setjmp)
-default_symbol_version (__vmxsetjmp,setjmp,GLIBC_2.3.4)
+versioned_symbol (libc, __vmxsetjmp, setjmp, GLIBC_2_3_4)
diff --git a/libc/sysdeps/powerpc/powerpc32/dl-machine.c b/libc/sysdeps/powerpc/powerpc32/dl-machine.c
index 3e7202d86..aba361856 100644
--- a/libc/sysdeps/powerpc/powerpc32/dl-machine.c
+++ b/libc/sysdeps/powerpc/powerpc32/dl-machine.c
@@ -416,6 +416,12 @@ __process_machine_rela (struct link_map *map,
Elf32_Addr const finaladdr,
int rinfo)
{
+ union unaligned
+ {
+ uint16_t u2;
+ uint32_t u4;
+ } __attribute__((__packed__));
+
switch (rinfo)
{
case R_PPC_NONE:
@@ -432,10 +438,7 @@ __process_machine_rela (struct link_map *map,
return;
case R_PPC_UADDR32:
- ((char *) reloc_addr)[0] = finaladdr >> 24;
- ((char *) reloc_addr)[1] = finaladdr >> 16;
- ((char *) reloc_addr)[2] = finaladdr >> 8;
- ((char *) reloc_addr)[3] = finaladdr;
+ ((union unaligned *) reloc_addr)->u4 = finaladdr;
break;
case R_PPC_ADDR24:
@@ -453,8 +456,7 @@ __process_machine_rela (struct link_map *map,
case R_PPC_UADDR16:
if (__builtin_expect (finaladdr > 0x7fff && finaladdr < 0xffff8000, 0))
_dl_reloc_overflow (map, "R_PPC_UADDR16", reloc_addr, refsym);
- ((char *) reloc_addr)[0] = finaladdr >> 8;
- ((char *) reloc_addr)[1] = finaladdr;
+ ((union unaligned *) reloc_addr)->u2 = finaladdr;
break;
case R_PPC_ADDR16_LO:
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/Makefile b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/Makefile
new file mode 100644
index 000000000..adf556870
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/Makefile
@@ -0,0 +1,9 @@
+ifeq ($(subdir),math)
+libm-routines += fexcepts_to_spe fexcepts_from_spe
+libm-routines += fexcepts_to_prctl fexcepts_from_prctl
+libm-routines += fe_note_change
+endif
+
+ifeq ($(subdir),soft-fp)
+sysdep_routines += fraiseexcept-soft
+endif
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fclrexcpt.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fclrexcpt.c
new file mode 100644
index 000000000..92a7dd1e0
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fclrexcpt.c
@@ -0,0 +1,53 @@
+/* Clear given exceptions in current floating-point environment. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+#undef feclearexcept
+int
+__feclearexcept (int excepts)
+{
+ unsigned int fpescr;
+ int excepts_spe = __fexcepts_to_spe (excepts);
+
+ /* Get the current state. */
+ fpescr = fegetenv_register ();
+
+ /* Clear the relevant bits. */
+ fpescr &= ~excepts_spe;
+
+ /* Put the new state in effect. */
+ fesetenv_register (fpescr);
+
+ /* Let the kernel know if the "invalid" or "underflow" bit was
+ cleared. */
+ if (excepts & (FE_INVALID | FE_UNDERFLOW))
+ __fe_note_change ();
+
+ /* Success. */
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__feclearexcept, __old_feclearexcept)
+compat_symbol (libm, __old_feclearexcept, feclearexcept, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__feclearexcept, feclearexcept)
+versioned_symbol (libm, __feclearexcept, feclearexcept, GLIBC_2_2);
diff --git a/libc/sysdeps/x86_64/multiarch/strend-sse4.S b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fe_note_change.c
index c5a7ae28a..43a570626 100644
--- a/libc/sysdeps/x86_64/multiarch/strend-sse4.S
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fe_note_change.c
@@ -1,6 +1,5 @@
-/* Return the pointer to the end of string, using SSE4.2
- Copyright (C) 2009-2013 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
+/* Note a change to floating-point exceptions.
+ Copyright (C) 2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -17,32 +16,24 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#include <fenv_libc.h>
#include <sysdep.h>
-#include "asm-syntax.h"
-
- .section .text.sse4.2,"ax",@progbits
-ENTRY (__strend_sse4)
- pxor %xmm2, %xmm2
- movq %rdi, %rcx
- andq $~15, %rdi
- movdqa %xmm2, %xmm1
- pcmpeqb (%rdi), %xmm2
- orl $0xffffffff, %esi
- subq %rdi, %rcx
- shll %cl, %esi
- pmovmskb %xmm2, %edx
- andl %esi, %edx
- jnz 1f
-
-2: pcmpistri $0x08, 16(%rdi), %xmm1
- leaq 16(%rdi), %rdi
- jnz 2b
-
- leaq (%rdi,%rcx), %rax
- ret
-
-1: bsfl %edx, %eax
- addq %rdi, %rax
- ret
-
-END (__strend_sse4)
+#include <sys/prctl.h>
+
+/* Inform the kernel of a change to floating-point exceptions. */
+
+void
+__fe_note_change (void)
+{
+ int pflags, r;
+ INTERNAL_SYSCALL_DECL (err);
+
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &pflags);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return;
+ if ((pflags & PR_FP_EXC_SW_ENABLE) == 0)
+ INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC,
+ pflags | PR_FP_EXC_SW_ENABLE);
+}
+
+libm_hidden_def (__fe_note_change)
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fedisblxcpt.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fedisblxcpt.c
new file mode 100644
index 000000000..7cc963c01
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fedisblxcpt.c
@@ -0,0 +1,54 @@
+/* Disable floating-point exceptions. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <sysdep.h>
+#include <sys/prctl.h>
+
+int
+fedisableexcept (int excepts)
+{
+ int result = 0, pflags, r;
+ INTERNAL_SYSCALL_DECL (err);
+
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &pflags);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return -1;
+
+ /* Save old enable bits. */
+ result = __fexcepts_from_prctl (pflags);
+
+ pflags &= ~__fexcepts_to_prctl (excepts);
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC,
+ pflags | PR_FP_EXC_SW_ENABLE);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return -1;
+
+ /* If disabling signals for "inexact", also disable trapping to the
+ kernel. */
+ if ((excepts & FE_INEXACT) != 0)
+ {
+ unsigned long fpescr;
+
+ fpescr = fegetenv_register ();
+ fpescr &= ~SPEFSCR_FINXE;
+ fesetenv_register (fpescr);
+ }
+
+ return result;
+}
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/feenablxcpt.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/feenablxcpt.c
new file mode 100644
index 000000000..133dde7b3
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/feenablxcpt.c
@@ -0,0 +1,54 @@
+/* Enable floating-point exceptions. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <sysdep.h>
+#include <sys/prctl.h>
+
+int
+feenableexcept (int excepts)
+{
+ unsigned int result = 0, pflags, r;
+ INTERNAL_SYSCALL_DECL (err);
+
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &pflags);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return -1;
+
+ /* Save old enable bits. */
+ result = __fexcepts_from_prctl (pflags);
+
+ pflags |= __fexcepts_to_prctl (excepts);
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC,
+ pflags | PR_FP_EXC_SW_ENABLE);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return -1;
+
+ /* If enabling signals for "inexact", also enable trapping to the
+ kernel. */
+ if ((excepts & FE_INEXACT) != 0)
+ {
+ unsigned long fpescr;
+
+ fpescr = fegetenv_register ();
+ fpescr |= SPEFSCR_FINXE;
+ fesetenv_register (fpescr);
+ }
+
+ return result;
+}
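[Editorial note, not part of the patch: feenableexcept and fedisableexcept are the GNU extensions declared in <fenv.h> under _GNU_SOURCE and, as in the e500 implementations above, return the previously enabled exception set or -1 on failure. A minimal usage sketch:]

#define _GNU_SOURCE
#include <fenv.h>
#include <stdio.h>

int
main (void)
{
  /* Enable trapping for divide-by-zero and invalid operations.  The
     return value is the set that was enabled before, or -1 on error
     (the e500 version above returns -1 if the prctl call fails).  */
  int old = feenableexcept (FE_DIVBYZERO | FE_INVALID);
  if (old == -1)
    fputs ("feenableexcept failed\n", stderr);

  /* ... code that should trap on those exceptions ... */

  /* Disable only what was newly enabled here.  */
  fedisableexcept ((FE_DIVBYZERO | FE_INVALID) & ~old);
  return 0;
}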
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetenv.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetenv.c
new file mode 100644
index 000000000..bfcbca2ad
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetenv.c
@@ -0,0 +1,47 @@
+/* Store current floating-point environment. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <sysdep.h>
+#include <sys/prctl.h>
+
+int
+__fegetenv (fenv_t *envp)
+{
+ fenv_union_t u;
+ INTERNAL_SYSCALL_DECL (err);
+ int r;
+
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &u.l[0]);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return -1;
+
+ u.l[1] = fegetenv_register ();
+ *envp = u.fenv;
+
+ /* Success. */
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fegetenv, __old_fegetenv)
+compat_symbol (libm, __old_fegetenv, fegetenv, GLIBC_2_1);
+#endif
+
+versioned_symbol (libm, __fegetenv, fegetenv, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetexcept.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetexcept.c
new file mode 100644
index 000000000..9c7afc74f
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetexcept.c
@@ -0,0 +1,36 @@
+/* Get floating-point exceptions. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <sysdep.h>
+#include <sys/prctl.h>
+
+int
+fegetexcept (void)
+{
+ int result = 0, pflags, r;
+ INTERNAL_SYSCALL_DECL (err);
+
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &pflags);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return -1;
+
+ result = __fexcepts_from_prctl (pflags);
+
+ return result;
+}
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetround.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetround.c
new file mode 100644
index 000000000..f69e9a5bd
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fegetround.c
@@ -0,0 +1,29 @@
+/* Return current rounding direction. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+#undef fegetround
+int
+fegetround (void)
+{
+ unsigned long fpescr;
+
+ fpescr = fegetenv_register ();
+ return fpescr & 3;
+}
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/feholdexcpt.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/feholdexcpt.c
new file mode 100644
index 000000000..bd05ebd3c
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/feholdexcpt.c
@@ -0,0 +1,57 @@
+/* Store current floating-point environment and clear exceptions.
+ e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <sysdep.h>
+#include <sys/prctl.h>
+
+int
+feholdexcept (fenv_t *envp)
+{
+ fenv_union_t u;
+ INTERNAL_SYSCALL_DECL (err);
+ int r;
+
+ /* Get the current state. */
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_GET_FPEXC, &u.l[0]);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return -1;
+
+ u.l[1] = fegetenv_register ();
+ *envp = u.fenv;
+
+ /* Clear everything except for the rounding mode and trapping to the
+ kernel. */
+ u.l[0] &= ~(PR_FP_EXC_DIV
+ | PR_FP_EXC_OVF
+ | PR_FP_EXC_UND
+ | PR_FP_EXC_RES
+ | PR_FP_EXC_INV);
+ u.l[1] &= SPEFSCR_FRMC | (SPEFSCR_ALL_EXCEPT_ENABLE & ~SPEFSCR_FINXE);
+
+ /* Put the new state in effect. */
+ fesetenv_register (u.l[1]);
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC,
+ u.l[0] | PR_FP_EXC_SW_ENABLE);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return -1;
+
+ return 0;
+}
+libm_hidden_def (feholdexcept)
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_const.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_const.c
new file mode 100644
index 000000000..3a85f1810
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_const.c
@@ -0,0 +1,41 @@
+/* Constant floating-point environments for e500.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* The use of "unsigned long long" as the type to define the
+ bit-pattern explicitly, rather than the type "double" used in
+ <bits/fenv.h>, means that we cannot include <fenv_libc.h> here to
+ get the enum constants for the SPEFSCR bits to enable
+ exceptions. */
+
+#include <sys/prctl.h>
+
+/* This value is used if the default environment (FE_DFL_ENV) is requested. */
+const unsigned long long __fe_dfl_env __attribute__ ((aligned (8))) =
+ 0x3cULL;
+
+/* Floating-point environment where none of the exceptions are masked. */
+const unsigned long long __fe_enabled_env __attribute__ ((aligned (8))) =
+ (((unsigned long long) (PR_FP_EXC_DIV
+ | PR_FP_EXC_OVF
+ | PR_FP_EXC_UND
+ | PR_FP_EXC_RES
+ | PR_FP_EXC_INV)) << 32) | 0x7cULL;
+
+/* Non-IEEE mode. */
+const unsigned long long __fe_nonieee_env __attribute__ ((aligned (8))) =
+ 0x0ULL;
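[Editorial note: since fenv_libc.h cannot be included here, the low words above are written as raw hex. As a cross-check against the SPEFSCR_* values defined in the fenv_libc.h added later in this patch: 0x3c is every trap-enable bit except FINXE (inexact), the same enable mask feholdexcept keeps, and 0x7c is SPEFSCR_ALL_EXCEPT_ENABLE. The snippet below is illustrative only and assumes a C11 compiler for _Static_assert.]

/* Values copied from the fenv_libc.h added later in this patch.  */
#define SPEFSCR_FINXE 0x00000040
#define SPEFSCR_FINVE 0x00000020
#define SPEFSCR_FDBZE 0x00000010
#define SPEFSCR_FUNFE 0x00000008
#define SPEFSCR_FOVFE 0x00000004
#define SPEFSCR_ALL_EXCEPT_ENABLE 0x0000007c

/* Low word of __fe_dfl_env: all trap-enable bits except "inexact".  */
_Static_assert ((SPEFSCR_FINVE | SPEFSCR_FDBZE | SPEFSCR_FUNFE | SPEFSCR_FOVFE)
                == 0x3c, "default environment low word");
_Static_assert ((SPEFSCR_ALL_EXCEPT_ENABLE & ~SPEFSCR_FINXE) == 0x3c,
                "equivalently, the enable mask feholdexcept keeps");

/* Low word of __fe_enabled_env: all five trap-enable bits.  */
_Static_assert (SPEFSCR_ALL_EXCEPT_ENABLE == 0x7c,
                "enabled environment low word");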
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_libc.h b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_libc.h
new file mode 100644
index 000000000..96375808d
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fenv_libc.h
@@ -0,0 +1,96 @@
+/* Internal libc stuff for floating point environment routines. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _FENV_LIBC_H
+#define _FENV_LIBC_H 1
+
+#include <fenv.h>
+
+int __feraiseexcept_spe (int);
+libm_hidden_proto (__feraiseexcept_spe)
+
+int __fexcepts_to_spe (int);
+libm_hidden_proto (__fexcepts_to_spe)
+
+int __fexcepts_from_spe (int);
+libm_hidden_proto (__fexcepts_from_spe)
+
+int __fexcepts_to_prctl (int);
+libm_hidden_proto (__fexcepts_to_prctl)
+
+int __fexcepts_from_prctl (int);
+libm_hidden_proto (__fexcepts_from_prctl)
+
+void __fe_note_change (void);
+libm_hidden_proto (__fe_note_change)
+
+/* Equivalent to fegetenv, but returns an unsigned int instead of
+ taking a pointer. */
+#define fegetenv_register() \
+ ({ unsigned int fscr; asm volatile ("mfspefscr %0" : "=r" (fscr)); fscr; })
+
+/* Equivalent to fesetenv, but takes an unsigned int instead of a
+ pointer. */
+#define fesetenv_register(fscr) \
+ ({ asm volatile ("mtspefscr %0" : : "r" (fscr)); })
+
+typedef union
+{
+ fenv_t fenv;
+ unsigned int l[2];
+} fenv_union_t;
+
+/* Definitions of all the SPEFSCR bit masks.  */
+enum {
+ SPEFSCR_SOVH = 0x80000000,
+ SPEFSCR_OVH = 0x40000000,
+ SPEFSCR_FGH = 0x20000000,
+ SPEFSCR_FXH = 0x10000000,
+ SPEFSCR_FINVH = 0x08000000,
+ SPEFSCR_FDBZH = 0x04000000,
+ SPEFSCR_FUNFH = 0x02000000,
+ SPEFSCR_FOVFH = 0x01000000,
+ /* 2 unused bits. */
+ SPEFSCR_FINXS = 0x00200000,
+ SPEFSCR_FINVS = 0x00100000,
+ SPEFSCR_FDBZS = 0x00080000,
+ SPEFSCR_FUNFS = 0x00040000,
+ SPEFSCR_FOVFS = 0x00020000,
+ /* Combination of the exception bits. */
+ SPEFSCR_ALL_EXCEPT = 0x003e0000,
+ SPEFSCR_MODE = 0x00010000,
+ SPEFSCR_SOV = 0x00008000,
+ SPEFSCR_OV = 0x00004000,
+ SPEFSCR_FG = 0x00002000,
+ SPEFSCR_FX = 0x00001000,
+ SPEFSCR_FINV = 0x00000800,
+ SPEFSCR_FDBZ = 0x00000400,
+ SPEFSCR_FUNF = 0x00000200,
+ SPEFSCR_FOVF = 0x00000100,
+ /* 1 unused bit. */
+ SPEFSCR_FINXE = 0x00000040,
+ SPEFSCR_FINVE = 0x00000020,
+ SPEFSCR_FDBZE = 0x00000010,
+ SPEFSCR_FUNFE = 0x00000008,
+ SPEFSCR_FOVFE = 0x00000004,
+ /* Combination of the exception trap enable bits. */
+ SPEFSCR_ALL_EXCEPT_ENABLE = 0x0000007c,
+ SPEFSCR_FRMC = 0x00000003
+};
+
+#endif /* fenv_libc.h */
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fesetenv.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fesetenv.c
new file mode 100644
index 000000000..411e6be8d
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fesetenv.c
@@ -0,0 +1,49 @@
+/* Install given floating-point environment. e500 version.
+ Copyright (C) 1997-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <sysdep.h>
+#include <sys/prctl.h>
+
+int
+__fesetenv (const fenv_t *envp)
+{
+ fenv_union_t u;
+ INTERNAL_SYSCALL_DECL (err);
+ int r;
+
+ u.fenv = *envp;
+
+ fesetenv_register (u.l[1]);
+ r = INTERNAL_SYSCALL (prctl, err, 2, PR_SET_FPEXC,
+ u.l[0] | PR_FP_EXC_SW_ENABLE);
+ if (INTERNAL_SYSCALL_ERROR_P (r, err))
+ return -1;
+
+ /* Success. */
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fesetenv, __old_fesetenv)
+compat_symbol (libm, __old_fesetenv, fesetenv, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__fesetenv, fesetenv)
+versioned_symbol (libm, __fesetenv, fesetenv, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fesetround.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fesetround.c
new file mode 100644
index 000000000..805008e0c
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fesetround.c
@@ -0,0 +1,35 @@
+/* Set current rounding direction. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+int
+fesetround (int round)
+{
+ unsigned long fpescr;
+
+ if ((unsigned int) round > 3)
+ return 1;
+
+ fpescr = fegetenv_register ();
+ fpescr = (fpescr & ~SPEFSCR_FRMC) | (round & 3);
+ fesetenv_register (fpescr);
+
+ return 0;
+}
+libm_hidden_def (fesetround)
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/feupdateenv.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/feupdateenv.c
new file mode 100644
index 000000000..505c92363
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/feupdateenv.c
@@ -0,0 +1,47 @@
+/* Install given floating-point environment and raise exceptions.
+ e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+int
+__feupdateenv (const fenv_t *envp)
+{
+ int exc;
+
+ /* Save the currently set exceptions. */
+ exc = fegetenv_register () & SPEFSCR_ALL_EXCEPT;
+
+ /* Install new environment. */
+ fesetenv (envp);
+
+ /* Raise (if appropriate) saved exceptions. */
+ __feraiseexcept_spe (exc);
+
+ /* Success. */
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__feupdateenv, __old_feupdateenv)
+compat_symbol (libm, __old_feupdateenv, feupdateenv, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__feupdateenv, feupdateenv)
+versioned_symbol (libm, __feupdateenv, feupdateenv, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_prctl.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_prctl.c
new file mode 100644
index 000000000..c094a04cb
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_prctl.c
@@ -0,0 +1,42 @@
+/* Convert floating-point exceptions from prctl form.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <sys/prctl.h>
+
+/* Convert EXCEPTS from prctl bits to FE_* form, returning the
+ converted value. */
+
+int
+__fexcepts_from_prctl (int excepts)
+{
+ int result = 0;
+ if (excepts & PR_FP_EXC_OVF)
+ result |= FE_OVERFLOW;
+ if (excepts & PR_FP_EXC_UND)
+ result |= FE_UNDERFLOW;
+ if (excepts & PR_FP_EXC_INV)
+ result |= FE_INVALID;
+ if (excepts & PR_FP_EXC_DIV)
+ result |= FE_DIVBYZERO;
+ if (excepts & PR_FP_EXC_RES)
+ result |= FE_INEXACT;
+ return result;
+}
+
+libm_hidden_def (__fexcepts_from_prctl)
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_spe.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_spe.c
new file mode 100644
index 000000000..3ec939d18
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_from_spe.c
@@ -0,0 +1,41 @@
+/* Convert floating-point exceptions from SPEFSCR form.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+/* Convert EXCEPTS from SPEFSCR bits to FE_* form, returning the
+ converted value. */
+
+int
+__fexcepts_from_spe (int excepts)
+{
+ int result = 0;
+ if (excepts & SPEFSCR_FINXS)
+ result |= FE_INEXACT;
+ if (excepts & SPEFSCR_FDBZS)
+ result |= FE_DIVBYZERO;
+ if (excepts & SPEFSCR_FUNFS)
+ result |= FE_UNDERFLOW;
+ if (excepts & SPEFSCR_FOVFS)
+ result |= FE_OVERFLOW;
+ if (excepts & SPEFSCR_FINVS)
+ result |= FE_INVALID;
+ return result;
+}
+
+libm_hidden_def (__fexcepts_from_spe)
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_prctl.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_prctl.c
new file mode 100644
index 000000000..b9c51b125
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_prctl.c
@@ -0,0 +1,42 @@
+/* Convert floating-point exceptions to prctl form.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <sys/prctl.h>
+
+/* Convert EXCEPTS from FE_* form to prctl bits, returning the
+ converted value. */
+
+int
+__fexcepts_to_prctl (int excepts)
+{
+ int result = 0;
+ if (excepts & FE_INEXACT)
+ result |= PR_FP_EXC_RES;
+ if (excepts & FE_DIVBYZERO)
+ result |= PR_FP_EXC_DIV;
+ if (excepts & FE_UNDERFLOW)
+ result |= PR_FP_EXC_UND;
+ if (excepts & FE_OVERFLOW)
+ result |= PR_FP_EXC_OVF;
+ if (excepts & FE_INVALID)
+ result |= PR_FP_EXC_INV;
+ return result;
+}
+
+libm_hidden_def (__fexcepts_to_prctl)
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_spe.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_spe.c
new file mode 100644
index 000000000..570934d15
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fexcepts_to_spe.c
@@ -0,0 +1,41 @@
+/* Convert floating-point exceptions to SPEFSCR form.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+/* Convert EXCEPTS from FE_* form to SPEFSCR bits, returning the
+ converted value. */
+
+int
+__fexcepts_to_spe (int excepts)
+{
+ int result = 0;
+ if (excepts & FE_INEXACT)
+ result |= SPEFSCR_FINXS;
+ if (excepts & FE_DIVBYZERO)
+ result |= SPEFSCR_FDBZS;
+ if (excepts & FE_UNDERFLOW)
+ result |= SPEFSCR_FUNFS;
+ if (excepts & FE_OVERFLOW)
+ result |= SPEFSCR_FOVFS;
+ if (excepts & FE_INVALID)
+ result |= SPEFSCR_FINVS;
+ return result;
+}
+
+libm_hidden_def (__fexcepts_to_spe)
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fgetexcptflg.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fgetexcptflg.c
new file mode 100644
index 000000000..b01cadeff
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fgetexcptflg.c
@@ -0,0 +1,41 @@
+/* Store current representation for exceptions. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+int
+__fegetexceptflag (fexcept_t *flagp, int excepts)
+{
+ unsigned long fpescr;
+
+ /* Get the current state. */
+ fpescr = fegetenv_register ();
+
+ *flagp = fpescr & SPEFSCR_ALL_EXCEPT;
+
+ /* Success. */
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fegetexceptflag, __old_fegetexceptflag)
+compat_symbol (libm, __old_fegetexceptflag, fegetexceptflag, GLIBC_2_1);
+#endif
+
+versioned_symbol (libm, __fegetexceptflag, fegetexceptflag, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcept-soft.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcept-soft.c
new file mode 100644
index 000000000..0aed72ff3
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcept-soft.c
@@ -0,0 +1,28 @@
+/* Raise given exceptions. e500 version for use from soft-fp.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Aldy Hernandez <aldyh@redhat.com>, 2004.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+#include <libc-symbols.h>
+
+int __feraiseexcept_soft (int);
+libc_hidden_proto (__feraiseexcept_soft)
+
+#define __FERAISEEXCEPT_INTERNAL __feraiseexcept_soft
+#include "spe-raise.c"
+libc_hidden_def (__feraiseexcept_soft)
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcpt.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcpt.c
new file mode 100644
index 000000000..0eca9ffff
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fraiseexcpt.c
@@ -0,0 +1,40 @@
+/* Raise given exceptions. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+#define __FERAISEEXCEPT_INTERNAL __feraiseexcept_spe
+#include "spe-raise.c"
+
+libm_hidden_def (__feraiseexcept_spe)
+
+#undef feraiseexcept
+int
+__feraiseexcept (int excepts)
+{
+ return __feraiseexcept_spe (__fexcepts_to_spe (excepts));
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__feraiseexcept, __old_feraiseexcept)
+compat_symbol (libm, __old_feraiseexcept, feraiseexcept, GLIBC_2_1);
+#endif
+
+libm_hidden_ver (__feraiseexcept, feraiseexcept)
+versioned_symbol (libm, __feraiseexcept, feraiseexcept, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fsetexcptflg.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fsetexcptflg.c
new file mode 100644
index 000000000..43f2d19d1
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/fsetexcptflg.c
@@ -0,0 +1,55 @@
+/* Set floating-point environment exception handling. e500 version.
+ Copyright (C) 1997-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+int
+__fesetexceptflag (const fexcept_t *flagp, int excepts)
+{
+ unsigned long old_spefscr, spefscr;
+ fexcept_t flag;
+ int excepts_spe = __fexcepts_to_spe (excepts);
+
+ /* Get the current state. */
+ old_spefscr = fegetenv_register ();
+
+ /* Ignore exceptions not listed in 'excepts'. */
+ flag = *flagp & excepts_spe;
+
+ /* Replace the exception status */
+ spefscr = (old_spefscr & ~excepts_spe) | flag;
+
+ /* Store the new status word (along with the rest of the environment). */
+ fesetenv_register (spefscr);
+
+ /* If the state of the "invalid" or "underflow" flag has changed,
+ inform the kernel. */
+ if (((spefscr ^ old_spefscr) & (SPEFSCR_FINVS | SPEFSCR_FUNFS)) != 0)
+ __fe_note_change ();
+
+ /* Success. */
+ return 0;
+}
+
+#include <shlib-compat.h>
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_2)
+strong_alias (__fesetexceptflag, __old_fesetexceptflag)
+compat_symbol (libm, __old_fesetexceptflag, fesetexceptflag, GLIBC_2_1);
+#endif
+
+versioned_symbol (libm, __fesetexceptflag, fesetexceptflag, GLIBC_2_2);
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/ftestexcept.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/ftestexcept.c
new file mode 100644
index 000000000..f4f547d5f
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/ftestexcept.c
@@ -0,0 +1,31 @@
+/* Test exception in current environment. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+int
+fetestexcept (int excepts)
+{
+ unsigned long f;
+
+ /* Get the current state. */
+ f = fegetenv_register ();
+
+ return __fexcepts_from_spe (f) & excepts;
+}
+libm_hidden_def (fetestexcept)
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/get-rounding-mode.h b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/get-rounding-mode.h
new file mode 100644
index 000000000..117e7331e
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/get-rounding-mode.h
@@ -0,0 +1,4 @@
+/* The generic version of get-rounding-mode.h using fpu_control.h, not
+ the one using the software rounding mode, is correct for e500. */
+
+#include <sysdeps/generic/get-rounding-mode.h>
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/s_fabsf.S b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/s_fabsf.S
new file mode 100644
index 000000000..823f748ba
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/s_fabsf.S
@@ -0,0 +1,27 @@
+/* Floating-point absolute value. e500 version.
+ Copyright (C) 2004-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+ENTRY (__fabsf)
+/* float [r3] fabsf (float [r3] x) ; */
+ efsabs r3,r3
+ blr
+END (__fabsf)
+
+weak_alias (__fabsf, fabsf)
diff --git a/libc/sysdeps/powerpc/powerpc32/e500/nofpu/spe-raise.c b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/spe-raise.c
new file mode 100644
index 000000000..4394ddc7c
--- /dev/null
+++ b/libc/sysdeps/powerpc/powerpc32/e500/nofpu/spe-raise.c
@@ -0,0 +1,53 @@
+/* Raise given exceptions, given the SPEFSCR bits for those exceptions.
+ Copyright (C) 1997-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv_libc.h>
+
+int
+__FERAISEEXCEPT_INTERNAL (int excepts)
+{
+ unsigned long f;
+
+ f = fegetenv_register ();
+ f |= (excepts & SPEFSCR_ALL_EXCEPT);
+ fesetenv_register (f);
+
+ /* Force the operations that cause the exceptions. */
+ if ((SPEFSCR_FINVS & excepts) != 0)
+ /* 0 / 0 */
+ asm volatile ("efsdiv %0,%0,%1" : : "r" (0), "r" (0));
+
+ if ((SPEFSCR_FDBZS & excepts) != 0)
+ /* 1.0 / 0.0 */
+ asm volatile ("efsdiv %0,%0,%1" : : "r" (1.0F), "r" (0));
+
+ if ((SPEFSCR_FOVFS & excepts) != 0)
+ /* Largest normalized number plus itself. */
+ asm volatile ("efsadd %0,%0,%1" : : "r" (0x7f7fffff), "r" (0x7f7fffff));
+
+ if ((SPEFSCR_FUNFS & excepts) != 0)
+ /* Smallest normalized number times itself. */
+ asm volatile ("efsmul %0,%0,%1" : : "r" (0x800000), "r" (0x800000));
+
+ if ((SPEFSCR_FINXS & excepts) != 0)
+ /* Smallest normalized minus 1.0 raises the inexact flag. */
+ asm volatile ("efssub %0,%0,%1" : : "r" (0x00800000), "r" (1.0F));
+
+ /* Success. */
+ return 0;
+}
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S b/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S
index 9d34cd916..d02aa5754 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S
@@ -43,16 +43,16 @@ ENTRY (__longjmp)
# endif
mtlr r6
cfi_same_value (lr)
- lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r5)
+ lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r5)
# else
lwz r5,_dl_hwcap@got(r5)
mtlr r6
cfi_same_value (lr)
- lwz r5,4(r5)
+ lwz r5,LOWORD(r5)
# endif
# else
- lis r5,(_dl_hwcap+4)@ha
- lwz r5,(_dl_hwcap+4)@l(r5)
+ lis r5,(_dl_hwcap+LOWORD)@ha
+ lwz r5,(_dl_hwcap+LOWORD)@l(r5)
# endif
andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16)
beq L(no_vmx)
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp.S b/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp.S
index 96e50de37..27166c454 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/__longjmp.S
@@ -26,14 +26,14 @@
#else /* !NOT_IN_libc */
/* Build a versioned object for libc. */
-default_symbol_version (__vmx__longjmp,__longjmp,GLIBC_2.3.4);
+versioned_symbol (libc, __vmx__longjmp, __longjmp, GLIBC_2_3_4);
# define __longjmp __vmx__longjmp
# include "__longjmp-common.S"
# if defined SHARED && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
# define __NO_VMX__
# undef JB_SIZE
-symbol_version (__novmx__longjmp,__longjmp,GLIBC_2.0);
+compat_symbol (libc, __novmx__longjmp, __longjmp, GLIBC_2_0);
# undef __longjmp
# define __longjmp __novmx__longjmp
# include "__longjmp-common.S"
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/s_copysign.S b/libc/sysdeps/powerpc/powerpc32/fpu/s_copysign.S
index 840891f1c..1da24f492 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/s_copysign.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/s_copysign.S
@@ -29,7 +29,7 @@ ENTRY(__copysign)
stwu r1,-16(r1)
cfi_adjust_cfa_offset (16)
stfd fp2,8(r1)
- lwz r3,8(r1)
+ lwz r3,8+HIWORD(r1)
cmpwi r3,0
addi r1,r1,16
cfi_adjust_cfa_offset (-16)
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S b/libc/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S
index 4ec8389b5..2ad6de273 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/s_copysignl.S
@@ -30,7 +30,7 @@ ENTRY(__copysignl)
fmr fp0,fp1
fabs fp1,fp1
fcmpu cr7,fp0,fp1
- lwz r3,8(r1)
+ lwz r3,8+HIWORD(r1)
cmpwi cr6,r3,0
addi r1,r1,16
cfi_adjust_cfa_offset (-16)
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/s_lrint.S b/libc/sysdeps/powerpc/powerpc32/fpu/s_lrint.S
index 27881f8cc..249fda501 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/s_lrint.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/s_lrint.S
@@ -24,10 +24,10 @@ ENTRY (__lrint)
stwu r1,-16(r1)
fctiw fp13,fp1
stfd fp13,8(r1)
- nop /* Insure the following load is in a different dispatch group */
+ nop /* Ensure the following load is in a different dispatch group */
nop /* to avoid pipe stall on POWER4&5. */
nop
- lwz r3,12(r1)
+ lwz r3,8+LOWORD(r1)
addi r1,r1,16
blr
END (__lrint)
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/s_lround.S b/libc/sysdeps/powerpc/powerpc32/fpu/s_lround.S
index 92dc3787d..6309f864b 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/s_lround.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/s_lround.S
@@ -67,7 +67,7 @@ ENTRY (__lround)
nop /* Ensure the following load is in a different dispatch */
nop /* group to avoid pipe stall on POWER4&5. */
nop
- lwz r3,12(r1) /* Load return as integer. */
+ lwz r3,8+LOWORD(r1) /* Load return as integer. */
.Lout:
addi r1,r1,16
blr
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/s_roundf.S b/libc/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
index 2ed9ca7b4..8cff1563a 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
@@ -19,7 +19,7 @@
#include <sysdep.h>
.section .rodata.cst8,"aM",@progbits,8
- .align 2
+ .align 3
.LC0: /* 2**23 */
.long 0x4b000000
.LC1: /* 0.5 */
@@ -60,7 +60,6 @@ ENTRY (__roundf )
#ifdef SHARED
lfs fp10,.LC1-.LC0(r9)
#else
- lis r9,.LC1@ha
lfs fp10,.LC1@l(r9)
#endif
ble- cr6,.L4
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S b/libc/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S
index 46ea2b00f..f3244060e 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/setjmp-common.S
@@ -94,14 +94,14 @@ ENTRY (__sigsetjmp)
# else
lwz r5,_rtld_global_ro@got(r5)
# endif
- lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r5)
+ lwz r5,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r5)
# else
lwz r5,_dl_hwcap@got(r5)
- lwz r5,4(r5)
+ lwz r5,LOWORD(r5)
# endif
# else
- lis r6,(_dl_hwcap+4)@ha
- lwz r5,(_dl_hwcap+4)@l(r6)
+ lis r6,(_dl_hwcap+LOWORD)@ha
+ lwz r5,(_dl_hwcap+LOWORD)@l(r6)
# endif
andis. r5,r5,(PPC_FEATURE_HAS_ALTIVEC >> 16)
beq L(no_vmx)
@@ -111,44 +111,43 @@ ENTRY (__sigsetjmp)
stw r0,((JB_VRSAVE)*4)(3)
addi r6,r5,16
beq+ L(aligned_save_vmx)
- lvsr v0,0,r5
- vspltisb v1,-1 /* set v1 to all 1's */
- vspltisb v2,0 /* set v2 to all 0's */
- vperm v3,v2,v1,v0 /* v3 contains shift mask with num all 1 bytes on left = misalignment */
+ lvsr v0,0,r5
+ lvsl v1,0,r5
+ addi r6,r5,-16
- /* Special case for v20 we need to preserve what is in save area below v20 before obliterating it */
- lvx v5,0,r5
- vperm v20,v20,v20,v0
- vsel v5,v5,v20,v3
- vsel v20,v20,v2,v3
- stvx v5,0,r5
+# define save_misaligned_vmx(savevr,prevvr,shiftvr,tmpvr,savegpr,addgpr) \
+ addi addgpr,addgpr,32; \
+ vperm tmpvr,prevvr,savevr,shiftvr; \
+ stvx tmpvr,0,savegpr
-#define save_2vmx_partial(savevr,prev_savevr,hivr,shiftvr,maskvr,savegpr,addgpr) \
- addi addgpr,addgpr,32; \
- vperm savevr,savevr,savevr,shiftvr; \
- vsel hivr,prev_savevr,savevr,maskvr; \
- stvx hivr,0,savegpr;
+ /*
+ * We have to be careful not to corrupt the data below v20 and
+ * above v31. To keep things simple we just rotate both ends in
+ * the opposite direction to our main permute so we can use
+ * the common macro.
+ */
- save_2vmx_partial(v21,v20,v5,v0,v3,r6,r5)
- save_2vmx_partial(v22,v21,v5,v0,v3,r5,r6)
- save_2vmx_partial(v23,v22,v5,v0,v3,r6,r5)
- save_2vmx_partial(v24,v23,v5,v0,v3,r5,r6)
- save_2vmx_partial(v25,v24,v5,v0,v3,r6,r5)
- save_2vmx_partial(v26,v25,v5,v0,v3,r5,r6)
- save_2vmx_partial(v27,v26,v5,v0,v3,r6,r5)
- save_2vmx_partial(v28,v27,v5,v0,v3,r5,r6)
- save_2vmx_partial(v29,v28,v5,v0,v3,r6,r5)
- save_2vmx_partial(v30,v29,v5,v0,v3,r5,r6)
+ /* load and rotate data below v20 */
+ lvx v2,0,r5
+ vperm v2,v2,v2,v1
+ save_misaligned_vmx(v20,v2,v0,v3,r5,r6)
+ save_misaligned_vmx(v21,v20,v0,v3,r6,r5)
+ save_misaligned_vmx(v22,v21,v0,v3,r5,r6)
+ save_misaligned_vmx(v23,v22,v0,v3,r6,r5)
+ save_misaligned_vmx(v24,v23,v0,v3,r5,r6)
+ save_misaligned_vmx(v25,v24,v0,v3,r6,r5)
+ save_misaligned_vmx(v26,v25,v0,v3,r5,r6)
+ save_misaligned_vmx(v27,v26,v0,v3,r6,r5)
+ save_misaligned_vmx(v28,v27,v0,v3,r5,r6)
+ save_misaligned_vmx(v29,v28,v0,v3,r6,r5)
+ save_misaligned_vmx(v30,v29,v0,v3,r5,r6)
+ save_misaligned_vmx(v31,v30,v0,v3,r6,r5)
+ /* load and rotate data above v31 */
+ lvx v2,0,r6
+ vperm v2,v2,v2,v1
+ save_misaligned_vmx(v2,v31,v0,v3,r5,r6)
- /* Special case for r31 we need to preserve what is in save area above v31 before obliterating it */
- addi r5,r5,32
- vperm v31,v31,v31,v0
- lvx v4,0,r5
- vsel v5,v30,v31,v3
- stvx v5,0,r6
- vsel v4,v31,v4,v3
- stvx v4,0,r5
b L(no_vmx)
L(aligned_save_vmx):
diff --git a/libc/sysdeps/powerpc/powerpc32/fpu/setjmp.S b/libc/sysdeps/powerpc/powerpc32/fpu/setjmp.S
index 60cd35052..92acff1e6 100644
--- a/libc/sysdeps/powerpc/powerpc32/fpu/setjmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/fpu/setjmp.S
@@ -26,7 +26,7 @@
#else /* !NOT_IN_libc */
/* Build a versioned object for libc. */
-default_symbol_version (__vmx__sigsetjmp,__sigsetjmp,GLIBC_2.3.4)
+versioned_symbol (libc, __vmx__sigsetjmp, __sigsetjmp, GLIBC_2_3_4)
# define __sigsetjmp __vmx__sigsetjmp
# define __sigjmp_save __vmx__sigjmp_save
# include "setjmp-common.S"
@@ -36,7 +36,7 @@ default_symbol_version (__vmx__sigsetjmp,__sigsetjmp,GLIBC_2.3.4)
# undef __sigsetjmp
# undef __sigjmp_save
# undef JB_SIZE
-symbol_version (__novmx__sigsetjmp,__sigsetjmp,GLIBC_2.0)
+compat_symbol (libc, __novmx__sigsetjmp, __sigsetjmp, GLIBC_2_0)
# define __sigsetjmp __novmx__sigsetjmp
# define __sigjmp_save __novmx__sigjmp_save
# include "setjmp-common.S"
diff --git a/libc/sysdeps/powerpc/powerpc32/mcount.c b/libc/sysdeps/powerpc/powerpc32/mcount.c
index 0476bf61d..d8c063222 100644
--- a/libc/sysdeps/powerpc/powerpc32/mcount.c
+++ b/libc/sysdeps/powerpc/powerpc32/mcount.c
@@ -9,7 +9,7 @@
/* __mcount_internal was added in glibc 2.15 with version GLIBC_PRIVATE,
but it should have been put in version GLIBC_2.15. Mark the
GLIBC_PRIVATE version obsolete and add it to GLIBC_2.16 instead. */
-default_symbol_version (___mcount_internal, __mcount_internal, GLIBC_2.16);
+versioned_symbol (libc, ___mcount_internal, __mcount_internal, GLIBC_2_16);
#if SHLIB_COMPAT (libc, GLIBC_2_15, GLIBC_2_16)
strong_alias (___mcount_internal, ___mcount_internal_private);
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S b/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S
index 55b2850fd..e7a88feb4 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S
+++ b/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrint.S
@@ -29,8 +29,8 @@ ENTRY (__llrint)
nop /* Ensure the following load is in a different dispatch group */
nop /* to avoid pipe stall on POWER4&5. */
nop
- lwz r3,8(r1)
- lwz r4,12(r1)
+ lwz r3,8+HIWORD(r1)
+ lwz r4,8+LOWORD(r1)
addi r1,r1,16
blr
END (__llrint)
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S b/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S
index cc80fcb02..da24ad38d 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S
+++ b/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llrintf.S
@@ -28,8 +28,8 @@ ENTRY (__llrintf)
nop /* Ensure the following load is in a different dispatch group */
nop /* to avoid pipe stall on POWER4&5. */
nop
- lwz r3,8(r1)
- lwz r4,12(r1)
+ lwz r3,8+HIWORD(r1)
+ lwz r4,8+LOWORD(r1)
addi r1,r1,16
blr
END (__llrintf)
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S b/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S
index 631180f07..7246ca4d1 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S
+++ b/libc/sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S
@@ -19,12 +19,10 @@
#include <sysdep.h>
#include <math_ldbl_opt.h>
- .section .rodata.cst12,"aM",@progbits,12
+ .section .rodata.cst8,"aM",@progbits,8
.align 3
- .LC0: /* 0x1.0000000000000p+52 == 2^52 */
- .long 0x43300000
- .long 0x00000000
- .long 0x3f000000 /* Use this for 0.5 */
+ .LC0: .long (52+127)<<23 /* 0x1p+52 */
+ .long (-1+127)<<23 /* 0.5 */
.section ".text"
@@ -57,12 +55,12 @@ ENTRY (__llround)
addi r9,r9,.LC0-got_label@l
mtlr r11
cfi_same_value (lr)
- lfd fp9,0(r9)
- lfs fp10,8(r9)
+ lfs fp9,0(r9)
+ lfs fp10,4(r9)
#else
lis r9,.LC0@ha
- lfd fp9,.LC0@l(r9) /* Load 2^52 into fpr9. */
- lfs fp10,.LC0@l+8(r9) /* Load 0.5 into fpr10. */
+ lfs fp9,.LC0@l(r9) /* Load 2^52 into fpr9. */
+ lfs fp10,.LC0@l+4(r9) /* Load 0.5 into fpr10. */
#endif
fabs fp2,fp1 /* Get the absolute value of x. */
fsub fp12,fp10,fp10 /* Compute 0.0 into fpr12. */
@@ -80,8 +78,8 @@ ENTRY (__llround)
nop
nop
nop
- lwz r4,12(r1) /* Load return as integer. */
- lwz r3,8(r1)
+ lwz r3,8+HIWORD(r1) /* Load return as integer. */
+ lwz r4,8+LOWORD(r1)
.Lout:
addi r1,r1,16
blr
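The new .LC0 packs both constants as single-precision bit patterns built directly from the biased exponent: for a power of two 2^e with a zero mantissa, the IEEE-754 binary32 encoding is simply (e+127)<<23, so (52+127)<<23 is 2^52 and (-1+127)<<23 is 0.5, and both can be fetched with lfs instead of lfd. A small C check of that identity (a sketch, not part of the patch):

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Bit pattern of 2^e as an IEEE-754 binary32 with a zero mantissa. */
static float pow2f_from_bits(int e)
{
  uint32_t bits = (uint32_t)(e + 127) << 23;
  float f;
  memcpy(&f, &bits, sizeof f);
  return f;
}

int main(void)
{
  assert(pow2f_from_bits(52) == 0x1p52f);   /* the 2^52 constant */
  assert(pow2f_from_bits(-1) == 0.5f);      /* the 0.5 constant */
  return 0;
}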
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/hp-timing.h b/libc/sysdeps/powerpc/powerpc32/power4/hp-timing.h
index 7d6c96e9e..4e42374ea 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/hp-timing.h
+++ b/libc/sysdeps/powerpc/powerpc32/power4/hp-timing.h
@@ -87,18 +87,15 @@ typedef unsigned long long int hp_timing_t;
#define HP_TIMING_NOW(Var) \
do { \
- union { long long ll; long ii[2]; } _var; \
- long tmp; \
- __asm__ __volatile__ ( \
- "1: mfspr %0,269;" \
- " mfspr %1,268;" \
- " mfspr %2,269;" \
- " cmpw %0,%2;" \
- " bne 1b;" \
- : "=r" (_var.ii[0]), "=r" (_var.ii[1]) , "=r" (tmp) \
- : : "cr0" \
- ); \
- Var = _var.ll; \
+ unsigned int hi, lo, tmp; \
+ __asm__ __volatile__ ("1: mfspr %0,269;" \
+ " mfspr %1,268;" \
+ " mfspr %2,269;" \
+ " cmpw %0,%2;" \
+ " bne 1b;" \
+ : "=&r" (hi), "=&r" (lo), "=&r" (tmp) \
+ : : "cr0"); \
+ Var = ((hp_timing_t) hi << 32) | lo; \
} while (0)
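The rewritten HP_TIMING_NOW keeps the classic 32-bit time-base read: load the upper half (SPR 269), the lower half (SPR 268), then the upper half again, and retry if the two upper reads differ, which catches a carry from TBL into TBU between the loads; the result is then assembled with a shift and OR instead of going through a union. The same retry pattern in C, with hypothetical read_tbu()/read_tbl() accessors standing in for the mfspr instructions:

#include <stdint.h>

/* Hypothetical wrappers for "mfspr 269" (TBU) and "mfspr 268" (TBL). */
extern uint32_t read_tbu(void);
extern uint32_t read_tbl(void);

static uint64_t read_timebase(void)
{
  uint32_t hi, lo, tmp;
  do
    {
      hi  = read_tbu();  /* upper 32 bits */
      lo  = read_tbl();  /* lower 32 bits */
      tmp = read_tbu();  /* reread the upper half */
    }
  while (hi != tmp);     /* retry if TBL wrapped into TBU in between */
  return ((uint64_t) hi << 32) | lo;
}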
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/memcmp.S b/libc/sysdeps/powerpc/powerpc32/power4/memcmp.S
index 9a455a3c6..35e162667 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/memcmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/power4/memcmp.S
@@ -1,4 +1,4 @@
-/* Optimized strcmp implementation for PowerPC64.
+/* Optimized strcmp implementation for PowerPC32.
Copyright (C) 2003-2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -18,13 +18,14 @@
#include <sysdep.h>
-/* int [r3] memcmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */
+/* int [r3] memcmp (const char *s1 [r3],
+ const char *s2 [r4],
+ size_t size [r5]) */
.machine power4
EALIGN (memcmp, 4, 0)
CALL_MCOUNT
-#define rTMP r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -35,33 +36,32 @@ EALIGN (memcmp, 4, 0)
#define rWORD4 r9 /* next word in s2 */
#define rWORD5 r10 /* next word in s1 */
#define rWORD6 r11 /* next word in s2 */
-#define rBITDIF r12 /* bits that differ in s1 & s2 words */
#define rWORD7 r30 /* next word in s1 */
#define rWORD8 r31 /* next word in s2 */
- xor rTMP, rSTR2, rSTR1
+ xor r0, rSTR2, rSTR1
cmplwi cr6, rN, 0
cmplwi cr1, rN, 12
- clrlwi. rTMP, rTMP, 30
- clrlwi rBITDIF, rSTR1, 30
- cmplwi cr5, rBITDIF, 0
+ clrlwi. r0, r0, 30
+ clrlwi r12, rSTR1, 30
+ cmplwi cr5, r12, 0
beq- cr6, L(zeroLength)
- dcbt 0,rSTR1
- dcbt 0,rSTR2
+ dcbt 0, rSTR1
+ dcbt 0, rSTR2
/* If less than 8 bytes or not aligned, use the unaligned
byte loop. */
blt cr1, L(bytealigned)
- stwu 1,-64(1)
+ stwu 1, -64(r1)
cfi_adjust_cfa_offset(64)
- stw r31,48(1)
- cfi_offset(31,(48-64))
- stw r30,44(1)
- cfi_offset(30,(44-64))
+ stw rWORD8, 48(r1)
+ cfi_offset(rWORD8, (48-64))
+ stw rWORD7, 44(r1)
+ cfi_offset(rWORD7, (44-64))
bne L(unaligned)
/* At this point we know both strings have the same alignment and the
- compare length is at least 8 bytes. rBITDIF contains the low order
+ compare length is at least 8 bytes. r12 contains the low order
2 bits of rSTR1 and cr5 contains the result of the logical compare
- of rBITDIF to 0. If rBITDIF == 0 then we are already word
+ of r12 to 0. If r12 == 0 then we are already word
aligned and can perform the word aligned loop.
Otherwise we know the two strings have the same alignment (but not
@@ -70,74 +70,95 @@ EALIGN (memcmp, 4, 0)
eliminate bits preceding the first byte. Since we want to join the
normal (word aligned) compare loop, starting at the second word,
we need to adjust the length (rN) and special case the loop
- versioning for the first word. This insures that the loop count is
+ versioning for the first word. This ensures that the loop count is
correct and the first word (shifted) is in the expected register pair. */
- .align 4
+ .align 4
L(samealignment):
clrrwi rSTR1, rSTR1, 2
clrrwi rSTR2, rSTR2, 2
beq cr5, L(Waligned)
- add rN, rN, rBITDIF
- slwi r11, rBITDIF, 3
- srwi rTMP, rN, 4 /* Divide by 16 */
- andi. rBITDIF, rN, 12 /* Get the word remainder */
+ add rN, rN, r12
+ slwi rWORD6, r12, 3
+ srwi r0, rN, 4 /* Divide by 16 */
+ andi. r12, rN, 12 /* Get the word remainder */
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 0(rSTR1)
lwz rWORD2, 0(rSTR2)
- cmplwi cr1, rBITDIF, 8
+#endif
+ cmplwi cr1, r12, 8
cmplwi cr7, rN, 16
clrlwi rN, rN, 30
beq L(dPs4)
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
bgt cr1, L(dPs3)
beq cr1, L(dPs2)
/* Remainder is 4 */
- .align 3
+ .align 3
L(dsP1):
- slw rWORD5, rWORD1, r11
- slw rWORD6, rWORD2, r11
+ slw rWORD5, rWORD1, rWORD6
+ slw rWORD6, rWORD2, rWORD6
cmplw cr5, rWORD5, rWORD6
blt cr7, L(dP1x)
/* Do something useful in this cycle since we have to branch anyway. */
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 4(rSTR1)
lwz rWORD2, 4(rSTR2)
- cmplw cr0, rWORD1, rWORD2
+#endif
+ cmplw cr7, rWORD1, rWORD2
b L(dP1e)
/* Remainder is 8 */
- .align 4
+ .align 4
L(dPs2):
- slw rWORD5, rWORD1, r11
- slw rWORD6, rWORD2, r11
+ slw rWORD5, rWORD1, rWORD6
+ slw rWORD6, rWORD2, rWORD6
cmplw cr6, rWORD5, rWORD6
blt cr7, L(dP2x)
/* Do something useful in this cycle since we have to branch anyway. */
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD7, 4(rSTR1)
lwz rWORD8, 4(rSTR2)
+#endif
cmplw cr5, rWORD7, rWORD8
b L(dP2e)
/* Remainder is 12 */
- .align 4
+ .align 4
L(dPs3):
- slw rWORD3, rWORD1, r11
- slw rWORD4, rWORD2, r11
+ slw rWORD3, rWORD1, rWORD6
+ slw rWORD4, rWORD2, rWORD6
cmplw cr1, rWORD3, rWORD4
b L(dP3e)
/* Count is a multiple of 16, remainder is 0 */
- .align 4
+ .align 4
L(dPs4):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
- slw rWORD1, rWORD1, r11
- slw rWORD2, rWORD2, r11
- cmplw cr0, rWORD1, rWORD2
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+ slw rWORD1, rWORD1, rWORD6
+ slw rWORD2, rWORD2, rWORD6
+ cmplw cr7, rWORD1, rWORD2
b L(dP4e)
/* At this point we know both strings are word aligned and the
compare length is at least 8 bytes. */
- .align 4
+ .align 4
L(Waligned):
- andi. rBITDIF, rN, 12 /* Get the word remainder */
- srwi rTMP, rN, 4 /* Divide by 16 */
- cmplwi cr1, rBITDIF, 8
+ andi. r12, rN, 12 /* Get the word remainder */
+ srwi r0, rN, 4 /* Divide by 16 */
+ cmplwi cr1, r12, 8
cmplwi cr7, rN, 16
clrlwi rN, rN, 30
beq L(dP4)
@@ -145,177 +166,352 @@ L(Waligned):
beq cr1, L(dP2)
/* Remainder is 4 */
- .align 4
+ .align 4
L(dP1):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early
(8-15 byte compare), we want to use only volatile registers. This
means we can avoid restoring non-volatile registers since we did not
change any on the early exit path. The key here is the non-early
exit path only cares about the condition code (cr5), not about which
register pair was used. */
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 0(rSTR1)
lwz rWORD6, 0(rSTR2)
+#endif
cmplw cr5, rWORD5, rWORD6
blt cr7, L(dP1x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 4(rSTR1)
lwz rWORD2, 4(rSTR2)
- cmplw cr0, rWORD1, rWORD2
+#endif
+ cmplw cr7, rWORD1, rWORD2
L(dP1e):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 8(rSTR1)
lwz rWORD4, 8(rSTR2)
+#endif
cmplw cr1, rWORD3, rWORD4
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 12(rSTR1)
lwz rWORD6, 12(rSTR2)
+#endif
cmplw cr6, rWORD5, rWORD6
- bne cr5, L(dLcr5)
- bne cr0, L(dLcr0)
+ bne cr5, L(dLcr5x)
+ bne cr7, L(dLcr7x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwzu rWORD7, 16(rSTR1)
lwzu rWORD8, 16(rSTR2)
+#endif
bne cr1, L(dLcr1)
cmplw cr5, rWORD7, rWORD8
bdnz L(dLoop)
bne cr6, L(dLcr6)
- lwz r30,44(1)
- lwz r31,48(1)
- .align 3
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
+ .align 3
L(dP1x):
slwi. r12, rN, 3
- bne cr5, L(dLcr5)
+ bne cr5, L(dLcr5x)
subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
- lwz 1,0(1)
+ addi 1, 1, 64
+ cfi_adjust_cfa_offset(-64)
bne L(d00)
li rRTN, 0
blr
/* Remainder is 8 */
- .align 4
+ .align 4
+ cfi_adjust_cfa_offset(64)
L(dP2):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 0(rSTR1)
lwz rWORD6, 0(rSTR2)
+#endif
cmplw cr6, rWORD5, rWORD6
blt cr7, L(dP2x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD7, 4(rSTR1)
lwz rWORD8, 4(rSTR2)
+#endif
cmplw cr5, rWORD7, rWORD8
L(dP2e):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 8(rSTR1)
lwz rWORD2, 8(rSTR2)
- cmplw cr0, rWORD1, rWORD2
+#endif
+ cmplw cr7, rWORD1, rWORD2
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 12(rSTR1)
lwz rWORD4, 12(rSTR2)
+#endif
cmplw cr1, rWORD3, rWORD4
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 4
addi rSTR2, rSTR2, 4
+#endif
bne cr6, L(dLcr6)
bne cr5, L(dLcr5)
b L(dLoop2)
/* Again we are on an early exit path (16-23 byte compare), we want to
only use volatile registers and avoid restoring non-volatile
registers. */
- .align 4
+ .align 4
L(dP2x):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 4(rSTR1)
lwz rWORD4, 4(rSTR2)
- cmplw cr5, rWORD3, rWORD4
+#endif
+ cmplw cr1, rWORD3, rWORD4
slwi. r12, rN, 3
- bne cr6, L(dLcr6)
+ bne cr6, L(dLcr6x)
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 4
addi rSTR2, rSTR2, 4
- bne cr5, L(dLcr5)
+#endif
+ bne cr1, L(dLcr1x)
subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
- lwz 1,0(1)
+ addi 1, 1, 64
+ cfi_adjust_cfa_offset(-64)
bne L(d00)
li rRTN, 0
blr
/* Remainder is 12 */
- .align 4
+ .align 4
+ cfi_adjust_cfa_offset(64)
L(dP3):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 0(rSTR1)
lwz rWORD4, 0(rSTR2)
+#endif
cmplw cr1, rWORD3, rWORD4
L(dP3e):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 4(rSTR1)
lwz rWORD6, 4(rSTR2)
+#endif
cmplw cr6, rWORD5, rWORD6
blt cr7, L(dP3x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD7, 8(rSTR1)
lwz rWORD8, 8(rSTR2)
+#endif
cmplw cr5, rWORD7, rWORD8
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 12(rSTR1)
lwz rWORD2, 12(rSTR2)
- cmplw cr0, rWORD1, rWORD2
+#endif
+ cmplw cr7, rWORD1, rWORD2
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 8
addi rSTR2, rSTR2, 8
+#endif
bne cr1, L(dLcr1)
bne cr6, L(dLcr6)
b L(dLoop1)
/* Again we are on an early exit path (24-31 byte compare), we want to
only use volatile registers and avoid restoring non-volatile
registers. */
- .align 4
+ .align 4
L(dP3x):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 8(rSTR1)
lwz rWORD2, 8(rSTR2)
- cmplw cr5, rWORD1, rWORD2
+#endif
+ cmplw cr7, rWORD1, rWORD2
slwi. r12, rN, 3
- bne cr1, L(dLcr1)
+ bne cr1, L(dLcr1x)
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 8
addi rSTR2, rSTR2, 8
- bne cr6, L(dLcr6)
+#endif
+ bne cr6, L(dLcr6x)
subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
- bne cr5, L(dLcr5)
- lwz 1,0(1)
+ bne cr7, L(dLcr7x)
+ addi 1, 1, 64
+ cfi_adjust_cfa_offset(-64)
bne L(d00)
li rRTN, 0
blr
/* Count is a multiple of 16, remainder is 0 */
- .align 4
+ .align 4
+ cfi_adjust_cfa_offset(64)
L(dP4):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 0(rSTR1)
lwz rWORD2, 0(rSTR2)
- cmplw cr0, rWORD1, rWORD2
+#endif
+ cmplw cr7, rWORD1, rWORD2
L(dP4e):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 4(rSTR1)
lwz rWORD4, 4(rSTR2)
+#endif
cmplw cr1, rWORD3, rWORD4
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 8(rSTR1)
lwz rWORD6, 8(rSTR2)
+#endif
cmplw cr6, rWORD5, rWORD6
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwzu rWORD7, 12(rSTR1)
lwzu rWORD8, 12(rSTR2)
+#endif
cmplw cr5, rWORD7, rWORD8
- bne cr0, L(dLcr0)
+ bne cr7, L(dLcr7)
bne cr1, L(dLcr1)
bdz- L(d24) /* Adjust CTR as we start with +4 */
/* This is the primary loop */
- .align 4
+ .align 4
L(dLoop):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 4(rSTR1)
lwz rWORD2, 4(rSTR2)
+#endif
cmplw cr1, rWORD3, rWORD4
bne cr6, L(dLcr6)
L(dLoop1):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 8(rSTR1)
lwz rWORD4, 8(rSTR2)
+#endif
cmplw cr6, rWORD5, rWORD6
bne cr5, L(dLcr5)
L(dLoop2):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 12(rSTR1)
lwz rWORD6, 12(rSTR2)
+#endif
cmplw cr5, rWORD7, rWORD8
- bne cr0, L(dLcr0)
+ bne cr7, L(dLcr7)
L(dLoop3):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwzu rWORD7, 16(rSTR1)
lwzu rWORD8, 16(rSTR2)
+#endif
bne- cr1, L(dLcr1)
- cmplw cr0, rWORD1, rWORD2
+ cmplw cr7, rWORD1, rWORD2
bdnz+ L(dLoop)
L(dL4):
@@ -325,7 +521,7 @@ L(dL4):
bne cr5, L(dLcr5)
cmplw cr5, rWORD7, rWORD8
L(d44):
- bne cr0, L(dLcr0)
+ bne cr7, L(dLcr7)
L(d34):
bne cr1, L(dLcr1)
L(d24):
@@ -334,69 +530,82 @@ L(d14):
slwi. r12, rN, 3
bne cr5, L(dLcr5)
L(d04):
- lwz r30,44(1)
- lwz r31,48(1)
- lwz 1,0(1)
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
+ addi 1, 1, 64
+ cfi_adjust_cfa_offset(-64)
subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
beq L(zeroLength)
/* At this point we have a remainder of 1 to 3 bytes to compare. Since
we are aligned it is safe to load the whole word, and use
- shift right to eliminate bits beyond the compare length. */
+ shift right to eliminate bits beyond the compare length. */
L(d00):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 4(rSTR1)
lwz rWORD2, 4(rSTR2)
+#endif
srw rWORD1, rWORD1, rN
srw rWORD2, rWORD2, rN
- cmplw rWORD1,rWORD2
- li rRTN,0
- beqlr
- li rRTN,1
- bgtlr
- li rRTN,-1
- blr
-
- .align 4
-L(dLcr0):
- lwz r30,44(1)
- lwz r31,48(1)
+ sub rRTN, rWORD1, rWORD2
+ blr
+
+ .align 4
+ cfi_adjust_cfa_offset(64)
+L(dLcr7):
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
+L(dLcr7x):
li rRTN, 1
- lwz 1,0(1)
- bgtlr cr0
+ addi 1, 1, 64
+ cfi_adjust_cfa_offset(-64)
+ bgtlr cr7
li rRTN, -1
blr
- .align 4
+ .align 4
+ cfi_adjust_cfa_offset(64)
L(dLcr1):
- lwz r30,44(1)
- lwz r31,48(1)
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
+L(dLcr1x):
li rRTN, 1
- lwz 1,0(1)
+ addi 1, 1, 64
+ cfi_adjust_cfa_offset(-64)
bgtlr cr1
li rRTN, -1
blr
- .align 4
+ .align 4
+ cfi_adjust_cfa_offset(64)
L(dLcr6):
- lwz r30,44(1)
- lwz r31,48(1)
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
+L(dLcr6x):
li rRTN, 1
- lwz 1,0(1)
+ addi 1, 1, 64
+ cfi_adjust_cfa_offset(-64)
bgtlr cr6
li rRTN, -1
blr
- .align 4
+ .align 4
+ cfi_adjust_cfa_offset(64)
L(dLcr5):
- lwz r30,44(1)
- lwz r31,48(1)
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
L(dLcr5x):
li rRTN, 1
- lwz 1,0(1)
+ addi 1, 1, 64
+ cfi_adjust_cfa_offset(-64)
bgtlr cr5
li rRTN, -1
blr
- .align 4
+ .align 4
L(bytealigned):
- cfi_adjust_cfa_offset(-64)
- mtctr rN /* Power4 wants mtctr 1st in dispatch group */
+ mtctr rN /* Power4 wants mtctr 1st in dispatch group */
/* We need to prime this loop. This loop is swing modulo scheduled
to avoid pipe delays. The dependent instruction latencies (load to
@@ -411,7 +620,7 @@ L(bytealigned):
lbz rWORD1, 0(rSTR1)
lbz rWORD2, 0(rSTR2)
bdz- L(b11)
- cmplw cr0, rWORD1, rWORD2
+ cmplw cr7, rWORD1, rWORD2
lbz rWORD3, 1(rSTR1)
lbz rWORD4, 1(rSTR2)
bdz- L(b12)
@@ -419,11 +628,11 @@ L(bytealigned):
lbzu rWORD5, 2(rSTR1)
lbzu rWORD6, 2(rSTR2)
bdz- L(b13)
- .align 4
+ .align 4
L(bLoop):
lbzu rWORD1, 1(rSTR1)
lbzu rWORD2, 1(rSTR2)
- bne- cr0, L(bLcr0)
+ bne- cr7, L(bLcr7)
cmplw cr6, rWORD5, rWORD6
bdz- L(b3i)
@@ -432,7 +641,7 @@ L(bLoop):
lbzu rWORD4, 1(rSTR2)
bne- cr1, L(bLcr1)
- cmplw cr0, rWORD1, rWORD2
+ cmplw cr7, rWORD1, rWORD2
bdz- L(b2i)
lbzu rWORD5, 1(rSTR1)
@@ -449,23 +658,23 @@ L(bLoop):
tested. In this case we must complete the pending operations
before returning. */
L(b1i):
- bne- cr0, L(bLcr0)
+ bne- cr7, L(bLcr7)
bne- cr1, L(bLcr1)
b L(bx56)
- .align 4
+ .align 4
L(b2i):
bne- cr6, L(bLcr6)
- bne- cr0, L(bLcr0)
+ bne- cr7, L(bLcr7)
b L(bx34)
- .align 4
+ .align 4
L(b3i):
bne- cr1, L(bLcr1)
bne- cr6, L(bLcr6)
b L(bx12)
- .align 4
-L(bLcr0):
+ .align 4
+L(bLcr7):
li rRTN, 1
- bgtlr cr0
+ bgtlr cr7
li rRTN, -1
blr
L(bLcr1):
@@ -480,36 +689,31 @@ L(bLcr6):
blr
L(b13):
- bne- cr0, L(bx12)
+ bne- cr7, L(bx12)
bne- cr1, L(bx34)
L(bx56):
sub rRTN, rWORD5, rWORD6
blr
nop
L(b12):
- bne- cr0, L(bx12)
+ bne- cr7, L(bx12)
L(bx34):
sub rRTN, rWORD3, rWORD4
blr
-
L(b11):
L(bx12):
sub rRTN, rWORD1, rWORD2
blr
-
- .align 4
-L(zeroLengthReturn):
-
+ .align 4
L(zeroLength):
li rRTN, 0
blr
- cfi_adjust_cfa_offset(64)
- .align 4
+ .align 4
/* At this point we know the strings have different alignment and the
- compare length is at least 8 bytes. rBITDIF contains the low order
+ compare length is at least 8 bytes. r12 contains the low order
2 bits of rSTR1 and cr5 contains the result of the logical compare
- of rBITDIF to 0. If rBITDIF == 0 then rStr1 is word aligned and can
+ of r12 to 0. If r12 == 0 then rStr1 is word aligned and can
perform the Wunaligned loop.
Otherwise we know that rSTR1 is not already word aligned yet.
@@ -518,79 +722,88 @@ L(zeroLength):
eliminate bits preceding the first byte. Since we want to join the
normal (Wunaligned) compare loop, starting at the second word,
we need to adjust the length (rN) and special case the loop
- versioning for the first W. This insures that the loop count is
+ versioning for the first W. This ensures that the loop count is
correct and the first W (shifted) is in the expected register pair. */
#define rSHL r29 /* Unaligned shift left count. */
#define rSHR r28 /* Unaligned shift right count. */
-#define rB r27 /* Left rotation temp for rWORD2. */
-#define rD r26 /* Left rotation temp for rWORD4. */
-#define rF r25 /* Left rotation temp for rWORD6. */
-#define rH r24 /* Left rotation temp for rWORD8. */
-#define rA r0 /* Right rotation temp for rWORD2. */
-#define rC r12 /* Right rotation temp for rWORD4. */
-#define rE r0 /* Right rotation temp for rWORD6. */
-#define rG r12 /* Right rotation temp for rWORD8. */
+#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */
+#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */
+#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */
+#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. */
+ cfi_adjust_cfa_offset(64)
L(unaligned):
- stw r29,40(r1)
- cfi_offset(r29,(40-64))
+ stw rSHL, 40(r1)
+ cfi_offset(rSHL, (40-64))
clrlwi rSHL, rSTR2, 30
- stw r28,36(r1)
- cfi_offset(r28,(36-64))
+ stw rSHR, 36(r1)
+ cfi_offset(rSHR, (36-64))
beq cr5, L(Wunaligned)
- stw r27,32(r1)
- cfi_offset(r27,(32-64))
+ stw rWORD8_SHIFT, 32(r1)
+ cfi_offset(rWORD8_SHIFT, (32-64))
/* Adjust the logical start of rSTR2 to compensate for the extra bits
in the 1st rSTR1 W. */
- sub r27, rSTR2, rBITDIF
+ sub rWORD8_SHIFT, rSTR2, r12
/* But do not attempt to address the W before that W that contains
the actual start of rSTR2. */
clrrwi rSTR2, rSTR2, 2
- stw r26,28(r1)
- cfi_offset(r26,(28-64))
-/* Compute the left/right shift counts for the unalign rSTR2,
+ stw rWORD2_SHIFT, 28(r1)
+ cfi_offset(rWORD2_SHIFT, (28-64))
+/* Compute the left/right shift counts for the unaligned rSTR2,
compensating for the logical (W aligned) start of rSTR1. */
- clrlwi rSHL, r27, 30
+ clrlwi rSHL, rWORD8_SHIFT, 30
clrrwi rSTR1, rSTR1, 2
- stw r25,24(r1)
- cfi_offset(r25,(24-64))
+ stw rWORD4_SHIFT, 24(r1)
+ cfi_offset(rWORD4_SHIFT, (24-64))
slwi rSHL, rSHL, 3
- cmplw cr5, r27, rSTR2
- add rN, rN, rBITDIF
- slwi r11, rBITDIF, 3
- stw r24,20(r1)
- cfi_offset(r24,(20-64))
+ cmplw cr5, rWORD8_SHIFT, rSTR2
+ add rN, rN, r12
+ slwi rWORD6, r12, 3
+ stw rWORD6_SHIFT, 20(r1)
+ cfi_offset(rWORD6_SHIFT, (20-64))
subfic rSHR, rSHL, 32
- srwi rTMP, rN, 4 /* Divide by 16 */
- andi. rBITDIF, rN, 12 /* Get the W remainder */
+ srwi r0, rN, 4 /* Divide by 16 */
+ andi. r12, rN, 12 /* Get the W remainder */
/* We normally need to load 2 Ws to start the unaligned rSTR2, but in
this special case those bits may be discarded anyway. Also we
must avoid loading a W where none of the bits are part of rSTR2 as
this may cross a page boundary and cause a page fault. */
li rWORD8, 0
blt cr5, L(dus0)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD8, 0(rSTR2)
- la rSTR2, 4(rSTR2)
+ addi rSTR2, rSTR2, 4
+#endif
slw rWORD8, rWORD8, rSHL
L(dus0):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 0(rSTR1)
lwz rWORD2, 0(rSTR2)
- cmplwi cr1, rBITDIF, 8
+#endif
+ cmplwi cr1, r12, 8
cmplwi cr7, rN, 16
- srw rG, rWORD2, rSHR
+ srw r12, rWORD2, rSHR
clrlwi rN, rN, 30
beq L(duPs4)
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
- or rWORD8, rG, rWORD8
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+ or rWORD8, r12, rWORD8
bgt cr1, L(duPs3)
beq cr1, L(duPs2)
/* Remainder is 4 */
- .align 4
+ .align 4
L(dusP1):
- slw rB, rWORD2, rSHL
- slw rWORD7, rWORD1, r11
- slw rWORD8, rWORD8, r11
+ slw rWORD8_SHIFT, rWORD2, rSHL
+ slw rWORD7, rWORD1, rWORD6
+ slw rWORD8, rWORD8, rWORD6
bge cr7, L(duP1e)
/* At this point we exit early with the first word compare
complete and remainder of 0 to 3 bytes. See L(du14) for details on
@@ -600,95 +813,133 @@ L(dusP1):
bne cr5, L(duLcr5)
cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD2, 4(rSTR2)
- srw rA, rWORD2, rSHR
+#endif
+ srw r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 8 */
- .align 4
+ .align 4
L(duPs2):
- slw rH, rWORD2, rSHL
- slw rWORD5, rWORD1, r11
- slw rWORD6, rWORD8, r11
+ slw rWORD6_SHIFT, rWORD2, rSHL
+ slw rWORD5, rWORD1, rWORD6
+ slw rWORD6, rWORD8, rWORD6
b L(duP2e)
/* Remainder is 12 */
- .align 4
+ .align 4
L(duPs3):
- slw rF, rWORD2, rSHL
- slw rWORD3, rWORD1, r11
- slw rWORD4, rWORD8, r11
+ slw rWORD4_SHIFT, rWORD2, rSHL
+ slw rWORD3, rWORD1, rWORD6
+ slw rWORD4, rWORD8, rWORD6
b L(duP3e)
/* Count is a multiple of 16, remainder is 0 */
- .align 4
+ .align 4
L(duPs4):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
- or rWORD8, rG, rWORD8
- slw rD, rWORD2, rSHL
- slw rWORD1, rWORD1, r11
- slw rWORD2, rWORD8, r11
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+ or rWORD8, r12, rWORD8
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ slw rWORD1, rWORD1, rWORD6
+ slw rWORD2, rWORD8, rWORD6
b L(duP4e)
/* At this point we know rSTR1 is word aligned and the
compare length is at least 8 bytes. */
- .align 4
+ .align 4
L(Wunaligned):
- stw r27,32(r1)
- cfi_offset(r27,(32-64))
+ stw rWORD8_SHIFT, 32(r1)
+ cfi_offset(rWORD8_SHIFT, (32-64))
clrrwi rSTR2, rSTR2, 2
- stw r26,28(r1)
- cfi_offset(r26,(28-64))
- srwi rTMP, rN, 4 /* Divide by 16 */
- stw r25,24(r1)
- cfi_offset(r25,(24-64))
- andi. rBITDIF, rN, 12 /* Get the W remainder */
- stw r24,20(r1)
- cfi_offset(r24,(20-64))
+ stw rWORD2_SHIFT, 28(r1)
+ cfi_offset(rWORD2_SHIFT, (28-64))
+ srwi r0, rN, 4 /* Divide by 16 */
+ stw rWORD4_SHIFT, 24(r1)
+ cfi_offset(rWORD4_SHIFT, (24-64))
+ andi. r12, rN, 12 /* Get the W remainder */
+ stw rWORD6_SHIFT, 20(r1)
+ cfi_offset(rWORD6_SHIFT, (20-64))
slwi rSHL, rSHL, 3
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD6, 0(rSTR2)
lwzu rWORD8, 4(rSTR2)
- cmplwi cr1, rBITDIF, 8
+#endif
+ cmplwi cr1, r12, 8
cmplwi cr7, rN, 16
clrlwi rN, rN, 30
subfic rSHR, rSHL, 32
- slw rH, rWORD6, rSHL
+ slw rWORD6_SHIFT, rWORD6, rSHL
beq L(duP4)
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
bgt cr1, L(duP3)
beq cr1, L(duP2)
/* Remainder is 4 */
- .align 4
+ .align 4
L(duP1):
- srw rG, rWORD8, rSHR
+ srw r12, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ addi rSTR1, rSTR1, 4
+#else
lwz rWORD7, 0(rSTR1)
- slw rB, rWORD8, rSHL
- or rWORD8, rG, rH
+#endif
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
blt cr7, L(duP1x)
L(duP1e):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 4(rSTR1)
lwz rWORD2, 4(rSTR2)
+#endif
cmplw cr5, rWORD7, rWORD8
- srw rA, rWORD2, rSHR
- slw rD, rWORD2, rSHL
- or rWORD2, rA, rB
+ srw r0, rWORD2, rSHR
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 8(rSTR1)
lwz rWORD4, 8(rSTR2)
- cmplw cr0, rWORD1, rWORD2
- srw rC, rWORD4, rSHR
- slw rF, rWORD4, rSHL
+#endif
+ cmplw cr7, rWORD1, rWORD2
+ srw r12, rWORD4, rSHR
+ slw rWORD4_SHIFT, rWORD4, rSHL
bne cr5, L(duLcr5)
- or rWORD4, rC, rD
+ or rWORD4, r12, rWORD2_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 12(rSTR1)
lwz rWORD6, 12(rSTR2)
+#endif
cmplw cr1, rWORD3, rWORD4
- srw rE, rWORD6, rSHR
- slw rH, rWORD6, rSHL
- bne cr0, L(duLcr0)
- or rWORD6, rE, rF
+ srw r0, rWORD6, rSHR
+ slw rWORD6_SHIFT, rWORD6, rSHL
+ bne cr7, L(duLcr7)
+ or rWORD6, r0, rWORD4_SHIFT
cmplw cr6, rWORD5, rWORD6
b L(duLoop3)
- .align 4
+ .align 4
/* At this point we exit early with the first word compare
complete and remainder of 0 to 3 bytes. See L(du14) for details on
how we handle the remaining bytes. */
@@ -698,186 +949,321 @@ L(duP1x):
bne cr5, L(duLcr5)
cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
- ld rWORD2, 8(rSTR2)
- srw rA, rWORD2, rSHR
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD2, 8(rSTR2)
+#endif
+ srw r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 8 */
- .align 4
+ .align 4
L(duP2):
- srw rE, rWORD8, rSHR
+ srw r0, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ addi rSTR1, rSTR1, 4
+#else
lwz rWORD5, 0(rSTR1)
- or rWORD6, rE, rH
- slw rH, rWORD8, rSHL
+#endif
+ or rWORD6, r0, rWORD6_SHIFT
+ slw rWORD6_SHIFT, rWORD8, rSHL
L(duP2e):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD7, 4(rSTR1)
lwz rWORD8, 4(rSTR2)
+#endif
cmplw cr6, rWORD5, rWORD6
- srw rG, rWORD8, rSHR
- slw rB, rWORD8, rSHL
- or rWORD8, rG, rH
+ srw r12, rWORD8, rSHR
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
blt cr7, L(duP2x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 8(rSTR1)
lwz rWORD2, 8(rSTR2)
+#endif
cmplw cr5, rWORD7, rWORD8
bne cr6, L(duLcr6)
- srw rA, rWORD2, rSHR
- slw rD, rWORD2, rSHL
- or rWORD2, rA, rB
+ srw r0, rWORD2, rSHR
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 12(rSTR1)
lwz rWORD4, 12(rSTR2)
- cmplw cr0, rWORD1, rWORD2
+#endif
+ cmplw cr7, rWORD1, rWORD2
bne cr5, L(duLcr5)
- srw rC, rWORD4, rSHR
- slw rF, rWORD4, rSHL
- or rWORD4, rC, rD
+ srw r12, rWORD4, rSHR
+ slw rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 4
addi rSTR2, rSTR2, 4
+#endif
cmplw cr1, rWORD3, rWORD4
b L(duLoop2)
- .align 4
+ .align 4
L(duP2x):
cmplw cr5, rWORD7, rWORD8
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 4
addi rSTR2, rSTR2, 4
+#endif
bne cr6, L(duLcr6)
slwi. rN, rN, 3
bne cr5, L(duLcr5)
cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD2, 4(rSTR2)
- srw rA, rWORD2, rSHR
+#endif
+ srw r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 12 */
- .align 4
+ .align 4
L(duP3):
- srw rC, rWORD8, rSHR
+ srw r12, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ addi rSTR1, rSTR1, 4
+#else
lwz rWORD3, 0(rSTR1)
- slw rF, rWORD8, rSHL
- or rWORD4, rC, rH
+#endif
+ slw rWORD4_SHIFT, rWORD8, rSHL
+ or rWORD4, r12, rWORD6_SHIFT
L(duP3e):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 4(rSTR1)
lwz rWORD6, 4(rSTR2)
+#endif
cmplw cr1, rWORD3, rWORD4
- srw rE, rWORD6, rSHR
- slw rH, rWORD6, rSHL
- or rWORD6, rE, rF
+ srw r0, rWORD6, rSHR
+ slw rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD7, 8(rSTR1)
lwz rWORD8, 8(rSTR2)
+#endif
cmplw cr6, rWORD5, rWORD6
bne cr1, L(duLcr1)
- srw rG, rWORD8, rSHR
- slw rB, rWORD8, rSHL
- or rWORD8, rG, rH
+ srw r12, rWORD8, rSHR
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
blt cr7, L(duP3x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 12(rSTR1)
lwz rWORD2, 12(rSTR2)
+#endif
cmplw cr5, rWORD7, rWORD8
bne cr6, L(duLcr6)
- srw rA, rWORD2, rSHR
- slw rD, rWORD2, rSHL
- or rWORD2, rA, rB
+ srw r0, rWORD2, rSHR
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 8
addi rSTR2, rSTR2, 8
- cmplw cr0, rWORD1, rWORD2
+#endif
+ cmplw cr7, rWORD1, rWORD2
b L(duLoop1)
- .align 4
+ .align 4
L(duP3x):
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 8
addi rSTR2, rSTR2, 8
+#endif
+#if 0
+/* Huh? We've already branched on cr1! */
bne cr1, L(duLcr1)
+#endif
cmplw cr5, rWORD7, rWORD8
bne cr6, L(duLcr6)
slwi. rN, rN, 3
bne cr5, L(duLcr5)
cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD2, 4(rSTR2)
- srw rA, rWORD2, rSHR
+#endif
+ srw r0, rWORD2, rSHR
b L(dutrim)
/* Count is a multiple of 16, remainder is 0 */
- .align 4
+ .align 4
L(duP4):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
- srw rA, rWORD8, rSHR
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+ srw r0, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ addi rSTR1, rSTR1, 4
+#else
lwz rWORD1, 0(rSTR1)
- slw rD, rWORD8, rSHL
- or rWORD2, rA, rH
+#endif
+ slw rWORD2_SHIFT, rWORD8, rSHL
+ or rWORD2, r0, rWORD6_SHIFT
L(duP4e):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 4(rSTR1)
lwz rWORD4, 4(rSTR2)
- cmplw cr0, rWORD1, rWORD2
- srw rC, rWORD4, rSHR
- slw rF, rWORD4, rSHL
- or rWORD4, rC, rD
+#endif
+ cmplw cr7, rWORD1, rWORD2
+ srw r12, rWORD4, rSHR
+ slw rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 8(rSTR1)
lwz rWORD6, 8(rSTR2)
+#endif
cmplw cr1, rWORD3, rWORD4
- bne cr0, L(duLcr0)
- srw rE, rWORD6, rSHR
- slw rH, rWORD6, rSHL
- or rWORD6, rE, rF
+ bne cr7, L(duLcr7)
+ srw r0, rWORD6, rSHR
+ slw rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwzu rWORD7, 12(rSTR1)
lwzu rWORD8, 12(rSTR2)
+#endif
cmplw cr6, rWORD5, rWORD6
bne cr1, L(duLcr1)
- srw rG, rWORD8, rSHR
- slw rB, rWORD8, rSHL
- or rWORD8, rG, rH
+ srw r12, rWORD8, rSHR
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
cmplw cr5, rWORD7, rWORD8
bdz- L(du24) /* Adjust CTR as we start with +4 */
/* This is the primary loop */
- .align 4
+ .align 4
L(duLoop):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD1, 4(rSTR1)
lwz rWORD2, 4(rSTR2)
+#endif
cmplw cr1, rWORD3, rWORD4
bne cr6, L(duLcr6)
- srw rA, rWORD2, rSHR
- slw rD, rWORD2, rSHL
- or rWORD2, rA, rB
+ srw r0, rWORD2, rSHR
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
L(duLoop1):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD3, 8(rSTR1)
lwz rWORD4, 8(rSTR2)
+#endif
cmplw cr6, rWORD5, rWORD6
bne cr5, L(duLcr5)
- srw rC, rWORD4, rSHR
- slw rF, rWORD4, rSHL
- or rWORD4, rC, rD
+ srw r12, rWORD4, rSHR
+ slw rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
L(duLoop2):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD5, 12(rSTR1)
lwz rWORD6, 12(rSTR2)
+#endif
cmplw cr5, rWORD7, rWORD8
- bne cr0, L(duLcr0)
- srw rE, rWORD6, rSHR
- slw rH, rWORD6, rSHL
- or rWORD6, rE, rF
+ bne cr7, L(duLcr7)
+ srw r0, rWORD6, rSHR
+ slw rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
L(duLoop3):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
lwzu rWORD7, 16(rSTR1)
lwzu rWORD8, 16(rSTR2)
- cmplw cr0, rWORD1, rWORD2
+#endif
+ cmplw cr7, rWORD1, rWORD2
bne- cr1, L(duLcr1)
- srw rG, rWORD8, rSHR
- slw rB, rWORD8, rSHL
- or rWORD8, rG, rH
+ srw r12, rWORD8, rSHR
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
bdnz+ L(duLoop)
L(duL4):
+#if 0
+/* Huh? We've already branched on cr1! */
bne cr1, L(duLcr1)
+#endif
cmplw cr1, rWORD3, rWORD4
bne cr6, L(duLcr6)
cmplw cr6, rWORD5, rWORD6
bne cr5, L(duLcr5)
cmplw cr5, rWORD7, rWORD8
L(du44):
- bne cr0, L(duLcr0)
+ bne cr7, L(duLcr7)
L(du34):
bne cr1, L(duLcr1)
L(du24):
@@ -887,95 +1273,101 @@ L(du14):
bne cr5, L(duLcr5)
/* At this point we have a remainder of 1 to 3 bytes to compare. We use
shift right to eliminate bits beyond the compare length.
+ This allows the use of word subtract to compute the final result.
However it may not be safe to load rWORD2 which may be beyond the
string length. So we compare the bit length of the remainder to
the right shift count (rSHR). If the bit count is less than or equal
we do not need to load rWORD2 (all significant bits are already in
- rB). */
+ rWORD8_SHIFT). */
cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
lwz rWORD2, 4(rSTR2)
- srw rA, rWORD2, rSHR
- .align 4
+#endif
+ srw r0, rWORD2, rSHR
+ .align 4
L(dutrim):
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+#else
lwz rWORD1, 4(rSTR1)
- lwz r31,48(1)
+#endif
+ lwz rWORD8, 48(r1)
subfic rN, rN, 32 /* Shift count is 32 - (rN * 8). */
- or rWORD2, rA, rB
- lwz r30,44(1)
- lwz r29,40(r1)
+ or rWORD2, r0, rWORD8_SHIFT
+ lwz rWORD7, 44(r1)
+ lwz rSHL, 40(r1)
srw rWORD1, rWORD1, rN
srw rWORD2, rWORD2, rN
- lwz r28,36(r1)
- lwz r27,32(r1)
- cmplw rWORD1,rWORD2
- li rRTN,0
- beq L(dureturn26)
- li rRTN,1
- bgt L(dureturn26)
- li rRTN,-1
- b L(dureturn26)
- .align 4
-L(duLcr0):
- lwz r31,48(1)
- lwz r30,44(1)
+ lwz rSHR, 36(r1)
+ lwz rWORD8_SHIFT, 32(r1)
+ sub rRTN, rWORD1, rWORD2
+ b L(dureturn26)
+ .align 4
+L(duLcr7):
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
li rRTN, 1
- bgt cr0, L(dureturn29)
- lwz r29,40(r1)
- lwz r28,36(r1)
+ bgt cr7, L(dureturn29)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
li rRTN, -1
b L(dureturn27)
- .align 4
+ .align 4
L(duLcr1):
- lwz r31,48(1)
- lwz r30,44(1)
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
li rRTN, 1
bgt cr1, L(dureturn29)
- lwz r29,40(r1)
- lwz r28,36(r1)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
li rRTN, -1
b L(dureturn27)
- .align 4
+ .align 4
L(duLcr6):
- lwz r31,48(1)
- lwz r30,44(1)
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
li rRTN, 1
bgt cr6, L(dureturn29)
- lwz r29,40(r1)
- lwz r28,36(r1)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
li rRTN, -1
b L(dureturn27)
- .align 4
+ .align 4
L(duLcr5):
- lwz r31,48(1)
- lwz r30,44(1)
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
li rRTN, 1
bgt cr5, L(dureturn29)
- lwz r29,40(r1)
- lwz r28,36(r1)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
li rRTN, -1
b L(dureturn27)
.align 3
L(duZeroReturn):
- li rRTN,0
+ li rRTN, 0
.align 4
L(dureturn):
- lwz r31,48(1)
- lwz r30,44(1)
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
L(dureturn29):
- lwz r29,40(r1)
- lwz r28,36(r1)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
L(dureturn27):
- lwz r27,32(r1)
+ lwz rWORD8_SHIFT, 32(r1)
L(dureturn26):
- lwz r26,28(r1)
+ lwz rWORD2_SHIFT, 28(r1)
L(dureturn25):
- lwz r25,24(r1)
- lwz r24,20(r1)
- lwz 1,0(1)
+ lwz rWORD4_SHIFT, 24(r1)
+ lwz rWORD6_SHIFT, 20(r1)
+ addi 1, 1, 64
+ cfi_adjust_cfa_offset(-64)
blr
END (memcmp)
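The little-endian additions above replace each lwz with lwbrx, a byte-reversed load, so every 4-byte chunk reaches the register in big-endian order; an unsigned word compare (or, for the final partial word, a plain subtract after shifting off the bytes beyond the compare length) then agrees with memcmp's byte-by-byte ordering on either endianness. A sketch of why that works, using __builtin_bswap32 in place of lwbrx (assumed helper, not the patch's code):

#include <stdint.h>
#include <string.h>

/* Compare one aligned 4-byte chunk the way the word loop does. */
static int compare_word(const unsigned char *s1, const unsigned char *s2)
{
  uint32_t w1, w2;
  memcpy(&w1, s1, 4);
  memcpy(&w2, s2, 4);
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  /* Like lwbrx: put the first byte in memory into the most
     significant byte of the register.  */
  w1 = __builtin_bswap32 (w1);
  w2 = __builtin_bswap32 (w2);
#endif
  /* Now an unsigned word comparison matches byte order. */
  if (w1 == w2)
    return 0;
  return w1 > w2 ? 1 : -1;
}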
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/memcpy.S b/libc/sysdeps/powerpc/powerpc32/power4/memcpy.S
index d9146631e..338d3cce3 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc32/power4/memcpy.S
@@ -203,15 +203,28 @@ EALIGN (memcpy, 5, 0)
blt cr6,5f
srwi 7,6,16
bgt cr6,3f
+#ifdef __LITTLE_ENDIAN__
+ sth 7,0(3)
+#else
sth 6,0(3)
+#endif
b 7f
.align 4
3:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,24
+ stb 6,0(3)
+ sth 7,1(3)
+#else
stb 7,0(3)
sth 6,1(3)
+#endif
b 7f
.align 4
5:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,8
+#endif
stb 6,0(3)
7:
cmplwi cr1,10,16
@@ -339,13 +352,23 @@ EALIGN (memcpy, 5, 0)
bf 30,1f
/* there are at least two words to copy, so copy them */
+#ifdef __LITTLE_ENDIAN__
+ srw 0,6,10
+ slw 8,7,9
+#else
slw 0,6,10 /* shift 1st src word to left align it in R0 */
srw 8,7,9 /* shift 2nd src word to right align it in R8 */
+#endif
or 0,0,8 /* or them to get word to store */
lwz 6,8(5) /* load the 3rd src word */
stw 0,0(4) /* store the 1st dst word */
+#ifdef __LITTLE_ENDIAN__
+ srw 0,7,10
+ slw 8,6,9
+#else
slw 0,7,10 /* now left align 2nd src word into R0 */
srw 8,6,9 /* shift 3rd src word to right align it in R8 */
+#endif
or 0,0,8 /* or them to get word to store */
lwz 7,12(5)
stw 0,4(4) /* store the 2nd dst word */
@@ -353,8 +376,13 @@ EALIGN (memcpy, 5, 0)
addi 5,5,16
bf 31,4f
/* there is a third word to copy, so copy it */
+#ifdef __LITTLE_ENDIAN__
+ srw 0,6,10
+ slw 8,7,9
+#else
slw 0,6,10 /* shift 3rd src word to left align it in R0 */
srw 8,7,9 /* shift 4th src word to right align it in R8 */
+#endif
or 0,0,8 /* or them to get word to store */
stw 0,0(4) /* store 3rd dst word */
mr 6,7
@@ -364,8 +392,13 @@ EALIGN (memcpy, 5, 0)
b 4f
.align 4
1:
+#ifdef __LITTLE_ENDIAN__
+ srw 0,6,10
+ slw 8,7,9
+#else
slw 0,6,10 /* shift 1st src word to left align it in R0 */
srw 8,7,9 /* shift 2nd src word to right align it in R8 */
+#endif
addi 5,5,8
or 0,0,8 /* or them to get word to store */
bf 31,4f
@@ -378,23 +411,43 @@ EALIGN (memcpy, 5, 0)
.align 4
4:
/* copy 16 bytes at a time */
+#ifdef __LITTLE_ENDIAN__
+ srw 0,6,10
+ slw 8,7,9
+#else
slw 0,6,10
srw 8,7,9
+#endif
or 0,0,8
lwz 6,0(5)
stw 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srw 0,7,10
+ slw 8,6,9
+#else
slw 0,7,10
srw 8,6,9
+#endif
or 0,0,8
lwz 7,4(5)
stw 0,4(4)
+#ifdef __LITTLE_ENDIAN__
+ srw 0,6,10
+ slw 8,7,9
+#else
slw 0,6,10
srw 8,7,9
+#endif
or 0,0,8
lwz 6,8(5)
stw 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srw 0,7,10
+ slw 8,6,9
+#else
slw 0,7,10
srw 8,6,9
+#endif
or 0,0,8
lwz 7,12(5)
stw 0,12(4)
@@ -403,8 +456,13 @@ EALIGN (memcpy, 5, 0)
bdnz+ 4b
8:
/* calculate and store the final word */
+#ifdef __LITTLE_ENDIAN__
+ srw 0,6,10
+ slw 8,7,9
+#else
slw 0,6,10
srw 8,7,9
+#endif
or 0,0,8
stw 0,0(4)
3:
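For the word-unaligned cases the copy keeps two consecutive source words in registers and merges them into one aligned destination word with a pair of shifts; the ifdefs above only swap the shift directions, because the bytes that come first in memory sit at the low-order end of a little-endian register rather than the high-order end. A C sketch of that merge under those assumptions (generic, not the exact kernel of the assembly loop):

#include <stdint.h>

/* Merge two naturally aligned source words into the next aligned
   destination word when the source is 'off' bytes (1..3) past a word
   boundary.  w1 covers the current source position, w2 is the following
   word, both fetched with plain word loads. */
static uint32_t merge_words(uint32_t w1, uint32_t w2, unsigned off)
{
  unsigned sh1 = 8 * off;       /* bits already consumed from w1 */
  unsigned sh2 = 32 - sh1;      /* bits taken from w2 */
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  return (w1 >> sh1) | (w2 << sh2);  /* earlier bytes live at the low end */
#else
  return (w1 << sh1) | (w2 >> sh2);  /* earlier bytes live at the high end */
#endif
}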
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/memset.S b/libc/sysdeps/powerpc/powerpc32/power4/memset.S
index c2d288b38..4fd9d8cb4 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/memset.S
+++ b/libc/sysdeps/powerpc/powerpc32/power4/memset.S
@@ -50,7 +50,7 @@ L(_memset):
/* Align to word boundary. */
cmplwi cr5, rLEN, 31
- rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */
beq+ L(aligned)
mtcrf 0x01, rMEMP0
subfic rALIGN, rALIGN, 4
@@ -65,7 +65,7 @@ L(g0):
/* Handle the case of size < 31. */
L(aligned):
mtcrf 0x01, rLEN
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
ble cr5, L(medium)
/* Align to 32-byte boundary. */
andi. rALIGN, rMEMP, 0x1C
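Both the old rlwimi and the insrdi that replaces it perform byte replication: copy the fill byte into the adjacent byte to make a halfword, then copy the halfword to make a full word (insrdi does the insert in the 64-bit register image, which leaves the low 32 bits with the same value). In C the whole replication is just two shift-and-OR steps:

#include <stdint.h>

/* Replicate the low byte of c across a 32-bit word, as the memset
   prologue does before issuing word stores. */
static uint32_t replicate_byte(uint32_t c)
{
  c &= 0xff;
  c |= c << 8;    /* byte -> halfword */
  c |= c << 16;   /* halfword -> word */
  return c;
}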
diff --git a/libc/sysdeps/powerpc/powerpc32/power4/strncmp.S b/libc/sysdeps/powerpc/powerpc32/power4/strncmp.S
index 724d9084a..89b961e78 100644
--- a/libc/sysdeps/powerpc/powerpc32/power4/strncmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/power4/strncmp.S
@@ -24,7 +24,7 @@
EALIGN (strncmp, 4, 0)
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -37,6 +37,7 @@ EALIGN (strncmp, 4, 0)
#define r7F7F r9 /* constant 0x7f7f7f7f */
#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */
#define rBITDIF r11 /* bits that differ in s1 & s2 words */
+#define rTMP r12
dcbt 0,rSTR1
or rTMP, rSTR2, rSTR1
@@ -75,12 +76,45 @@ L(g1): add rTMP, rFEFE, rWORD1
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ slwi rTMP, rTMP, 1
+ addi rTMP2, rTMP, -1
+ andc rTMP2, rTMP2, rTMP
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rldimi rTMP2, rWORD2, 24, 32
+ rldimi rTMP, rWORD1, 24, 32
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr+
+ ori rRTN, rTMP2, 1
+ blr
+
+L(different):
+ lwz rWORD1, -4(rSTR1)
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rldimi rTMP2, rWORD2, 24, 32
+ rldimi rTMP, rWORD1, 24, 32
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr+
+ ori rRTN, rTMP2, 1
+ blr
+
+#else
L(endstring):
and rTMP, r7F7F, rWORD1
beq cr1, L(equal)
add rTMP, rTMP, r7F7F
xor. rBITDIF, rWORD1, rWORD2
-
andc rNEG, rNEG, rTMP
blt- L(highbit)
cntlzw rBITDIF, rBITDIF
@@ -88,28 +122,20 @@ L(endstring):
addi rNEG, rNEG, 7
cmpw cr1, rNEG, rBITDIF
sub rRTN, rWORD1, rWORD2
- blt- cr1, L(equal)
- srawi rRTN, rRTN, 31
- ori rRTN, rRTN, 1
- blr
+ bgelr+ cr1
L(equal):
li rRTN, 0
blr
L(different):
- lwzu rWORD1, -4(rSTR1)
+ lwz rWORD1, -4(rSTR1)
xor. rBITDIF, rWORD1, rWORD2
sub rRTN, rWORD1, rWORD2
- blt- L(highbit)
- srawi rRTN, rRTN, 31
- ori rRTN, rRTN, 1
- blr
+ bgelr+
L(highbit):
- srwi rWORD2, rWORD2, 24
- srwi rWORD1, rWORD1, 24
- sub rRTN, rWORD1, rWORD2
+ ori rRTN, rWORD2, 1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
diff --git a/libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S b/libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S
index ecd37c3cd..49c8a0866 100644
--- a/libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S
+++ b/libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S
@@ -39,8 +39,8 @@ ENTRY (__llround)
nop /* Ensure the following load is in a different dispatch */
nop /* group to avoid pipe stall on POWER4&5. */
nop
- lwz r4,12(r1)
- lwz r3,8(r1)
+ lwz r3,8+HIWORD(r1)
+ lwz r4,8+LOWORD(r1)
addi r1,r1,16
blr
END (__llround)
diff --git a/libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S b/libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S
index d4da625bb..780dd9ca4 100644
--- a/libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S
+++ b/libc/sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S
@@ -38,7 +38,7 @@ ENTRY (__lround)
nop /* Ensure the following load is in a different dispatch */
nop /* group to avoid pipe stall on POWER4&5. */
nop
- lwz r3,12(r1)
+ lwz r3,8+LOWORD(r1)
addi r1,r1,16
blr
END (__lround)
diff --git a/libc/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S b/libc/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S
index f2417fdf4..5f7ba43a2 100644
--- a/libc/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S
+++ b/libc/sysdeps/powerpc/powerpc32/power5/fpu/s_isnan.S
@@ -27,8 +27,8 @@ EALIGN (__isnan, 4, 0)
ori r1,r1,0
stfd fp1,24(r1) /* copy FPR to GPR */
ori r1,r1,0
- lwz r4,24(r1)
- lwz r5,28(r1)
+ lwz r4,24+HIWORD(r1)
+ lwz r5,24+LOWORD(r1)
lis r0,0x7ff0 /* const long r0 0x7ff00000 00000000 */
clrlwi r4,r4,1 /* x = fabs(x) */
cmpw cr7,r4,r0 /* if (fabs(x) <= inf) */
diff --git a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S
index 2c095db1d..3ea18589c 100644
--- a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S
+++ b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_isnan.S
@@ -27,8 +27,8 @@ EALIGN (__isnan, 4, 0)
ori r1,r1,0
stfd fp1,24(r1) /* copy FPR to GPR */
ori r1,r1,0
- lwz r4,24(r1)
- lwz r5,28(r1)
+ lwz r4,24+HIWORD(r1)
+ lwz r5,24+LOWORD(r1)
lis r0,0x7ff0 /* const long r0 0x7ff00000 00000000 */
clrlwi r4,r4,1 /* x = fabs(x) */
cmpw cr7,r4,r0 /* if (fabs(x) <= inf) */
diff --git a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S
index 3344b312e..c0660cf6e 100644
--- a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S
+++ b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrint.S
@@ -29,8 +29,8 @@ ENTRY (__llrint)
/* Ensure the following load is in a different dispatch group by
inserting "group ending nop". */
ori r1,r1,0
- lwz r3,8(r1)
- lwz r4,12(r1)
+ lwz r3,8+HIWORD(r1)
+ lwz r4,8+LOWORD(r1)
addi r1,r1,16
blr
END (__llrint)
diff --git a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S
index 7f64f8d12..ce298905c 100644
--- a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S
+++ b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llrintf.S
@@ -28,8 +28,8 @@ ENTRY (__llrintf)
/* Ensure the following load is in a different dispatch group by
inserting "group ending nop". */
ori r1,r1,0
- lwz r3,8(r1)
- lwz r4,12(r1)
+ lwz r3,8+HIWORD(r1)
+ lwz r4,8+LOWORD(r1)
addi r1,r1,16
blr
END (__llrintf)
diff --git a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S
index 0ff04cb71..abb0840d1 100644
--- a/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S
+++ b/libc/sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S
@@ -39,8 +39,8 @@ ENTRY (__llround)
/* Ensure the following load is in a different dispatch group by
inserting "group ending nop". */
ori r1,r1,0
- lwz r4,12(r1)
- lwz r3,8(r1)
+ lwz r3,8+HIWORD(r1)
+ lwz r4,8+LOWORD(r1)
addi r1,r1,16
blr
END (__llround)
diff --git a/libc/sysdeps/powerpc/powerpc32/power6/memcpy.S b/libc/sysdeps/powerpc/powerpc32/power6/memcpy.S
index a76f71e04..f58114a0c 100644
--- a/libc/sysdeps/powerpc/powerpc32/power6/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc32/power6/memcpy.S
@@ -219,15 +219,28 @@ L(word_unaligned_short):
blt cr6,5f
srwi 7,6,16
bgt cr6,3f
+#ifdef __LITTLE_ENDIAN__
+ sth 7,0(3)
+#else
sth 6,0(3)
+#endif
b 7f
.align 4
3:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,24
+ stb 6,0(3)
+ sth 7,1(3)
+#else
stb 7,0(3)
sth 6,1(3)
+#endif
b 7f
.align 4
5:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,8
+#endif
stb 6,0(3)
7:
cmplwi cr1,10,16
@@ -577,7 +590,11 @@ L(wdu1_32):
lwz 6,-1(4)
cmplwi cr6,31,4
srwi 8,31,5 /* calculate the 32 byte loop count */
+#ifdef __LITTLE_ENDIAN__
+ srwi 6,6,8
+#else
slwi 6,6,8
+#endif
clrlwi 31,31,27 /* The remaining bytes, < 32. */
blt cr5,L(wdu1_32tail)
mtctr 8
@@ -585,8 +602,12 @@ L(wdu1_32):
lwz 8,3(4)
lwz 7,4(4)
+#ifdef __LITTLE_ENDIAN__
+ rldimi 6,8,24,32
+#else
/* Equivalent to: srwi 8,8,32-8; or 6,6,8 */
rlwimi 6,8,8,(32-8),31
+#endif
b L(wdu1_loop32x)
.align 4
L(wdu1_loop32):
@@ -595,8 +616,12 @@ L(wdu1_loop32):
lwz 7,4(4)
stw 10,-8(3)
stw 11,-4(3)
+#ifdef __LITTLE_ENDIAN__
+ rldimi 6,8,24,32
+#else
/* Equivalent to srwi 8,8,32-8; or 6,6,8 */
rlwimi 6,8,8,(32-8),31
+#endif
L(wdu1_loop32x):
lwz 10,8(4)
lwz 11,12(4)
@@ -613,7 +638,11 @@ L(wdu1_loop32x):
stw 6,16(3)
stw 7,20(3)
addi 3,3,32
+#ifdef __LITTLE_ENDIAN__
+ srwi 6,8,8
+#else
slwi 6,8,8
+#endif
bdnz+ L(wdu1_loop32)
stw 10,-8(3)
stw 11,-4(3)
@@ -624,8 +653,12 @@ L(wdu1_32tail):
blt cr6,L(wdu_4tail)
/* calculate and store the final word */
lwz 8,3(4)
-/* Equivalent to: srwi 8,8,32-9; or 6,6,8 */
+#ifdef __LITTLE_ENDIAN__
+ rldimi 6,8,24,32
+#else
+/* Equivalent to: srwi 8,8,32-8; or 6,6,8 */
rlwimi 6,8,8,(32-8),31
+#endif
b L(wdu_32tailx)
L(wdu2_32):
@@ -633,7 +666,11 @@ L(wdu2_32):
lwz 6,-2(4)
cmplwi cr6,31,4
srwi 8,31,5 /* calculate the 32 byte loop count */
+#ifdef __LITTLE_ENDIAN__
+ srwi 6,6,16
+#else
slwi 6,6,16
+#endif
clrlwi 31,31,27 /* The remaining bytes, < 32. */
blt cr5,L(wdu2_32tail)
mtctr 8
@@ -641,8 +678,11 @@ L(wdu2_32):
lwz 8,2(4)
lwz 7,4(4)
-/* Equivalent to: srwi 8,8,32-8; or 6,6,8 */
+#ifdef __LITTLE_ENDIAN__
+ rldimi 6,8,16,32
+#else
rlwimi 6,8,16,(32-16),31
+#endif
b L(wdu2_loop32x)
.align 4
L(wdu2_loop32):
@@ -651,8 +691,11 @@ L(wdu2_loop32):
lwz 7,4(4)
stw 10,-8(3)
stw 11,-4(3)
-/* Equivalent to srwi 8,8,32-8; or 6,6,8 */
+#ifdef __LITTLE_ENDIAN__
+ rldimi 6,8,16,32
+#else
rlwimi 6,8,16,(32-16),31
+#endif
L(wdu2_loop32x):
lwz 10,8(4)
lwz 11,12(4)
@@ -670,7 +713,11 @@ L(wdu2_loop32x):
stw 6,16(3)
stw 7,20(3)
addi 3,3,32
+#ifdef __LITTLE_ENDIAN__
+ srwi 6,8,16
+#else
slwi 6,8,16
+#endif
bdnz+ L(wdu2_loop32)
stw 10,-8(3)
stw 11,-4(3)
@@ -681,8 +728,11 @@ L(wdu2_32tail):
blt cr6,L(wdu_4tail)
/* calculate and store the final word */
lwz 8,2(4)
-/* Equivalent to: srwi 8,8,32-9; or 6,6,8 */
+#ifdef __LITTLE_ENDIAN__
+ rldimi 6,8,16,32
+#else
rlwimi 6,8,16,(32-16),31
+#endif
b L(wdu_32tailx)
L(wdu3_32):
@@ -690,7 +740,11 @@ L(wdu3_32):
lwz 6,-3(4)
cmplwi cr6,31,4
srwi 8,31,5 /* calculate the 32 byte loop count */
+#ifdef __LITTLE_ENDIAN__
+ srwi 6,6,24
+#else
slwi 6,6,24
+#endif
clrlwi 31,31,27 /* The remaining bytes, < 32. */
blt cr5,L(wdu3_32tail)
mtctr 8
@@ -698,8 +752,11 @@ L(wdu3_32):
lwz 8,1(4)
lwz 7,4(4)
-/* Equivalent to: srwi 8,8,32-8; or 6,6,8 */
+#ifdef __LITTLE_ENDIAN__
+ rldimi 6,8,8,32
+#else
rlwimi 6,8,24,(32-24),31
+#endif
b L(wdu3_loop32x)
.align 4
L(wdu3_loop32):
@@ -708,8 +765,11 @@ L(wdu3_loop32):
lwz 7,4(4)
stw 10,-8(3)
stw 11,-4(3)
-/* Equivalent to srwi 8,8,32-8; or 6,6,8 */
+#ifdef __LITTLE_ENDIAN__
+ rldimi 6,8,8,32
+#else
rlwimi 6,8,24,(32-24),31
+#endif
L(wdu3_loop32x):
lwz 10,8(4)
lwz 11,12(4)
@@ -726,7 +786,11 @@ L(wdu3_loop32x):
stw 6,16(3)
stw 7,20(3)
addi 3,3,32
+#ifdef __LITTLE_ENDIAN__
+ srwi 6,8,24
+#else
slwi 6,8,24
+#endif
bdnz+ L(wdu3_loop32)
stw 10,-8(3)
stw 11,-4(3)
@@ -737,8 +801,11 @@ L(wdu3_32tail):
blt cr6,L(wdu_4tail)
/* calculate and store the final word */
lwz 8,1(4)
-/* Equivalent to: srwi 8,8,32-9; or 6,6,8 */
+#ifdef __LITTLE_ENDIAN__
+ rldimi 6,8,8,32
+#else
rlwimi 6,8,24,(32-24),31
+#endif
b L(wdu_32tailx)
.align 4
L(wdu_32tailx):
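The wdu ("word, destination unaligned") paths above stitch each aligned destination word together from two source words that straddle a word boundary. On big-endian the first wanted source byte sits at the most-significant end of the earlier load, so the kept bytes are shifted left and the next word is merged in at the bottom with rlwimi; on little-endian the register byte order is reversed, so the shifts flip direction and rldimi places the next word's low byte above the kept bytes. A minimal C sketch of the 1-byte-misaligned case, not part of the patch (plain shifts stand in for the rotate-and-insert instructions, and the __LITTLE_ENDIAN__ test mirrors the one the patch uses):

    #include <stdint.h>

    /* lo holds the aligned word containing the first wanted byte,
       hi holds the following aligned word, both as loaded by lwz.  */
    uint32_t stitch_off1 (uint32_t lo, uint32_t hi)
    {
    #ifdef __LITTLE_ENDIAN__
      return (lo >> 8) | (hi << 24);   /* srwi 6,6,8 ... rldimi 6,8,24,32 */
    #else
      return (lo << 8) | (hi >> 24);   /* slwi 6,6,8 ... rlwimi 6,8,8,24,31 */
    #endif
    }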
diff --git a/libc/sysdeps/powerpc/powerpc32/power6/memset.S b/libc/sysdeps/powerpc/powerpc32/power6/memset.S
index 8c23c8d13..a4b002a96 100644
--- a/libc/sysdeps/powerpc/powerpc32/power6/memset.S
+++ b/libc/sysdeps/powerpc/powerpc32/power6/memset.S
@@ -48,7 +48,7 @@ L(_memset):
ble- cr1, L(small)
/* Align to word boundary. */
cmplwi cr5, rLEN, 31
- rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */
beq+ L(aligned)
mtcrf 0x01, rMEMP0
subfic rALIGN, rALIGN, 4
@@ -64,7 +64,7 @@ L(g0):
/* Handle the case of size < 31. */
L(aligned):
mtcrf 0x01, rLEN
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
ble cr5, L(medium)
/* Align to 32-byte boundary. */
andi. rALIGN, rMEMP, 0x1C
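The two insrdi instructions above replicate the fill byte first into a halfword and then into a word; register contents are endian-neutral, only loads and stores see byte order, so the splat itself is the same either way. A throwaway C model of what the pair computes (assuming rCHR starts out holding the zero-extended fill byte):

    #include <stdint.h>

    uint32_t splat_byte (uint8_t c)
    {
      uint32_t w = c;
      w |= w << 8;    /* insrdi rCHR,rCHR,8,48  : byte -> halfword */
      w |= w << 16;   /* insrdi rCHR,rCHR,16,32 : halfword -> word */
      return w;
    }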
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S
index b2ab5bfe7..095c15547 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_finite.S
@@ -54,9 +54,8 @@ ENTRY (__finite)
stfd fp1,8(r1) /* Transfer FP to GPR's. */
ori 2,2,0 /* Force a new dispatch group. */
- lhz r0,8(r1) /* Fetch the upper portion of the high word of
- the FP value (where the exponent and sign bits
- are). */
+ lhz r0,8+HISHORT(r1) /* Fetch the upper 16 bits of the FP value
+ (biased exponent and sign bit). */
clrlwi r0,r0,17 /* r0 = abs(r0). */
addi r1,r1,16 /* Reset the stack pointer. */
cmpwi cr7,r0,0x7ff0 /* r4 == 0x7ff0?. */
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S
index 3f8af60a5..0101c8fa1 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isinf.S
@@ -48,14 +48,13 @@ ENTRY (__isinf)
li r3,0
bflr 29 /* If not INF, return. */
- /* Either we have -INF/+INF or a denormal. */
+ /* Either we have +INF or -INF. */
stwu r1,-16(r1) /* Allocate stack space. */
stfd fp1,8(r1) /* Transfer FP to GPR's. */
ori 2,2,0 /* Force a new dispatch group. */
- lhz r4,8(r1) /* Fetch the upper portion of the high word of
- the FP value (where the exponent and sign bits
- are). */
+ lhz r4,8+HISHORT(r1) /* Fetch the upper 16 bits of the FP value
+ (biased exponent and sign bit). */
addi r1,r1,16 /* Reset the stack pointer. */
cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? */
li r3,1
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S
index 99ff12696..0ad1dcf1f 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_isnan.S
@@ -53,8 +53,8 @@ ENTRY (__isnan)
stwu r1,-16(r1) /* Allocate stack space. */
stfd fp1,8(r1) /* Transfer FP to GPR's. */
ori 2,2,0 /* Force a new dispatch group. */
- lwz r4,8(r1) /* Load the upper half of the FP value. */
- lwz r5,12(r1) /* Load the lower half of the FP value. */
+ lwz r4,8+HIWORD(r1) /* Load the upper half of the FP value. */
+ lwz r5,8+LOWORD(r1) /* Load the lower half of the FP value. */
addi r1,r1,16 /* Reset the stack pointer. */
lis r0,0x7ff0 /* Load the upper portion for an INF/NaN. */
clrlwi r4,r4,1 /* r4 = abs(r4). */
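The three hunks above switch the GPR-side inspection of the double from hard-coded stack offsets to the HISHORT/HIWORD/LOWORD macros, so the same lhz/lwz picks up the sign-and-exponent half of the value on either byte order. The macros themselves live in the port's sysdep headers and are not shown in this diff; the sketch below uses hypothetical stand-in values to illustrate what they select:

    #include <stdint.h>
    #include <string.h>

    /* Offsets of the 16-bit sign+exponent field of a double in memory;
       illustrative stand-ins for the patch's HISHORT macro.  */
    #ifdef __LITTLE_ENDIAN__
    # define MY_HISHORT 6
    #else
    # define MY_HISHORT 0
    #endif

    uint16_t sign_and_exponent (double x)
    {
      unsigned char b[8];
      uint16_t hi;
      memcpy (b, &x, sizeof x);
      memcpy (&hi, b + MY_HISHORT, sizeof hi);
      return hi;   /* (hi & 0x7fff) >= 0x7ff0 means Inf or NaN */
    }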
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c
index e008ed0c3..1c82577f5 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c
+++ b/libc/sysdeps/powerpc/powerpc32/power7/fpu/s_logbl.c
@@ -35,14 +35,14 @@ static const union {
long double
__logbl (long double x)
{
- double xh, xl;
+ double xh;
double ret;
if (__builtin_expect (x == 0.0L, 0))
/* Raise FE_DIVBYZERO and return -HUGE_VAL[LF]. */
return -1.0L / __builtin_fabsl (x);
- ldbl_unpack (x, &xh, &xl);
+ xh = ldbl_high (x);
/* ret = x & 0x7ff0000000000000; */
asm (
"xxland %x0,%x1,%x2\n"
@@ -58,9 +58,9 @@ __logbl (long double x)
{
/* POSIX specifies that a denormal number is treated as
though it were normalized. */
- int64_t lx, hx;
+ int64_t hx;
- GET_LDOUBLE_WORDS64 (hx, lx, x);
+ EXTRACT_WORDS64 (hx, xh);
return (long double) (-1023 - (__builtin_clzll (hx) - 12));
}
/* Test to avoid logb_downward (0.0) == -0.0. */
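For the subnormal branch the change only needs the high double of the IBM long double pair, so a single EXTRACT_WORDS64 on xh replaces the two-word extraction, and the exponent is recovered from the position of the leading mantissa bit. A hedged C restatement of that arithmetic (EXTRACT_WORDS64 modelled with memcpy; sign masking added here for clarity):

    #include <stdint.h>
    #include <string.h>

    /* xh is the high double of the long double, with 0 < |xh| < DBL_MIN.  */
    double logb_of_subnormal (double xh)
    {
      int64_t hx;
      memcpy (&hx, &xh, sizeof hx);        /* EXTRACT_WORDS64 (hx, xh) */
      hx &= 0x7fffffffffffffffLL;          /* drop the sign bit */
      /* 12 bits of sign+exponent precede the mantissa.  With no leading
         zero mantissa bits the value lies in [2^-1023, 2^-1022); each
         further leading zero halves it once more.  */
      return (double) (-1023 - (__builtin_clzll (hx) - 12));
    }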
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/memchr.S b/libc/sysdeps/powerpc/powerpc32/power7/memchr.S
index 369e5e048..85754f3f1 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/memchr.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/memchr.S
@@ -25,107 +25,111 @@ ENTRY (__memchr)
CALL_MCOUNT
dcbt 0,r3
clrrwi r8,r3,2
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
add r7,r3,r5 /* Calculate the last acceptable address. */
+ insrdi r4,r4,16,32
cmplwi r5,16
+ li r9, -1
+ rlwinm r6,r3,3,27,28 /* Calculate padding. */
+ addi r7,r7,-1
+#ifdef __LITTLE_ENDIAN__
+ slw r9,r9,r6
+#else
+ srw r9,r9,r6
+#endif
ble L(small_range)
- cmplw cr7,r3,r7 /* Compare the starting address (r3) with the
- ending address (r7). If (r3 >= r7), the size
- passed in is zero or negative. */
- ble cr7,L(proceed)
-
- li r7,-1 /* Artificially set our ending address (r7)
- such that we will exit early. */
-L(proceed):
- rlwinm r6,r3,3,27,28 /* Calculate padding. */
- cmpli cr6,r6,0 /* cr6 == Do we have padding? */
lwz r12,0(r8) /* Load word from memory. */
- cmpb r10,r12,r4 /* Check for BYTEs in WORD1. */
- beq cr6,L(proceed_no_padding)
- slw r10,r10,r6
- srw r10,r10,r6
-L(proceed_no_padding):
- cmplwi cr7,r10,0 /* If r10 == 0, no BYTEs have been found. */
+ cmpb r3,r12,r4 /* Check for BYTEs in WORD1. */
+ and r3,r3,r9
+ clrlwi r5,r7,30 /* Byte count - 1 in last word. */
+ clrrwi r7,r7,2 /* Address of last word. */
+ cmplwi cr7,r3,0 /* If r3 == 0, no BYTEs have been found. */
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,4
- cmplw cr6,r9,r7
- bge cr6,L(null)
-
mtcrf 0x01,r8
/* Are we now aligned to a doubleword boundary? If so, skip to
the main loop. Otherwise, go through the alignment code. */
-
bt 29,L(loop_setup)
/* Handle WORD2 of pair. */
lwzu r12,4(r8)
- cmpb r10,r12,r4
- cmplwi cr7,r10,0
+ cmpb r3,r12,r4
+ cmplwi cr7,r3,0
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,4
- cmplw cr6,r9,r7
- bge cr6,L(null)
-
L(loop_setup):
- sub r5,r7,r9
- srwi r6,r5,3 /* Number of loop iterations. */
+ /* The last word we want to read in the loop below is the one
+ containing the last byte of the string, i.e. the word at
+ (s + size - 1) & ~3, or r7. The first word read is at
+ r8 + 4, we read 2 * cnt words, so the last word read will
+ be at r8 + 4 + 8 * cnt - 4. Solving for cnt gives
+ cnt = (r7 - r8) / 8 */
+ sub r6,r7,r8
+ srwi r6,r6,3 /* Number of loop iterations. */
mtctr r6 /* Setup the counter. */
- b L(loop)
- /* Main loop to look for BYTE backwards in the string. Since
- it's a small loop (< 8 instructions), align it to 32-bytes. */
- .p2align 5
+
+ /* Main loop to look for BYTE in the string. Since
+ it's a small loop (8 instructions), align it to 32-bytes. */
+ .align 5
L(loop):
/* Load two words, compare and merge in a
single register for speed. This is an attempt
to speed up the byte-checking process for bigger strings. */
lwz r12,4(r8)
lwzu r11,8(r8)
- cmpb r10,r12,r4
+ cmpb r3,r12,r4
cmpb r9,r11,r4
- or r5,r9,r10 /* Merge everything in one word. */
- cmplwi cr7,r5,0
+ or r6,r9,r3 /* Merge everything in one word. */
+ cmplwi cr7,r6,0
bne cr7,L(found)
bdnz L(loop)
- /* We're here because the counter reached 0, and that means we
- didn't have any matches for BYTE in the whole range. */
- subi r11,r7,4
- cmplw cr6,r8,r11
- blt cr6,L(loop_small)
- b L(null)
+ /* We may have one more word to read. */
+ cmplw r8,r7
+ beqlr
+
+ lwzu r12,4(r8)
+ cmpb r3,r12,r4
+ cmplwi cr6,r3,0
+ bne cr6,L(done)
+ blr
+ .align 4
+L(found):
/* OK, one (or both) of the words contains BYTE. Check
the first word and decrement the address in case the first
word really contains BYTE. */
- .align 4
-L(found):
- cmplwi cr6,r10,0
+ cmplwi cr6,r3,0
addi r8,r8,-4
bne cr6,L(done)
/* BYTE must be in the second word. Adjust the address
- again and move the result of cmpb to r10 so we can calculate the
+ again and move the result of cmpb to r3 so we can calculate the
pointer. */
- mr r10,r9
+ mr r3,r9
addi r8,r8,4
- /* r10 has the output of the cmpb instruction, that is, it contains
+ /* r3 has the output of the cmpb instruction, that is, it contains
0xff in the same position as BYTE in the original
word from the string. Use that to calculate the pointer.
We need to make sure BYTE is *before* the end of the range. */
L(done):
- cntlzw r0,r10 /* Count leading zeroes before the match. */
- srwi r0,r0,3 /* Convert leading zeroes to bytes. */
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r3,-1
+ andc r0,r0,r3
+ popcntw r0,r0 /* Count trailing zeros. */
+#else
+ cntlzw r0,r3 /* Count leading zeros before the match. */
+#endif
+ cmplw r8,r7 /* Are we on the last word? */
+ srwi r0,r0,3 /* Convert leading/trailing zeros to bytes. */
add r3,r8,r0
- cmplw r3,r7
- bge L(null)
+ cmplw cr7,r0,r5 /* If on the last word, check byte offset. */
+ bnelr
+ blelr cr7
+ li r3,0
blr
.align 4
@@ -137,67 +141,42 @@ L(null):
.align 4
L(small_range):
cmplwi r5,0
- rlwinm r6,r3,3,27,28 /* Calculate padding. */
- beq L(null) /* This branch is for the cmplwi r5,0 above */
+ beq L(null)
lwz r12,0(r8) /* Load word from memory. */
- cmplwi cr6,r6,0 /* cr6 == Do we have padding? */
- cmpb r10,r12,r4 /* Check for BYTE in DWORD1. */
- beq cr6,L(small_no_padding)
- slw r10,r10,r6
- srw r10,r10,r6
-L(small_no_padding):
- cmplwi cr7,r10,0
+ cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */
+ and r3,r3,r9
+ cmplwi cr7,r3,0
+ clrlwi r5,r7,30 /* Byte count - 1 in last word. */
+ clrrwi r7,r7,2 /* Address of last word. */
+ cmplw r8,r7 /* Are we done already? */
bne cr7,L(done)
+ beqlr
- /* Are we done already? */
- addi r9,r8,4
- cmplw r9,r7
- bge L(null)
-
-L(loop_small): /* loop_small has been unrolled. */
lwzu r12,4(r8)
- cmpb r10,r12,r4
- addi r9,r8,4
- cmplwi cr6,r10,0
- cmplw r9,r7
+ cmpb r3,r12,r4
+ cmplwi cr6,r3,0
+ cmplw r8,r7
bne cr6,L(done)
- bge L(null)
+ beqlr
lwzu r12,4(r8)
- cmpb r10,r12,r4
- addi r9,r8,4
- cmplwi cr6,r10,0
- cmplw r9,r7
+ cmpb r3,r12,r4
+ cmplwi cr6,r3,0
+ cmplw r8,r7
bne cr6,L(done)
- bge L(null)
+ beqlr
lwzu r12,4(r8)
- cmpb r10,r12,r4
- addi r9,r8,4
- cmplwi cr6,r10,0
- cmplw r9,r7
+ cmpb r3,r12,r4
+ cmplwi cr6,r3,0
+ cmplw r8,r7
bne cr6,L(done)
- bge L(null)
+ beqlr
lwzu r12,4(r8)
- cmpb r10,r12,r4
- addi r9,r8,4
- cmplwi cr6,r10,0
- cmplw r9,r7
+ cmpb r3,r12,r4
+ cmplwi cr6,r3,0
bne cr6,L(done)
- bge L(null)
-
- /* For most cases we will never get here. Under some combinations of
- padding + length there is a leftover word that still needs to be
- checked. */
- lwzu r12,4(r8)
- cmpb r10,r12,r4
- addi r9,r8,4
- cmplwi cr6,r10,0
- bne cr6,L(done)
-
- /* save a branch and exit directly */
- li r3,0
blr
END (__memchr)
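In the rewritten tail, cmpb leaves 0xff in every byte of r3 that matched the search byte, and the first match in memory corresponds to a leading-zero count on big-endian but a trailing-zero count on little-endian; since there is no count-trailing-zeros instruction here, the patch derives it from popcntw of (x - 1) & ~x. A small C sketch of that conversion, not taken from the file:

    #include <stdint.h>

    /* mask is the nonzero cmpb result: 0xff per matching byte.  */
    unsigned first_match_offset (uint32_t mask)
    {
    #ifdef __LITTLE_ENDIAN__
      /* (mask - 1) & ~mask sets exactly the bits below the lowest set bit,
         so its popcount is the trailing-zero count.  */
      return (unsigned) __builtin_popcount ((mask - 1) & ~mask) >> 3;
    #else
      return (unsigned) __builtin_clz (mask) >> 3;
    #endif
    }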
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/memcmp.S b/libc/sysdeps/powerpc/powerpc32/power7/memcmp.S
index 075e19f14..f160ddebf 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/memcmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/memcmp.S
@@ -23,10 +23,9 @@
size_t size [r5]) */
.machine power7
-EALIGN (memcmp,4,0)
+EALIGN (memcmp, 4, 0)
CALL_MCOUNT
-#define rTMP r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -37,35 +36,32 @@ EALIGN (memcmp,4,0)
#define rWORD4 r9 /* next word in s2 */
#define rWORD5 r10 /* next word in s1 */
#define rWORD6 r11 /* next word in s2 */
-#define rBITDIF r12 /* bits that differ in s1 & s2 words */
#define rWORD7 r30 /* next word in s1 */
#define rWORD8 r31 /* next word in s2 */
- xor rTMP,rSTR2,rSTR1
- cmplwi cr6,rN,0
- cmplwi cr1,rN,12
- clrlwi. rTMP,rTMP,30
- clrlwi rBITDIF,rSTR1,30
- cmplwi cr5,rBITDIF,0
- beq- cr6,L(zeroLength)
- dcbt 0,rSTR1
- dcbt 0,rSTR2
-
- /* If less than 8 bytes or not aligned, use the unaligned
- byte loop. */
-
- blt cr1,L(bytealigned)
- stwu 1,-64(1)
+ xor r0, rSTR2, rSTR1
+ cmplwi cr6, rN, 0
+ cmplwi cr1, rN, 12
+ clrlwi. r0, r0, 30
+ clrlwi r12, rSTR1, 30
+ cmplwi cr5, r12, 0
+ beq- cr6, L(zeroLength)
+ dcbt 0, rSTR1
+ dcbt 0, rSTR2
+/* If less than 8 bytes or not aligned, use the unaligned
+ byte loop. */
+ blt cr1, L(bytealigned)
+ stwu 1, -64(r1)
cfi_adjust_cfa_offset(64)
- stw r31,48(1)
- cfi_offset(31,(48-64))
- stw r30,44(1)
- cfi_offset(30,(44-64))
+ stw rWORD8, 48(r1)
+ cfi_offset(rWORD8, (48-64))
+ stw rWORD7, 44(r1)
+ cfi_offset(rWORD7, (44-64))
bne L(unaligned)
/* At this point we know both strings have the same alignment and the
- compare length is at least 8 bytes. rBITDIF contains the low order
+ compare length is at least 8 bytes. r12 contains the low order
2 bits of rSTR1 and cr5 contains the result of the logical compare
- of rBITDIF to 0. If rBITDIF == 0 then we are already word
+ of r12 to 0. If r12 == 0 then we are already word
aligned and can perform the word aligned loop.
Otherwise we know the two strings have the same alignment (but not
@@ -74,332 +70,541 @@ EALIGN (memcmp,4,0)
eliminate bits preceding the first byte. Since we want to join the
normal (word aligned) compare loop, starting at the second word,
we need to adjust the length (rN) and special case the loop
- versioning for the first word. This insures that the loop count is
+ versioning for the first word. This ensures that the loop count is
correct and the first word (shifted) is in the expected register pair. */
.align 4
L(samealignment):
- clrrwi rSTR1,rSTR1,2
- clrrwi rSTR2,rSTR2,2
- beq cr5,L(Waligned)
- add rN,rN,rBITDIF
- slwi r11,rBITDIF,3
- srwi rTMP,rN,4 /* Divide by 16 */
- andi. rBITDIF,rN,12 /* Get the word remainder */
- lwz rWORD1,0(rSTR1)
- lwz rWORD2,0(rSTR2)
- cmplwi cr1,rBITDIF,8
- cmplwi cr7,rN,16
- clrlwi rN,rN,30
+ clrrwi rSTR1, rSTR1, 2
+ clrrwi rSTR2, rSTR2, 2
+ beq cr5, L(Waligned)
+ add rN, rN, r12
+ slwi rWORD6, r12, 3
+ srwi r0, rN, 4 /* Divide by 16 */
+ andi. r12, rN, 12 /* Get the word remainder */
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 0(rSTR1)
+ lwz rWORD2, 0(rSTR2)
+#endif
+ cmplwi cr1, r12, 8
+ cmplwi cr7, rN, 16
+ clrlwi rN, rN, 30
beq L(dPs4)
- mtctr rTMP
- bgt cr1,L(dPs3)
- beq cr1,L(dPs2)
+ mtctr r0
+ bgt cr1, L(dPs3)
+ beq cr1, L(dPs2)
/* Remainder is 4 */
.align 3
L(dsP1):
- slw rWORD5,rWORD1,r11
- slw rWORD6,rWORD2,r11
- cmplw cr5,rWORD5,rWORD6
- blt cr7,L(dP1x)
+ slw rWORD5, rWORD1, rWORD6
+ slw rWORD6, rWORD2, rWORD6
+ cmplw cr5, rWORD5, rWORD6
+ blt cr7, L(dP1x)
/* Do something useful in this cycle since we have to branch anyway. */
- lwz rWORD1,4(rSTR1)
- lwz rWORD2,4(rSTR2)
- cmplw cr0,rWORD1,rWORD2
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
b L(dP1e)
/* Remainder is 8 */
.align 4
L(dPs2):
- slw rWORD5,rWORD1,r11
- slw rWORD6,rWORD2,r11
- cmplw cr6,rWORD5,rWORD6
- blt cr7,L(dP2x)
+ slw rWORD5, rWORD1, rWORD6
+ slw rWORD6, rWORD2, rWORD6
+ cmplw cr6, rWORD5, rWORD6
+ blt cr7, L(dP2x)
/* Do something useful in this cycle since we have to branch anyway. */
- lwz rWORD7,4(rSTR1)
- lwz rWORD8,4(rSTR2)
- cmplw cr5,rWORD7,rWORD8
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD7, 4(rSTR1)
+ lwz rWORD8, 4(rSTR2)
+#endif
+ cmplw cr5, rWORD7, rWORD8
b L(dP2e)
/* Remainder is 12 */
.align 4
L(dPs3):
- slw rWORD3,rWORD1,r11
- slw rWORD4,rWORD2,r11
- cmplw cr1,rWORD3,rWORD4
+ slw rWORD3, rWORD1, rWORD6
+ slw rWORD4, rWORD2, rWORD6
+ cmplw cr1, rWORD3, rWORD4
b L(dP3e)
/* Count is a multiple of 16, remainder is 0 */
.align 4
L(dPs4):
- mtctr rTMP
- slw rWORD1,rWORD1,r11
- slw rWORD2,rWORD2,r11
- cmplw cr0,rWORD1,rWORD2
+ mtctr r0
+ slw rWORD1, rWORD1, rWORD6
+ slw rWORD2, rWORD2, rWORD6
+ cmplw cr7, rWORD1, rWORD2
b L(dP4e)
/* At this point we know both strings are word aligned and the
compare length is at least 8 bytes. */
.align 4
L(Waligned):
- andi. rBITDIF,rN,12 /* Get the word remainder */
- srwi rTMP,rN,4 /* Divide by 16 */
- cmplwi cr1,rBITDIF,8
- cmplwi cr7,rN,16
- clrlwi rN,rN,30
+ andi. r12, rN, 12 /* Get the word remainder */
+ srwi r0, rN, 4 /* Divide by 16 */
+ cmplwi cr1, r12, 8
+ cmplwi cr7, rN, 16
+ clrlwi rN, rN, 30
beq L(dP4)
- bgt cr1,L(dP3)
- beq cr1,L(dP2)
+ bgt cr1, L(dP3)
+ beq cr1, L(dP2)
/* Remainder is 4 */
.align 4
L(dP1):
- mtctr rTMP
+ mtctr r0
/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early
(8-15 byte compare), we want to use only volatile registers. This
means we can avoid restoring non-volatile registers since we did not
change any on the early exit path. The key here is the non-early
exit path only cares about the condition code (cr5), not about which
register pair was used. */
- lwz rWORD5,0(rSTR1)
- lwz rWORD6,0(rSTR2)
- cmplw cr5,rWORD5,rWORD6
- blt cr7,L(dP1x)
- lwz rWORD1,4(rSTR1)
- lwz rWORD2,4(rSTR2)
- cmplw cr0,rWORD1,rWORD2
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 0(rSTR1)
+ lwz rWORD6, 0(rSTR2)
+#endif
+ cmplw cr5, rWORD5, rWORD6
+ blt cr7, L(dP1x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
L(dP1e):
- lwz rWORD3,8(rSTR1)
- lwz rWORD4,8(rSTR2)
- cmplw cr1,rWORD3,rWORD4
- lwz rWORD5,12(rSTR1)
- lwz rWORD6,12(rSTR2)
- cmplw cr6,rWORD5,rWORD6
- bne cr5,L(dLcr5)
- bne cr0,L(dLcr0)
-
- lwzu rWORD7,16(rSTR1)
- lwzu rWORD8,16(rSTR2)
- bne cr1,L(dLcr1)
- cmplw cr5,rWORD7,rWORD8
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 8(rSTR1)
+ lwz rWORD4, 8(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 12(rSTR1)
+ lwz rWORD6, 12(rSTR2)
+#endif
+ cmplw cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5x)
+ bne cr7, L(dLcr7x)
+
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwzu rWORD7, 16(rSTR1)
+ lwzu rWORD8, 16(rSTR2)
+#endif
+ bne cr1, L(dLcr1)
+ cmplw cr5, rWORD7, rWORD8
bdnz L(dLoop)
- bne cr6,L(dLcr6)
- lwz r30,44(1)
- lwz r31,48(1)
+ bne cr6, L(dLcr6)
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
.align 3
L(dP1x):
- slwi. r12,rN,3
- bne cr5,L(dLcr5)
- subfic rN,r12,32 /* Shift count is 32 - (rN * 8). */
- lwz 1,0(1)
+ slwi. r12, rN, 3
+ bne cr5, L(dLcr5x)
+ subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
+ addi r1, r1, 64
+ cfi_adjust_cfa_offset(-64)
bne L(d00)
- li rRTN,0
+ li rRTN, 0
blr
/* Remainder is 8 */
.align 4
+ cfi_adjust_cfa_offset(64)
L(dP2):
- mtctr rTMP
- lwz rWORD5,0(rSTR1)
- lwz rWORD6,0(rSTR2)
- cmplw cr6,rWORD5,rWORD6
- blt cr7,L(dP2x)
- lwz rWORD7,4(rSTR1)
- lwz rWORD8,4(rSTR2)
- cmplw cr5,rWORD7,rWORD8
+ mtctr r0
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 0(rSTR1)
+ lwz rWORD6, 0(rSTR2)
+#endif
+ cmplw cr6, rWORD5, rWORD6
+ blt cr7, L(dP2x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD7, 4(rSTR1)
+ lwz rWORD8, 4(rSTR2)
+#endif
+ cmplw cr5, rWORD7, rWORD8
L(dP2e):
- lwz rWORD1,8(rSTR1)
- lwz rWORD2,8(rSTR2)
- cmplw cr0,rWORD1,rWORD2
- lwz rWORD3,12(rSTR1)
- lwz rWORD4,12(rSTR2)
- cmplw cr1,rWORD3,rWORD4
- addi rSTR1,rSTR1,4
- addi rSTR2,rSTR2,4
- bne cr6,L(dLcr6)
- bne cr5,L(dLcr5)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 8(rSTR1)
+ lwz rWORD2, 8(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 12(rSTR1)
+ lwz rWORD4, 12(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#endif
+ bne cr6, L(dLcr6)
+ bne cr5, L(dLcr5)
b L(dLoop2)
/* Again we are on an early exit path (16-23 byte compare), so we want to
only use volatile registers and avoid restoring non-volatile
registers. */
.align 4
L(dP2x):
- lwz rWORD3,4(rSTR1)
- lwz rWORD4,4(rSTR2)
- cmplw cr5,rWORD3,rWORD4
- slwi. r12,rN,3
- bne cr6,L(dLcr6)
- addi rSTR1,rSTR1,4
- addi rSTR2,rSTR2,4
- bne cr5,L(dLcr5)
- subfic rN,r12,32 /* Shift count is 32 - (rN * 8). */
- lwz 1,0(1)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 4(rSTR1)
+ lwz rWORD4, 4(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+ slwi. r12, rN, 3
+ bne cr6, L(dLcr6x)
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#endif
+ bne cr1, L(dLcr1x)
+ subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
+ addi r1, r1, 64
+ cfi_adjust_cfa_offset(-64)
bne L(d00)
- li rRTN,0
+ li rRTN, 0
blr
/* Remainder is 12 */
.align 4
+ cfi_adjust_cfa_offset(64)
L(dP3):
- mtctr rTMP
- lwz rWORD3,0(rSTR1)
- lwz rWORD4,0(rSTR2)
- cmplw cr1,rWORD3,rWORD4
+ mtctr r0
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 0(rSTR1)
+ lwz rWORD4, 0(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
L(dP3e):
- lwz rWORD5,4(rSTR1)
- lwz rWORD6,4(rSTR2)
- cmplw cr6,rWORD5,rWORD6
- blt cr7,L(dP3x)
- lwz rWORD7,8(rSTR1)
- lwz rWORD8,8(rSTR2)
- cmplw cr5,rWORD7,rWORD8
- lwz rWORD1,12(rSTR1)
- lwz rWORD2,12(rSTR2)
- cmplw cr0,rWORD1,rWORD2
- addi rSTR1,rSTR1,8
- addi rSTR2,rSTR2,8
- bne cr1,L(dLcr1)
- bne cr6,L(dLcr6)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 4(rSTR1)
+ lwz rWORD6, 4(rSTR2)
+#endif
+ cmplw cr6, rWORD5, rWORD6
+ blt cr7, L(dP3x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD7, 8(rSTR1)
+ lwz rWORD8, 8(rSTR2)
+#endif
+ cmplw cr5, rWORD7, rWORD8
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 12(rSTR1)
+ lwz rWORD2, 12(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#endif
+ bne cr1, L(dLcr1)
+ bne cr6, L(dLcr6)
b L(dLoop1)
/* Again we are on an early exit path (24-31 byte compare), so we want to
only use volatile registers and avoid restoring non-volatile
registers. */
.align 4
L(dP3x):
- lwz rWORD1,8(rSTR1)
- lwz rWORD2,8(rSTR2)
- cmplw cr5,rWORD1,rWORD2
- slwi. r12,rN,3
- bne cr1,L(dLcr1)
- addi rSTR1,rSTR1,8
- addi rSTR2,rSTR2,8
- bne cr6,L(dLcr6)
- subfic rN,r12,32 /* Shift count is 32 - (rN * 8). */
- bne cr5,L(dLcr5)
- lwz 1,0(1)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 8(rSTR1)
+ lwz rWORD2, 8(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
+ slwi. r12, rN, 3
+ bne cr1, L(dLcr1x)
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#endif
+ bne cr6, L(dLcr6x)
+ subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
+ bne cr7, L(dLcr7x)
+ addi r1, r1, 64
+ cfi_adjust_cfa_offset(-64)
bne L(d00)
- li rRTN,0
+ li rRTN, 0
blr
/* Count is a multiple of 16, remainder is 0 */
.align 4
+ cfi_adjust_cfa_offset(64)
L(dP4):
- mtctr rTMP
- lwz rWORD1,0(rSTR1)
- lwz rWORD2,0(rSTR2)
- cmplw cr0,rWORD1,rWORD2
+ mtctr r0
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 0(rSTR1)
+ lwz rWORD2, 0(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
L(dP4e):
- lwz rWORD3,4(rSTR1)
- lwz rWORD4,4(rSTR2)
- cmplw cr1,rWORD3,rWORD4
- lwz rWORD5,8(rSTR1)
- lwz rWORD6,8(rSTR2)
- cmplw cr6,rWORD5,rWORD6
- lwzu rWORD7,12(rSTR1)
- lwzu rWORD8,12(rSTR2)
- cmplw cr5,rWORD7,rWORD8
- bne cr0,L(dLcr0)
- bne cr1,L(dLcr1)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 4(rSTR1)
+ lwz rWORD4, 4(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 8(rSTR1)
+ lwz rWORD6, 8(rSTR2)
+#endif
+ cmplw cr6, rWORD5, rWORD6
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwzu rWORD7, 12(rSTR1)
+ lwzu rWORD8, 12(rSTR2)
+#endif
+ cmplw cr5, rWORD7, rWORD8
+ bne cr7, L(dLcr7)
+ bne cr1, L(dLcr1)
bdz- L(d24) /* Adjust CTR as we start with +4 */
/* This is the primary loop */
.align 4
L(dLoop):
- lwz rWORD1,4(rSTR1)
- lwz rWORD2,4(rSTR2)
- cmplw cr1,rWORD3,rWORD4
- bne cr6,L(dLcr6)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+ bne cr6, L(dLcr6)
L(dLoop1):
- lwz rWORD3,8(rSTR1)
- lwz rWORD4,8(rSTR2)
- cmplw cr6,rWORD5,rWORD6
- bne cr5,L(dLcr5)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 8(rSTR1)
+ lwz rWORD4, 8(rSTR2)
+#endif
+ cmplw cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5)
L(dLoop2):
- lwz rWORD5,12(rSTR1)
- lwz rWORD6,12(rSTR2)
- cmplw cr5,rWORD7,rWORD8
- bne cr0,L(dLcr0)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 12(rSTR1)
+ lwz rWORD6, 12(rSTR2)
+#endif
+ cmplw cr5, rWORD7, rWORD8
+ bne cr7, L(dLcr7)
L(dLoop3):
- lwzu rWORD7,16(rSTR1)
- lwzu rWORD8,16(rSTR2)
- bne cr1,L(dLcr1)
- cmplw cr0,rWORD1,rWORD2
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwzu rWORD7, 16(rSTR1)
+ lwzu rWORD8, 16(rSTR2)
+#endif
+ bne cr1, L(dLcr1)
+ cmplw cr7, rWORD1, rWORD2
bdnz L(dLoop)
L(dL4):
- cmplw cr1,rWORD3,rWORD4
- bne cr6,L(dLcr6)
- cmplw cr6,rWORD5,rWORD6
- bne cr5,L(dLcr5)
- cmplw cr5,rWORD7,rWORD8
+ cmplw cr1, rWORD3, rWORD4
+ bne cr6, L(dLcr6)
+ cmplw cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5)
+ cmplw cr5, rWORD7, rWORD8
L(d44):
- bne cr0,L(dLcr0)
+ bne cr7, L(dLcr7)
L(d34):
- bne cr1,L(dLcr1)
+ bne cr1, L(dLcr1)
L(d24):
- bne cr6,L(dLcr6)
+ bne cr6, L(dLcr6)
L(d14):
- slwi. r12,rN,3
- bne cr5,L(dLcr5)
+ slwi. r12, rN, 3
+ bne cr5, L(dLcr5)
L(d04):
- lwz r30,44(1)
- lwz r31,48(1)
- lwz 1,0(1)
- subfic rN,r12,32 /* Shift count is 32 - (rN * 8). */
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
+ addi r1, r1, 64
+ cfi_adjust_cfa_offset(-64)
+ subfic rN, r12, 32 /* Shift count is 32 - (rN * 8). */
beq L(zeroLength)
/* At this point we have a remainder of 1 to 3 bytes to compare. Since
we are aligned it is safe to load the whole word, and use
- shift right to eliminate bits beyond the compare length. */
+ shift right to eliminate bits beyond the compare length. */
L(d00):
- lwz rWORD1,4(rSTR1)
- lwz rWORD2,4(rSTR2)
- srw rWORD1,rWORD1,rN
- srw rWORD2,rWORD2,rN
- cmplw rWORD1,rWORD2
- li rRTN,0
- beqlr
- li rRTN,1
- bgtlr
- li rRTN,-1
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+#endif
+ srw rWORD1, rWORD1, rN
+ srw rWORD2, rWORD2, rN
+ sub rRTN, rWORD1, rWORD2
blr
.align 4
-L(dLcr0):
- lwz r30,44(1)
- lwz r31,48(1)
- li rRTN,1
- lwz 1,0(1)
- bgtlr cr0
- li rRTN,-1
+ cfi_adjust_cfa_offset(64)
+L(dLcr7):
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
+L(dLcr7x):
+ li rRTN, 1
+ addi r1, r1, 64
+ cfi_adjust_cfa_offset(-64)
+ bgtlr cr7
+ li rRTN, -1
blr
.align 4
+ cfi_adjust_cfa_offset(64)
L(dLcr1):
- lwz r30,44(1)
- lwz r31,48(1)
- li rRTN,1
- lwz 1,0(1)
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
+L(dLcr1x):
+ li rRTN, 1
+ addi r1, r1, 64
+ cfi_adjust_cfa_offset(-64)
bgtlr cr1
- li rRTN,-1
+ li rRTN, -1
blr
.align 4
+ cfi_adjust_cfa_offset(64)
L(dLcr6):
- lwz r30,44(1)
- lwz r31,48(1)
- li rRTN,1
- lwz 1,0(1)
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
+L(dLcr6x):
+ li rRTN, 1
+ addi r1, r1, 64
+ cfi_adjust_cfa_offset(-64)
bgtlr cr6
- li rRTN,-1
+ li rRTN, -1
blr
.align 4
+ cfi_adjust_cfa_offset(64)
L(dLcr5):
- lwz r30,44(1)
- lwz r31,48(1)
+ lwz rWORD7, 44(r1)
+ lwz rWORD8, 48(r1)
L(dLcr5x):
- li rRTN,1
- lwz 1,0(1)
+ li rRTN, 1
+ addi r1, r1, 64
+ cfi_adjust_cfa_offset(-64)
bgtlr cr5
- li rRTN,-1
+ li rRTN, -1
blr
.align 4
L(bytealigned):
- cfi_adjust_cfa_offset(-64)
mtctr rN
/* We need to prime this loop. This loop is swing modulo scheduled
@@ -411,38 +616,39 @@ L(bytealigned):
So we must precondition some registers and condition codes so that
we don't exit the loop early on the first iteration. */
- lbz rWORD1,0(rSTR1)
- lbz rWORD2,0(rSTR2)
+
+ lbz rWORD1, 0(rSTR1)
+ lbz rWORD2, 0(rSTR2)
bdz L(b11)
- cmplw cr0,rWORD1,rWORD2
- lbz rWORD3,1(rSTR1)
- lbz rWORD4,1(rSTR2)
+ cmplw cr7, rWORD1, rWORD2
+ lbz rWORD3, 1(rSTR1)
+ lbz rWORD4, 1(rSTR2)
bdz L(b12)
- cmplw cr1,rWORD3,rWORD4
- lbzu rWORD5,2(rSTR1)
- lbzu rWORD6,2(rSTR2)
+ cmplw cr1, rWORD3, rWORD4
+ lbzu rWORD5, 2(rSTR1)
+ lbzu rWORD6, 2(rSTR2)
bdz L(b13)
.align 4
L(bLoop):
- lbzu rWORD1,1(rSTR1)
- lbzu rWORD2,1(rSTR2)
- bne cr0,L(bLcr0)
+ lbzu rWORD1, 1(rSTR1)
+ lbzu rWORD2, 1(rSTR2)
+ bne cr7, L(bLcr7)
- cmplw cr6,rWORD5,rWORD6
+ cmplw cr6, rWORD5, rWORD6
bdz L(b3i)
- lbzu rWORD3,1(rSTR1)
- lbzu rWORD4,1(rSTR2)
- bne cr1,L(bLcr1)
+ lbzu rWORD3, 1(rSTR1)
+ lbzu rWORD4, 1(rSTR2)
+ bne cr1, L(bLcr1)
- cmplw cr0,rWORD1,rWORD2
+ cmplw cr7, rWORD1, rWORD2
bdz L(b2i)
- lbzu rWORD5,1(rSTR1)
- lbzu rWORD6,1(rSTR2)
- bne cr6,L(bLcr6)
+ lbzu rWORD5, 1(rSTR1)
+ lbzu rWORD6, 1(rSTR2)
+ bne cr6, L(bLcr6)
- cmplw cr1,rWORD3,rWORD4
+ cmplw cr1, rWORD3, rWORD4
bdnz L(bLoop)
/* We speculatively load bytes before we have tested the previous
@@ -452,67 +658,62 @@ L(bLoop):
tested. In this case we must complete the pending operations
before returning. */
L(b1i):
- bne cr0,L(bLcr0)
- bne cr1,L(bLcr1)
+ bne cr7, L(bLcr7)
+ bne cr1, L(bLcr1)
b L(bx56)
.align 4
L(b2i):
- bne cr6,L(bLcr6)
- bne cr0,L(bLcr0)
+ bne cr6, L(bLcr6)
+ bne cr7, L(bLcr7)
b L(bx34)
.align 4
L(b3i):
- bne cr1,L(bLcr1)
- bne cr6,L(bLcr6)
+ bne cr1, L(bLcr1)
+ bne cr6, L(bLcr6)
b L(bx12)
.align 4
-L(bLcr0):
- li rRTN,1
- bgtlr cr0
- li rRTN,-1
+L(bLcr7):
+ li rRTN, 1
+ bgtlr cr7
+ li rRTN, -1
blr
L(bLcr1):
- li rRTN,1
+ li rRTN, 1
bgtlr cr1
- li rRTN,-1
+ li rRTN, -1
blr
L(bLcr6):
- li rRTN,1
+ li rRTN, 1
bgtlr cr6
- li rRTN,-1
+ li rRTN, -1
blr
L(b13):
- bne cr0,L(bx12)
- bne cr1,L(bx34)
+ bne cr7, L(bx12)
+ bne cr1, L(bx34)
L(bx56):
- sub rRTN,rWORD5,rWORD6
+ sub rRTN, rWORD5, rWORD6
blr
nop
L(b12):
- bne cr0,L(bx12)
+ bne cr7, L(bx12)
L(bx34):
- sub rRTN,rWORD3,rWORD4
+ sub rRTN, rWORD3, rWORD4
blr
-
L(b11):
L(bx12):
- sub rRTN,rWORD1,rWORD2
+ sub rRTN, rWORD1, rWORD2
blr
-
.align 4
-L(zeroLengthReturn):
-
L(zeroLength):
- li rRTN,0
+ li rRTN, 0
blr
- cfi_adjust_cfa_offset(64)
.align 4
/* At this point we know the strings have different alignment and the
- compare length is at least 8 bytes. rBITDIF contains the low order
+ compare length is at least 8 bytes. r12 contains the low order
2 bits of rSTR1 and cr5 contains the result of the logical compare
- of rBITDIF to 0. If rBITDIF == 0 then rStr1 is word aligned and can
+ of r12 to 0. If r12 == 0 then rSTR1 is word aligned and we can
perform the Wunaligned loop.
Otherwise we know that rSTR1 is not yet word aligned.
@@ -521,465 +722,654 @@ L(zeroLength):
eliminate bits preceding the first byte. Since we want to join the
normal (Wunaligned) compare loop, starting at the second word,
we need to adjust the length (rN) and special case the loop
- versioning for the first W. This insures that the loop count is
+ versioning for the first W. This ensures that the loop count is
correct and the first W (shifted) is in the expected register pair. */
#define rSHL r29 /* Unaligned shift left count. */
#define rSHR r28 /* Unaligned shift right count. */
-#define rB r27 /* Left rotation temp for rWORD2. */
-#define rD r26 /* Left rotation temp for rWORD4. */
-#define rF r25 /* Left rotation temp for rWORD6. */
-#define rH r24 /* Left rotation temp for rWORD8. */
-#define rA r0 /* Right rotation temp for rWORD2. */
-#define rC r12 /* Right rotation temp for rWORD4. */
-#define rE r0 /* Right rotation temp for rWORD6. */
-#define rG r12 /* Right rotation temp for rWORD8. */
+#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */
+#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */
+#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */
+#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. */
+ cfi_adjust_cfa_offset(64)
L(unaligned):
- stw r29,40(r1)
- cfi_offset(r29,(40-64))
- clrlwi rSHL,rSTR2,30
- stw r28,36(r1)
- cfi_offset(r28,(36-64))
- beq cr5,L(Wunaligned)
- stw r27,32(r1)
- cfi_offset(r27,(32-64))
+ stw rSHL, 40(r1)
+ cfi_offset(rSHL, (40-64))
+ clrlwi rSHL, rSTR2, 30
+ stw rSHR, 36(r1)
+ cfi_offset(rSHR, (36-64))
+ beq cr5, L(Wunaligned)
+ stw rWORD8_SHIFT, 32(r1)
+ cfi_offset(rWORD8_SHIFT, (32-64))
/* Adjust the logical start of rSTR2 to compensate for the extra bits
in the 1st rSTR1 W. */
- sub r27,rSTR2,rBITDIF
+ sub rWORD8_SHIFT, rSTR2, r12
/* But do not attempt to address the W before that W that contains
the actual start of rSTR2. */
- clrrwi rSTR2,rSTR2,2
- stw r26,28(r1)
- cfi_offset(r26,(28-64))
-/* Compute the left/right shift counts for the unalign rSTR2,
+ clrrwi rSTR2, rSTR2, 2
+ stw rWORD2_SHIFT, 28(r1)
+ cfi_offset(rWORD2_SHIFT, (28-64))
+/* Compute the left/right shift counts for the unaligned rSTR2,
compensating for the logical (W aligned) start of rSTR1. */
- clrlwi rSHL,r27,30
- clrrwi rSTR1,rSTR1,2
- stw r25,24(r1)
- cfi_offset(r25,(24-64))
- slwi rSHL,rSHL,3
- cmplw cr5,r27,rSTR2
- add rN,rN,rBITDIF
- slwi r11,rBITDIF,3
- stw r24,20(r1)
- cfi_offset(r24,(20-64))
- subfic rSHR,rSHL,32
- srwi rTMP,rN,4 /* Divide by 16 */
- andi. rBITDIF,rN,12 /* Get the W remainder */
+ clrlwi rSHL, rWORD8_SHIFT, 30
+ clrrwi rSTR1, rSTR1, 2
+ stw rWORD4_SHIFT, 24(r1)
+ cfi_offset(rWORD4_SHIFT, (24-64))
+ slwi rSHL, rSHL, 3
+ cmplw cr5, rWORD8_SHIFT, rSTR2
+ add rN, rN, r12
+ slwi rWORD6, r12, 3
+ stw rWORD6_SHIFT, 20(r1)
+ cfi_offset(rWORD6_SHIFT, (20-64))
+ subfic rSHR, rSHL, 32
+ srwi r0, rN, 4 /* Divide by 16 */
+ andi. r12, rN, 12 /* Get the W remainder */
/* We normally need to load 2 Ws to start the unaligned rSTR2, but in
this special case those bits may be discarded anyway. Also we
must avoid loading a W where none of the bits are part of rSTR2 as
this may cross a page boundary and cause a page fault. */
- li rWORD8,0
- blt cr5,L(dus0)
- lwz rWORD8,0(rSTR2)
- la rSTR2,4(rSTR2)
- slw rWORD8,rWORD8,rSHL
+ li rWORD8, 0
+ blt cr5, L(dus0)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD8, 0(rSTR2)
+ addi rSTR2, rSTR2, 4
+#endif
+ slw rWORD8, rWORD8, rSHL
L(dus0):
- lwz rWORD1,0(rSTR1)
- lwz rWORD2,0(rSTR2)
- cmplwi cr1,rBITDIF,8
- cmplwi cr7,rN,16
- srw rG,rWORD2,rSHR
- clrlwi rN,rN,30
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 0(rSTR1)
+ lwz rWORD2, 0(rSTR2)
+#endif
+ cmplwi cr1, r12, 8
+ cmplwi cr7, rN, 16
+ srw r12, rWORD2, rSHR
+ clrlwi rN, rN, 30
beq L(duPs4)
- mtctr rTMP
- or rWORD8,rG,rWORD8
- bgt cr1,L(duPs3)
- beq cr1,L(duPs2)
+ mtctr r0
+ or rWORD8, r12, rWORD8
+ bgt cr1, L(duPs3)
+ beq cr1, L(duPs2)
/* Remainder is 4 */
.align 4
L(dusP1):
- slw rB,rWORD2,rSHL
- slw rWORD7,rWORD1,r11
- slw rWORD8,rWORD8,r11
- bge cr7,L(duP1e)
+ slw rWORD8_SHIFT, rWORD2, rSHL
+ slw rWORD7, rWORD1, rWORD6
+ slw rWORD8, rWORD8, rWORD6
+ bge cr7, L(duP1e)
/* At this point we exit early with the first word compare
complete and remainder of 0 to 3 bytes. See L(du14) for details on
how we handle the remaining bytes. */
- cmplw cr5,rWORD7,rWORD8
- slwi. rN,rN,3
- bne cr5,L(duLcr5)
- cmplw cr7,rN,rSHR
+ cmplw cr5, rWORD7, rWORD8
+ slwi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- lwz rWORD2,4(rSTR2)
- srw rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD2, 4(rSTR2)
+#endif
+ srw r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 8 */
.align 4
L(duPs2):
- slw rH,rWORD2,rSHL
- slw rWORD5,rWORD1,r11
- slw rWORD6,rWORD8,r11
+ slw rWORD6_SHIFT, rWORD2, rSHL
+ slw rWORD5, rWORD1, rWORD6
+ slw rWORD6, rWORD8, rWORD6
b L(duP2e)
/* Remainder is 12 */
.align 4
L(duPs3):
- slw rF,rWORD2,rSHL
- slw rWORD3,rWORD1,r11
- slw rWORD4,rWORD8,r11
+ slw rWORD4_SHIFT, rWORD2, rSHL
+ slw rWORD3, rWORD1, rWORD6
+ slw rWORD4, rWORD8, rWORD6
b L(duP3e)
/* Count is a multiple of 16, remainder is 0 */
.align 4
L(duPs4):
- mtctr rTMP
- or rWORD8,rG,rWORD8
- slw rD,rWORD2,rSHL
- slw rWORD1,rWORD1,r11
- slw rWORD2,rWORD8,r11
+ mtctr r0
+ or rWORD8, r12, rWORD8
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ slw rWORD1, rWORD1, rWORD6
+ slw rWORD2, rWORD8, rWORD6
b L(duP4e)
/* At this point we know rSTR1 is word aligned and the
compare length is at least 8 bytes. */
.align 4
L(Wunaligned):
- stw r27,32(r1)
- cfi_offset(r27,(32-64))
- clrrwi rSTR2,rSTR2,2
- stw r26,28(r1)
- cfi_offset(r26,(28-64))
- srwi rTMP,rN,4 /* Divide by 16 */
- stw r25,24(r1)
- cfi_offset(r25,(24-64))
- andi. rBITDIF,rN,12 /* Get the W remainder */
- stw r24,20(r1)
- cfi_offset(r24,(24-64))
- slwi rSHL,rSHL,3
- lwz rWORD6,0(rSTR2)
- lwzu rWORD8,4(rSTR2)
- cmplwi cr1,rBITDIF,8
- cmplwi cr7,rN,16
- clrlwi rN,rN,30
- subfic rSHR,rSHL,32
- slw rH,rWORD6,rSHL
+ stw rWORD8_SHIFT, 32(r1)
+ cfi_offset(rWORD8_SHIFT, (32-64))
+ clrrwi rSTR2, rSTR2, 2
+ stw rWORD2_SHIFT, 28(r1)
+ cfi_offset(rWORD2_SHIFT, (28-64))
+ srwi r0, rN, 4 /* Divide by 16 */
+ stw rWORD4_SHIFT, 24(r1)
+ cfi_offset(rWORD4_SHIFT, (24-64))
+ andi. r12, rN, 12 /* Get the W remainder */
+ stw rWORD6_SHIFT, 20(r1)
+ cfi_offset(rWORD6_SHIFT, (20-64))
+ slwi rSHL, rSHL, 3
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD6, 0(rSTR2)
+ lwzu rWORD8, 4(rSTR2)
+#endif
+ cmplwi cr1, r12, 8
+ cmplwi cr7, rN, 16
+ clrlwi rN, rN, 30
+ subfic rSHR, rSHL, 32
+ slw rWORD6_SHIFT, rWORD6, rSHL
beq L(duP4)
- mtctr rTMP
- bgt cr1,L(duP3)
- beq cr1,L(duP2)
+ mtctr r0
+ bgt cr1, L(duP3)
+ beq cr1, L(duP2)
/* Remainder is 4 */
.align 4
L(duP1):
- srw rG,rWORD8,rSHR
- lwz rWORD7,0(rSTR1)
- slw rB,rWORD8,rSHL
- or rWORD8,rG,rH
- blt cr7,L(duP1x)
+ srw r12, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ addi rSTR1, rSTR1, 4
+#else
+ lwz rWORD7, 0(rSTR1)
+#endif
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
+ blt cr7, L(duP1x)
L(duP1e):
- lwz rWORD1,4(rSTR1)
- lwz rWORD2,4(rSTR2)
- cmplw cr5,rWORD7,rWORD8
- srw rA,rWORD2,rSHR
- slw rD,rWORD2,rSHL
- or rWORD2,rA,rB
- lwz rWORD3,8(rSTR1)
- lwz rWORD4,8(rSTR2)
- cmplw cr0,rWORD1,rWORD2
- srw rC,rWORD4,rSHR
- slw rF,rWORD4,rSHL
- bne cr5,L(duLcr5)
- or rWORD4,rC,rD
- lwz rWORD5,12(rSTR1)
- lwz rWORD6,12(rSTR2)
- cmplw cr1,rWORD3,rWORD4
- srw rE,rWORD6,rSHR
- slw rH,rWORD6,rSHL
- bne cr0,L(duLcr0)
- or rWORD6,rE,rF
- cmplw cr6,rWORD5,rWORD6
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+#endif
+ cmplw cr5, rWORD7, rWORD8
+ srw r0, rWORD2, rSHR
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 8(rSTR1)
+ lwz rWORD4, 8(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
+ srw r12, rWORD4, rSHR
+ slw rWORD4_SHIFT, rWORD4, rSHL
+ bne cr5, L(duLcr5)
+ or rWORD4, r12, rWORD2_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 12(rSTR1)
+ lwz rWORD6, 12(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+ srw r0, rWORD6, rSHR
+ slw rWORD6_SHIFT, rWORD6, rSHL
+ bne cr7, L(duLcr7)
+ or rWORD6, r0, rWORD4_SHIFT
+ cmplw cr6, rWORD5, rWORD6
b L(duLoop3)
.align 4
/* At this point we exit early with the first word compare
complete and remainder of 0 to 3 bytes. See L(du14) for details on
how we handle the remaining bytes. */
L(duP1x):
- cmplw cr5,rWORD7,rWORD8
- slwi. rN,rN,3
- bne cr5,L(duLcr5)
- cmplw cr7,rN,rSHR
+ cmplw cr5, rWORD7, rWORD8
+ slwi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- ld rWORD2,8(rSTR2)
- srw rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD2, 8(rSTR2)
+#endif
+ srw r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 8 */
.align 4
L(duP2):
- srw rE,rWORD8,rSHR
- lwz rWORD5,0(rSTR1)
- or rWORD6,rE,rH
- slw rH,rWORD8,rSHL
+ srw r0, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ addi rSTR1, rSTR1, 4
+#else
+ lwz rWORD5, 0(rSTR1)
+#endif
+ or rWORD6, r0, rWORD6_SHIFT
+ slw rWORD6_SHIFT, rWORD8, rSHL
L(duP2e):
- lwz rWORD7,4(rSTR1)
- lwz rWORD8,4(rSTR2)
- cmplw cr6,rWORD5,rWORD6
- srw rG,rWORD8,rSHR
- slw rB,rWORD8,rSHL
- or rWORD8,rG,rH
- blt cr7,L(duP2x)
- lwz rWORD1,8(rSTR1)
- lwz rWORD2,8(rSTR2)
- cmplw cr5,rWORD7,rWORD8
- bne cr6,L(duLcr6)
- srw rA,rWORD2,rSHR
- slw rD,rWORD2,rSHL
- or rWORD2,rA,rB
- lwz rWORD3,12(rSTR1)
- lwz rWORD4,12(rSTR2)
- cmplw cr0,rWORD1,rWORD2
- bne cr5,L(duLcr5)
- srw rC,rWORD4,rSHR
- slw rF,rWORD4,rSHL
- or rWORD4,rC,rD
- addi rSTR1,rSTR1,4
- addi rSTR2,rSTR2,4
- cmplw cr1,rWORD3,rWORD4
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD7, 4(rSTR1)
+ lwz rWORD8, 4(rSTR2)
+#endif
+ cmplw cr6, rWORD5, rWORD6
+ srw r12, rWORD8, rSHR
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
+ blt cr7, L(duP2x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 8(rSTR1)
+ lwz rWORD2, 8(rSTR2)
+#endif
+ cmplw cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ srw r0, rWORD2, rSHR
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 12(rSTR1)
+ lwz rWORD4, 12(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
+ bne cr5, L(duLcr5)
+ srw r12, rWORD4, rSHR
+ slw rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#endif
+ cmplw cr1, rWORD3, rWORD4
b L(duLoop2)
.align 4
L(duP2x):
- cmplw cr5,rWORD7,rWORD8
- addi rSTR1,rSTR1,4
- addi rSTR2,rSTR2,4
- bne cr6,L(duLcr6)
- slwi. rN,rN,3
- bne cr5,L(duLcr5)
- cmplw cr7,rN,rSHR
+ cmplw cr5, rWORD7, rWORD8
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#endif
+ bne cr6, L(duLcr6)
+ slwi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- lwz rWORD2,4(rSTR2)
- srw rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD2, 4(rSTR2)
+#endif
+ srw r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 12 */
.align 4
L(duP3):
- srw rC,rWORD8,rSHR
- lwz rWORD3,0(rSTR1)
- slw rF,rWORD8,rSHL
- or rWORD4,rC,rH
+ srw r12, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ addi rSTR1, rSTR1, 4
+#else
+ lwz rWORD3, 0(rSTR1)
+#endif
+ slw rWORD4_SHIFT, rWORD8, rSHL
+ or rWORD4, r12, rWORD6_SHIFT
L(duP3e):
- lwz rWORD5,4(rSTR1)
- lwz rWORD6,4(rSTR2)
- cmplw cr1,rWORD3,rWORD4
- srw rE,rWORD6,rSHR
- slw rH,rWORD6,rSHL
- or rWORD6,rE,rF
- lwz rWORD7,8(rSTR1)
- lwz rWORD8,8(rSTR2)
- cmplw cr6,rWORD5,rWORD6
- bne cr1,L(duLcr1)
- srw rG,rWORD8,rSHR
- slw rB,rWORD8,rSHL
- or rWORD8,rG,rH
- blt cr7,L(duP3x)
- lwz rWORD1,12(rSTR1)
- lwz rWORD2,12(rSTR2)
- cmplw cr5,rWORD7,rWORD8
- bne cr6,L(duLcr6)
- srw rA,rWORD2,rSHR
- slw rD,rWORD2,rSHL
- or rWORD2,rA,rB
- addi rSTR1,rSTR1,8
- addi rSTR2,rSTR2,8
- cmplw cr0,rWORD1,rWORD2
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 4(rSTR1)
+ lwz rWORD6, 4(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+ srw r0, rWORD6, rSHR
+ slw rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD7, 8(rSTR1)
+ lwz rWORD8, 8(rSTR2)
+#endif
+ cmplw cr6, rWORD5, rWORD6
+ bne cr1, L(duLcr1)
+ srw r12, rWORD8, rSHR
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
+ blt cr7, L(duP3x)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 12(rSTR1)
+ lwz rWORD2, 12(rSTR2)
+#endif
+ cmplw cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ srw r0, rWORD2, rSHR
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#endif
+ cmplw cr7, rWORD1, rWORD2
b L(duLoop1)
.align 4
L(duP3x):
- addi rSTR1,rSTR1,8
- addi rSTR2,rSTR2,8
- bne cr1,L(duLcr1)
- cmplw cr5,rWORD7,rWORD8
- bne cr6,L(duLcr6)
- slwi. rN,rN,3
- bne cr5,L(duLcr5)
- cmplw cr7,rN,rSHR
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#endif
+#if 0
+/* Huh? We've already branched on cr1! */
+ bne cr1, L(duLcr1)
+#endif
+ cmplw cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ slwi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- lwz rWORD2,4(rSTR2)
- srw rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD2, 4(rSTR2)
+#endif
+ srw r0, rWORD2, rSHR
b L(dutrim)
/* Count is a multiple of 16, remainder is 0 */
.align 4
L(duP4):
- mtctr rTMP
- srw rA,rWORD8,rSHR
- lwz rWORD1,0(rSTR1)
- slw rD,rWORD8,rSHL
- or rWORD2,rA,rH
+ mtctr r0
+ srw r0, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ addi rSTR1, rSTR1, 4
+#else
+ lwz rWORD1, 0(rSTR1)
+#endif
+ slw rWORD2_SHIFT, rWORD8, rSHL
+ or rWORD2, r0, rWORD6_SHIFT
L(duP4e):
- lwz rWORD3,4(rSTR1)
- lwz rWORD4,4(rSTR2)
- cmplw cr0,rWORD1,rWORD2
- srw rC,rWORD4,rSHR
- slw rF,rWORD4,rSHL
- or rWORD4,rC,rD
- lwz rWORD5,8(rSTR1)
- lwz rWORD6,8(rSTR2)
- cmplw cr1,rWORD3,rWORD4
- bne cr0,L(duLcr0)
- srw rE,rWORD6,rSHR
- slw rH,rWORD6,rSHL
- or rWORD6,rE,rF
- lwzu rWORD7,12(rSTR1)
- lwzu rWORD8,12(rSTR2)
- cmplw cr6,rWORD5,rWORD6
- bne cr1,L(duLcr1)
- srw rG,rWORD8,rSHR
- slw rB,rWORD8,rSHL
- or rWORD8,rG,rH
- cmplw cr5,rWORD7,rWORD8
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 4(rSTR1)
+ lwz rWORD4, 4(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
+ srw r12, rWORD4, rSHR
+ slw rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 8(rSTR1)
+ lwz rWORD6, 8(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+ bne cr7, L(duLcr7)
+ srw r0, rWORD6, rSHR
+ slw rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwzu rWORD7, 12(rSTR1)
+ lwzu rWORD8, 12(rSTR2)
+#endif
+ cmplw cr6, rWORD5, rWORD6
+ bne cr1, L(duLcr1)
+ srw r12, rWORD8, rSHR
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
+ cmplw cr5, rWORD7, rWORD8
bdz L(du24) /* Adjust CTR as we start with +4 */
/* This is the primary loop */
.align 4
L(duLoop):
- lwz rWORD1,4(rSTR1)
- lwz rWORD2,4(rSTR2)
- cmplw cr1,rWORD3,rWORD4
- bne cr6,L(duLcr6)
- srw rA,rWORD2,rSHR
- slw rD,rWORD2,rSHL
- or rWORD2,rA,rB
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD1, 4(rSTR1)
+ lwz rWORD2, 4(rSTR2)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+ bne cr6, L(duLcr6)
+ srw r0, rWORD2, rSHR
+ slw rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
L(duLoop1):
- lwz rWORD3,8(rSTR1)
- lwz rWORD4,8(rSTR2)
- cmplw cr6,rWORD5,rWORD6
- bne cr5,L(duLcr5)
- srw rC,rWORD4,rSHR
- slw rF,rWORD4,rSHL
- or rWORD4,rC,rD
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD3, 0, rSTR1
+ lwbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD3, 8(rSTR1)
+ lwz rWORD4, 8(rSTR2)
+#endif
+ cmplw cr6, rWORD5, rWORD6
+ bne cr5, L(duLcr5)
+ srw r12, rWORD4, rSHR
+ slw rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
L(duLoop2):
- lwz rWORD5,12(rSTR1)
- lwz rWORD6,12(rSTR2)
- cmplw cr5,rWORD7,rWORD8
- bne cr0,L(duLcr0)
- srw rE,rWORD6,rSHR
- slw rH,rWORD6,rSHL
- or rWORD6,rE,rF
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD5, 0, rSTR1
+ lwbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD5, 12(rSTR1)
+ lwz rWORD6, 12(rSTR2)
+#endif
+ cmplw cr5, rWORD7, rWORD8
+ bne cr7, L(duLcr7)
+ srw r0, rWORD6, rSHR
+ slw rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
L(duLoop3):
- lwzu rWORD7,16(rSTR1)
- lwzu rWORD8,16(rSTR2)
- cmplw cr0,rWORD1,rWORD2
- bne cr1,L(duLcr1)
- srw rG,rWORD8,rSHR
- slw rB,rWORD8,rSHL
- or rWORD8,rG,rH
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD7, 0, rSTR1
+ lwbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 4
+ addi rSTR2, rSTR2, 4
+#else
+ lwzu rWORD7, 16(rSTR1)
+ lwzu rWORD8, 16(rSTR2)
+#endif
+ cmplw cr7, rWORD1, rWORD2
+ bne cr1, L(duLcr1)
+ srw r12, rWORD8, rSHR
+ slw rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
bdnz L(duLoop)
L(duL4):
- bne cr1,L(duLcr1)
- cmplw cr1,rWORD3,rWORD4
- bne cr6,L(duLcr6)
- cmplw cr6,rWORD5,rWORD6
- bne cr5,L(duLcr5)
- cmplw cr5,rWORD7,rWORD8
+#if 0
+/* Huh? We've already branched on cr1! */
+ bne cr1, L(duLcr1)
+#endif
+ cmplw cr1, rWORD3, rWORD4
+ bne cr6, L(duLcr6)
+ cmplw cr6, rWORD5, rWORD6
+ bne cr5, L(duLcr5)
+ cmplw cr5, rWORD7, rWORD8
L(du44):
- bne cr0,L(duLcr0)
+ bne cr7, L(duLcr7)
L(du34):
- bne cr1,L(duLcr1)
+ bne cr1, L(duLcr1)
L(du24):
- bne cr6,L(duLcr6)
+ bne cr6, L(duLcr6)
L(du14):
- slwi. rN,rN,3
- bne cr5,L(duLcr5)
+ slwi. rN, rN, 3
+ bne cr5, L(duLcr5)
/* At this point we have a remainder of 1 to 3 bytes to compare. We use
shift right to eliminate bits beyond the compare length.
+ This allows the use of word subtract to compute the final result.
However it may not be safe to load rWORD2 which may be beyond the
string length. So we compare the bit length of the remainder to
the right shift count (rSHR). If the bit count is less than or equal
we do not need to load rWORD2 (all significant bits are already in
- rB). */
- cmplw cr7,rN,rSHR
+ rWORD8_SHIFT). */
+ cmplw cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- lwz rWORD2,4(rSTR2)
- srw rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 4
+#else
+ lwz rWORD2, 4(rSTR2)
+#endif
+ srw r0, rWORD2, rSHR
.align 4
L(dutrim):
- lwz rWORD1,4(rSTR1)
- lwz r31,48(1)
- subfic rN,rN,32 /* Shift count is 32 - (rN * 8). */
- or rWORD2,rA,rB
- lwz r30,44(1)
- lwz r29,40(r1)
- srw rWORD1,rWORD1,rN
- srw rWORD2,rWORD2,rN
- lwz r28,36(r1)
- lwz r27,32(r1)
- cmplw rWORD1,rWORD2
- li rRTN,0
- beq L(dureturn26)
- li rRTN,1
- bgt L(dureturn26)
- li rRTN,-1
+#ifdef __LITTLE_ENDIAN__
+ lwbrx rWORD1, 0, rSTR1
+#else
+ lwz rWORD1, 4(rSTR1)
+#endif
+ lwz rWORD8, 48(r1)
+ subfic rN, rN, 32 /* Shift count is 32 - (rN * 8). */
+ or rWORD2, r0, rWORD8_SHIFT
+ lwz rWORD7, 44(r1)
+ lwz rSHL, 40(r1)
+ srw rWORD1, rWORD1, rN
+ srw rWORD2, rWORD2, rN
+ lwz rSHR, 36(r1)
+ lwz rWORD8_SHIFT, 32(r1)
+ sub rRTN, rWORD1, rWORD2
b L(dureturn26)
.align 4
-L(duLcr0):
- lwz r31,48(1)
- lwz r30,44(1)
- li rRTN,1
- bgt cr0,L(dureturn29)
- lwz r29,40(r1)
- lwz r28,36(r1)
- li rRTN,-1
+L(duLcr7):
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
+ li rRTN, 1
+ bgt cr7, L(dureturn29)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
+ li rRTN, -1
b L(dureturn27)
.align 4
L(duLcr1):
- lwz r31,48(1)
- lwz r30,44(1)
- li rRTN,1
- bgt cr1,L(dureturn29)
- lwz r29,40(r1)
- lwz r28,36(r1)
- li rRTN,-1
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
+ li rRTN, 1
+ bgt cr1, L(dureturn29)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
+ li rRTN, -1
b L(dureturn27)
.align 4
L(duLcr6):
- lwz r31,48(1)
- lwz r30,44(1)
- li rRTN,1
- bgt cr6,L(dureturn29)
- lwz r29,40(r1)
- lwz r28,36(r1)
- li rRTN,-1
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
+ li rRTN, 1
+ bgt cr6, L(dureturn29)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
+ li rRTN, -1
b L(dureturn27)
.align 4
L(duLcr5):
- lwz r31,48(1)
- lwz r30,44(1)
- li rRTN,1
- bgt cr5,L(dureturn29)
- lwz r29,40(r1)
- lwz r28,36(r1)
- li rRTN,-1
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
+ li rRTN, 1
+ bgt cr5, L(dureturn29)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
+ li rRTN, -1
b L(dureturn27)
.align 3
L(duZeroReturn):
- li rRTN,0
+ li rRTN, 0
.align 4
L(dureturn):
- lwz r31,48(1)
- lwz r30,44(1)
+ lwz rWORD8, 48(r1)
+ lwz rWORD7, 44(r1)
L(dureturn29):
- lwz r29,40(r1)
- lwz r28,36(r1)
+ lwz rSHL, 40(r1)
+ lwz rSHR, 36(r1)
L(dureturn27):
- lwz r27,32(r1)
+ lwz rWORD8_SHIFT, 32(r1)
L(dureturn26):
- lwz r26,28(r1)
+ lwz rWORD2_SHIFT, 28(r1)
L(dureturn25):
- lwz r25,24(r1)
- lwz r24,20(r1)
- lwz 1,0(1)
+ lwz rWORD4_SHIFT, 24(r1)
+ lwz rWORD6_SHIFT, 20(r1)
+ addi r1, r1, 64
+ cfi_adjust_cfa_offset(-64)
blr
END (memcmp)
+
libc_hidden_builtin_def (memcmp)
-weak_alias (memcmp,bcmp)
+weak_alias (memcmp, bcmp)
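On little-endian this memcmp loads every word of both strings with lwbrx, so each register holds the bytes in the order a big-endian load would give: the first byte in memory is the most significant. An unsigned word comparison then orders the words exactly as a byte-wise memcmp would, and the shared tail can finish with a plain subtract once the bytes beyond the compare length have been shifted out (at most three bytes remain, so the 32-bit difference cannot wrap). A C sketch of the idea, with bswap standing in for lwbrx:

    #include <stdint.h>
    #include <string.h>

    int compare_one_word (const unsigned char *a, const unsigned char *b)
    {
      uint32_t wa, wb;
      memcpy (&wa, a, sizeof wa);
      memcpy (&wb, b, sizeof wb);
    #ifdef __LITTLE_ENDIAN__
      wa = __builtin_bswap32 (wa);   /* the effect of lwbrx on LE */
      wb = __builtin_bswap32 (wb);
    #endif
      /* The first differing byte in memory is now the most significant
         differing byte in the register, so unsigned order == memcmp order.  */
      return (wa > wb) - (wa < wb);
    }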
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/memcpy.S b/libc/sysdeps/powerpc/powerpc32/power7/memcpy.S
index 7f0077823..acf3c1019 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/memcpy.S
@@ -383,7 +383,7 @@ L(copy_GE_32_unaligned):
beq L(copy_GE_32_unaligned_cont)
- /* SRC is not quadword aligned, get it aligned. */
+ /* DST is not quadword aligned, get it aligned. */
mtcrf 0x01,0
subf 31,0,5
@@ -435,13 +435,21 @@ L(copy_GE_32_unaligned_cont):
mr 11,12
mtcrf 0x01,9
cmplwi cr6,9,1
+#ifdef __LITTLE_ENDIAN__
+ lvsr 5,0,12
+#else
lvsl 5,0,12
+#endif
lvx 3,0,12
bf 31,L(setup_unaligned_loop)
/* Copy another 16 bytes to align to 32-bytes due to the loop . */
lvx 4,12,6
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
vperm 6,3,4,5
+#endif
addi 11,12,16
addi 10,3,16
stvx 6,0,3
@@ -461,11 +469,17 @@ L(unaligned_loop):
vector instructions though. */
lvx 4,11,6 /* vr4 = r11+16. */
- vperm 6,3,4,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr6. */
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
+ vperm 6,3,4,5
+#endif
lvx 3,11,7 /* vr3 = r11+32. */
- vperm 10,4,3,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr10. */
+#ifdef __LITTLE_ENDIAN__
+ vperm 10,3,4,5
+#else
+ vperm 10,4,3,5
+#endif
addi 11,11,32
stvx 6,0,10
stvx 10,10,6
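The VMX path builds each aligned 16-byte store by selecting a 16-byte window out of two consecutive aligned loads. vperm indexes the concatenation of its two source vectors in big-endian byte order, so on little-endian the patch generates the permute control with lvsr instead of lvsl and swaps the two vperm source operands to select the same window. A scalar C model of the selection (off is the source misalignment, src & 15; the function name is illustrative only):

    #include <stdint.h>

    void select_window (uint8_t out[16], const uint8_t prev[16],
                        const uint8_t next[16], unsigned off)
    {
      /* out = (prev ++ next)[off .. off + 15], which is what the
         lvsl/vperm (BE) or lvsr/swapped-vperm (LE) pairs compute.  */
      for (unsigned i = 0; i < 16; i++)
        out[i] = (off + i < 16) ? prev[off + i] : next[(off + i) - 16];
    }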
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/mempcpy.S b/libc/sysdeps/powerpc/powerpc32/power7/mempcpy.S
index 5ad4edb58..4610ec5b5 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/mempcpy.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/mempcpy.S
@@ -325,7 +325,7 @@ L(copy_GE_32_unaligned):
beq L(copy_GE_32_unaligned_cont)
- /* SRC is not quadword aligned, get it aligned. */
+ /* DST is not quadword aligned, get it aligned. */
mtcrf 0x01,0
subf 31,0,5
@@ -377,13 +377,21 @@ L(copy_GE_32_unaligned_cont):
mr 11,12
mtcrf 0x01,9
cmplwi cr6,9,1
- lvsl 5,0,12
+#ifdef __LITTLE_ENDIAN__
+ lvsr 5,0,12
+#else
+ lvsl 5,0,12
+#endif
lvx 3,0,12
bf 31,L(setup_unaligned_loop)
/* Copy another 16 bytes to align to 32-bytes due to the loop . */
lvx 4,12,6
- vperm 6,3,4,5
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
+ vperm 6,3,4,5
+#endif
addi 11,12,16
addi 10,3,16
stvx 6,0,3
@@ -403,11 +411,17 @@ L(unaligned_loop):
vector instructions though. */
lvx 4,11,6 /* vr4 = r11+16. */
- vperm 6,3,4,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr6. */
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
+ vperm 6,3,4,5
+#endif
lvx 3,11,7 /* vr3 = r11+32. */
- vperm 10,4,3,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr10. */
+#ifdef __LITTLE_ENDIAN__
+ vperm 10,3,4,5
+#else
+ vperm 10,4,3,5
+#endif
addi 11,11,32
stvx 6,0,10
stvx 10,10,6
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/memrchr.S b/libc/sysdeps/powerpc/powerpc32/power7/memrchr.S
index defd832b0..9601aa799 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/memrchr.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/memrchr.S
@@ -23,117 +23,131 @@
.machine power7
ENTRY (__memrchr)
CALL_MCOUNT
- dcbt 0,r3
- mr r7,r3
- add r3,r7,r5 /* Calculate the last acceptable address. */
- cmplw cr7,r3,r7 /* Is the address equal or less than r3? */
+ add r7,r3,r5 /* Calculate the last acceptable address. */
+ neg r0,r7
+ addi r7,r7,-1
+ mr r10,r3
+ clrrwi r6,r7,7
+ li r9,3<<5
+ dcbt r9,r6,16 /* Stream hint, decreasing addresses. */
/* Replicate BYTE to word. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
- bge cr7,L(proceed)
-
- li r3,-1 /* Make r11 the biggest if r4 <= 0. */
-L(proceed):
+ rldimi r4,r4,8,48
+ rldimi r4,r4,16,32
li r6,-4
- addi r9,r3,-1
- clrrwi r8,r9,2
- addi r8,r8,4
- neg r0,r3
+ li r9,-1
rlwinm r0,r0,3,27,28 /* Calculate padding. */
-
+ clrrwi r8,r7,2
+ srw r9,r9,r0
cmplwi r5,16
+ clrrwi r0,r10,2
ble L(small_range)
- lwbrx r12,r8,r6 /* Load reversed word from memory. */
- cmpb r10,r12,r4 /* Check for BYTE in WORD1. */
- slw r10,r10,r0
- srw r10,r10,r0
- cmplwi cr7,r10,0 /* If r10 == 0, no BYTEs have been found. */
+#ifdef __LITTLE_ENDIAN__
+ lwzx r12,0,r8
+#else
+ lwbrx r12,0,r8 /* Load reversed word from memory. */
+#endif
+ cmpb r3,r12,r4 /* Check for BYTE in WORD1. */
+ and r3,r3,r9
+ cmplwi cr7,r3,0 /* If r3 == 0, no BYTEs have been found. */
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,-4
- cmplw cr6,r9,r7
- ble cr6,L(null)
-
mtcrf 0x01,r8
/* Are we now aligned to a doubleword boundary? If so, skip to
the main loop. Otherwise, go through the alignment code. */
- mr r8,r9
- bt 29,L(loop_setup)
+ bf 29,L(loop_setup)
/* Handle WORD2 of pair. */
+#ifdef __LITTLE_ENDIAN__
+ lwzx r12,r8,r6
+#else
lwbrx r12,r8,r6
- cmpb r10,r12,r4
- cmplwi cr7,r10,0
- bne cr7,L(done)
-
- /* Are we done already? */
+#endif
addi r8,r8,-4
- cmplw cr6,r8,r7
- ble cr6,L(null)
+ cmpb r3,r12,r4
+ cmplwi cr7,r3,0
+ bne cr7,L(done)
L(loop_setup):
- li r0,-8
- sub r5,r8,r7
- srwi r9,r5,3 /* Number of loop iterations. */
+ /* The last word we want to read in the loop below is the one
+ containing the first byte of the string, ie. the word at
+ s & ~3, or r0. The first word read is at r8 - 4, we
+ read 2 * cnt words, so the last word read will be at
+ r8 - 4 - 8 * cnt + 4. Solving for cnt gives
+ cnt = (r8 - r0) / 8 */
+ sub r5,r8,r0
+ addi r8,r8,-4
+ srwi r9,r5,3 /* Number of loop iterations. */
mtctr r9 /* Setup the counter. */
- b L(loop)
- /* Main loop to look for BYTE backwards in the string. Since it's a
- small loop (< 8 instructions), align it to 32-bytes. */
- .p2align 5
+
+ /* Main loop to look for BYTE backwards in the string.
+ FIXME: Investigate whether 32 byte align helps with this
+ 9 instruction loop. */
+ .align 5
L(loop):
/* Load two words, compare and merge in a
single register for speed. This is an attempt
to speed up the byte-checking process for bigger strings. */
- lwbrx r12,r8,r6
- lwbrx r11,r8,r0
- addi r8,r8,-4
- cmpb r10,r12,r4
+#ifdef __LITTLE_ENDIAN__
+ lwzx r12,0,r8
+ lwzx r11,r8,r6
+#else
+ lwbrx r12,0,r8
+ lwbrx r11,r8,r6
+#endif
+ cmpb r3,r12,r4
cmpb r9,r11,r4
- or r5,r9,r10 /* Merge everything in one word. */
+ or r5,r9,r3 /* Merge everything in one word. */
cmplwi cr7,r5,0
bne cr7,L(found)
- addi r8,r8,-4
+ addi r8,r8,-8
bdnz L(loop)
- /* We're here because the counter reached 0, and that means we
- didn't have any matches for BYTE in the whole range. Just return
- the original range. */
- addi r8,r8,4
- cmplw cr6,r8,r7
- bgt cr6,L(loop_small)
- b L(null)
- /* OK, one (or both) of the words contains BYTE. Check
- the first word and decrement the address in case the first
- word really contains BYTE. */
+ /* We may have one more word to read. */
+ cmplw r8,r0
+ bnelr
+
+#ifdef __LITTLE_ENDIAN__
+ lwzx r12,0,r8
+#else
+ lwbrx r12,0,r8
+#endif
+ cmpb r3,r12,r4
+ cmplwi cr7,r3,0
+ bne cr7,L(done)
+ blr
+
.align 4
L(found):
- cmplwi cr6,r10,0
- addi r8,r8,4
+ /* OK, one (or both) of the words contains BYTE. Check
+ the first word. */
+ cmplwi cr6,r3,0
bne cr6,L(done)
/* BYTE must be in the second word. Adjust the address
- again and move the result of cmpb to r10 so we can calculate the
+ again and move the result of cmpb to r3 so we can calculate the
pointer. */
- mr r10,r9
+ mr r3,r9
addi r8,r8,-4
- /* r10 has the output of the cmpb instruction, that is, it contains
+ /* r3 has the output of the cmpb instruction, that is, it contains
0xff in the same position as BYTE in the original
word from the string. Use that to calculate the pointer.
We need to make sure BYTE is *before* the end of the
range. */
L(done):
- cntlzw r0,r10 /* Count leading zeroes before the match. */
- srwi r6,r0,3 /* Convert leading zeroes to bytes. */
- addi r0,r6,1
+ cntlzw r9,r3 /* Count leading zeros before the match. */
+ cmplw r8,r0 /* Are we on the last word? */
+ srwi r6,r9,3 /* Convert leading zeros to bytes. */
+ addi r0,r6,-3
sub r3,r8,r0
- cmplw r3,r7
- blt L(null)
+ cmplw cr7,r3,r10
+ bnelr
+ bgelr cr7
+ li r3,0
blr
.align 4
@@ -147,28 +161,35 @@ L(small_range):
cmplwi r5,0
beq L(null)
- lwbrx r12,r8,r6 /* Load reversed word from memory. */
- cmpb r10,r12,r4 /* Check for null bytes in WORD1. */
- slw r10,r10,r0
- srw r10,r10,r0
- cmplwi cr7,r10,0
+#ifdef __LITTLE_ENDIAN__
+ lwzx r12,0,r8
+#else
+ lwbrx r12,0,r8 /* Load reversed word from memory. */
+#endif
+ cmpb r3,r12,r4 /* Check for BYTE in WORD1. */
+ and r3,r3,r9
+ cmplwi cr7,r3,0
bne cr7,L(done)
+ /* Are we done already? */
+ cmplw r8,r0
addi r8,r8,-4
- cmplw r8,r7
- ble L(null)
- b L(loop_small)
+ beqlr
- .p2align 5
+ .align 5
L(loop_small):
- lwbrx r12,r8,r6
- cmpb r10,r12,r4
- cmplwi cr6,r10,0
- bne cr6,L(done)
+#ifdef __LITTLE_ENDIAN__
+ lwzx r12,0,r8
+#else
+ lwbrx r12,0,r8
+#endif
+ cmpb r3,r12,r4
+ cmplw r8,r0
+ cmplwi cr7,r3,0
+ bne cr7,L(done)
addi r8,r8,-4
- cmplw r8,r7
- ble L(null)
- b L(loop_small)
+ bne L(loop_small)
+ blr
END (__memrchr)
weak_alias (__memrchr, memrchr)
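
For the backwards search the word is loaded so that the byte at the highest
address ends up in the most significant position, letting cntlzw locate the
last match in memory order: a byte-reversed lwbrx on big-endian, a plain lwzx
on little-endian.  A C sketch of that orientation (illustration only; uses
GCC builtins, and the helper name is made up):

    #include <stdint.h>
    #include <string.h>

    /* Offset (0-3) of the highest-addressed byte equal to c in a 4-byte
       block; the caller guarantees at least one byte matches.  */
    static unsigned
    last_match_offset (const unsigned char bytes[4], unsigned char c)
    {
      uint32_t w, m, t;
      memcpy (&w, bytes, 4);
    #if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
      w = __builtin_bswap32 (w);          /* what lwbrx does on big-endian */
    #endif
      m = w ^ (0x01010101u * c);          /* zero byte wherever bytes[i] == c */
      /* Exact zero-byte test: 0x80 in each byte of t that matched.  */
      t = ~(((m & 0x7f7f7f7fu) + 0x7f7f7f7fu) | m | 0x7f7f7f7fu);
      return 3 - __builtin_clz (t) / 8;   /* most significant marker wins */
    }
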
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/memset.S b/libc/sysdeps/powerpc/powerpc32/power7/memset.S
index 360ea717f..aadda2558 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/memset.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/memset.S
@@ -35,8 +35,8 @@ L(_memset):
cfi_offset(31,-8)
/* Replicate byte to word. */
- rlwimi 4,4,8,16,23
- rlwimi 4,4,16,0,15
+ insrdi 4,4,8,48
+ insrdi 4,4,16,32
ble cr6,L(small) /* If length <= 8, use short copy code. */
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/rawmemchr.S b/libc/sysdeps/powerpc/powerpc32/power7/rawmemchr.S
index a80c74a09..c2d8c4b7b 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/rawmemchr.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/rawmemchr.S
@@ -27,16 +27,21 @@ ENTRY (__rawmemchr)
clrrwi r8,r3,2 /* Align the address to word boundary. */
/* Replicate byte to word. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ rldimi r4,r4,8,48
+ rldimi r4,r4,16,32
/* Now r4 has a word of c bytes. */
rlwinm r6,r3,3,27,28 /* Calculate padding. */
lwz r12,0(r8) /* Load word from memory. */
cmpb r5,r12,r4 /* Compare each byte against c byte. */
+#ifdef __LITTLE_ENDIAN__
+ srw r5,r5,r6
+ slw r5,r5,r6
+#else
slw r5,r5,r6 /* Move left to discard ignored bits. */
srw r5,r5,r6 /* Bring the bits back as zeros. */
+#endif
cmpwi cr7,r5,0 /* If r5 == 0, no c bytes have been found. */
bne cr7,L(done)
@@ -90,8 +95,14 @@ L(loop):
word from the string. Use that fact to find out what is
the position of the byte inside the string. */
L(done):
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r5,-1
+ andc r0,r0,r5
+ popcntw r0,r0
+#else
cntlzw r0,r5 /* Count leading zeros before the match. */
- srwi r0,r0,3 /* Convert leading zeroes to bytes. */
+#endif
+ srwi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of the matching char. */
blr
END (__rawmemchr)
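
The little-endian L(done) path above needs the position of the least
significant 0x80 marker, i.e. a count of trailing zero bits, and there is no
ctz instruction here: (x - 1) & ~x turns the trailing zeros into a run of
ones that popcntw can count.  The same idiom in C (sketch; assumes a nonzero
mask and uses a GCC builtin):

    #include <stdint.h>

    static unsigned
    trailing_zeros (uint32_t x)           /* x != 0 */
    {
      return __builtin_popcount ((x - 1) & ~x);
    }

The byte offset of the first match in memory order is then
trailing_zeros (mask) / 8 on little-endian versus __builtin_clz (mask) / 8 on
big-endian, where mask is the cmpb result computed above.
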
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/strchr.S b/libc/sysdeps/powerpc/powerpc32/power7/strchr.S
index 0ecadb271..b66265967 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/strchr.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/strchr.S
@@ -35,8 +35,8 @@ ENTRY (strchr)
beq cr7,L(null_match)
/* Replicate byte to word. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
+ insrdi r4,r4,16,32
/* Now r4 has a word of c bytes and r0 has
a word of null bytes. */
@@ -46,11 +46,17 @@ ENTRY (strchr)
/* Move the words left and right to discard the bits that are
not part of the string and to bring them back as zeros. */
-
+#ifdef __LITTLE_ENDIAN__
+ srw r10,r10,r6
+ srw r11,r11,r6
+ slw r10,r10,r6
+ slw r11,r11,r6
+#else
slw r10,r10,r6
slw r11,r11,r6
srw r10,r10,r6
srw r11,r11,r6
+#endif
or r5,r10,r11 /* OR the results to speed things up. */
cmpwi cr7,r5,0 /* If r5 == 0, no c or null bytes
have been found. */
@@ -65,7 +71,7 @@ ENTRY (strchr)
/* Handle WORD2 of pair. */
lwzu r12,4(r8)
- cmpb r10,r12,r4
+ cmpb r10,r12,r4
cmpb r11,r12,r0
or r5,r10,r11
cmpwi cr7,r5,0
@@ -100,22 +106,31 @@ L(loop):
bne cr6,L(done)
/* The c/null byte must be in the second word. Adjust the address
- again and move the result of cmpb to r10 so we can calculate the
- pointer. */
+ again and move the result of cmpb to r10/r11 so we can calculate
+ the pointer. */
mr r10,r6
mr r11,r7
addi r8,r8,4
- /* r5 has the output of the cmpb instruction, that is, it contains
+ /* r10/r11 have the output of the cmpb instructions, that is,
0xff in the same position as the c/null byte in the original
word from the string. Use that to calculate the pointer. */
L(done):
- cntlzw r4,r10 /* Count leading zeroes before c matches. */
- cntlzw r0,r11 /* Count leading zeroes before null matches. */
- cmplw cr7,r4,r0
+#ifdef __LITTLE_ENDIAN__
+ addi r3,r10,-1
+ andc r3,r3,r10
+ popcntw r0,r3
+ addi r4,r11,-1
+ andc r4,r4,r11
+ cmplw cr7,r3,r4
+ bgt cr7,L(no_match)
+#else
+ cntlzw r0,r10 /* Count leading zeros before c matches. */
+ cmplw cr7,r11,r10
bgt cr7,L(no_match)
- srwi r0,r4,3 /* Convert leading zeroes to bytes. */
+#endif
+ srwi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of the matching c byte
or null in case c was not found. */
blr
@@ -133,10 +148,14 @@ L(null_match):
cmpb r5,r12,r0 /* Compare each byte against null bytes. */
/* Move the words left and right to discard the bits that are
- not part of the string and to bring them back as zeros. */
-
+ not part of the string and bring them back as zeros. */
+#ifdef __LITTLE_ENDIAN__
+ srw r5,r5,r6
+ slw r5,r5,r6
+#else
slw r5,r5,r6
srw r5,r5,r6
+#endif
cmpwi cr7,r5,0 /* If r10 == 0, no c or null bytes
have been found. */
bne cr7,L(done_null)
@@ -191,7 +210,13 @@ L(loop_null):
0xff in the same position as the null byte in the original
word from the string. Use that to calculate the pointer. */
L(done_null):
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r5,-1
+ andc r0,r0,r5
+ popcntw r0,r0
+#else
cntlzw r0,r5 /* Count leading zeros before the match. */
+#endif
srwi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of the matching null byte. */
blr
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/strchrnul.S b/libc/sysdeps/powerpc/powerpc32/power7/strchrnul.S
index d4cacab60..f5d24d434 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/strchrnul.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/strchrnul.S
@@ -27,8 +27,8 @@ ENTRY (__strchrnul)
clrrwi r8,r3,2 /* Align the address to word boundary. */
/* Replicate byte to word. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
+ insrdi r4,r4,16,32
rlwinm r6,r3,3,27,28 /* Calculate padding. */
lwz r12,0(r8) /* Load word from memory. */
@@ -43,10 +43,17 @@ ENTRY (__strchrnul)
/* Move the words left and right to discard the bits that are
not part of the string and bring them back as zeros. */
+#ifdef __LITTLE_ENDIAN__
+ srw r10,r10,r6
+ srw r9,r9,r6
+ slw r10,r10,r6
+ slw r9,r9,r6
+#else
slw r10,r10,r6
slw r9,r9,r6
srw r10,r10,r6
srw r9,r9,r6
+#endif
or r5,r9,r10 /* OR the results to speed things up. */
cmpwi cr7,r5,0 /* If r5 == 0, no c or null bytes
have been found. */
@@ -54,7 +61,7 @@ ENTRY (__strchrnul)
mtcrf 0x01,r8
- /* Are we now aligned to a quadword boundary? If so, skip to
+ /* Are we now aligned to a doubleword boundary? If so, skip to
the main loop. Otherwise, go through the alignment code. */
bt 29,L(loop)
@@ -76,7 +83,7 @@ L(loop):
single register for speed. This is an attempt
to speed up the null-checking process for bigger strings. */
lwz r12,4(r8)
- lwzu r11,8(r8)
+ lwzu r11,8(r8)
cmpb r10,r12,r0
cmpb r9,r12,r4
cmpb r6,r11,r0
@@ -95,9 +102,9 @@ L(loop):
addi r8,r8,-4
bne cr6,L(done)
- /* The c/null byte must be in the second word. Adjust the
- address again and move the result of cmpb to r10 so we can calculate
- the pointer. */
+ /* The c/null byte must be in the second word. Adjust the address
+ again and move the result of cmpb to r5 so we can calculate the
+ pointer. */
mr r5,r10
addi r8,r8,4
@@ -105,7 +112,13 @@ L(loop):
0xff in the same position as the c/null byte in the original
word from the string. Use that to calculate the pointer. */
L(done):
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r5,-1
+ andc r0,r0,r5
+ popcntw r0,r0
+#else
cntlzw r0,r5 /* Count leading zeros before the match. */
+#endif
srwi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of matching c/null byte. */
blr
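
The swapped srw/slw pair in the little-endian path exists because the bytes
below the aligned load address (the ones before the start of the string) sit
at the opposite end of the register: most significant bits on big-endian,
least significant on little-endian.  A C sketch of the masking (illustration
only; helper name made up):

    #include <stdint.h>

    /* Clear match markers falling in the (addr & 3) garbage bytes that
       precede the string in the first aligned word; pad is at most 24.  */
    static uint32_t
    drop_leading_garbage (uint32_t markers, uintptr_t addr)
    {
      unsigned pad = (addr & 3) * 8;
    #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
      return (markers >> pad) << pad;     /* garbage is in the low bits */
    #else
      return (markers << pad) >> pad;     /* garbage is in the high bits */
    #endif
    }
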
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/strlen.S b/libc/sysdeps/powerpc/powerpc32/power7/strlen.S
index b71a10f5c..b08d6c028 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/strlen.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/strlen.S
@@ -29,7 +29,11 @@ ENTRY (strlen)
li r0,0 /* Word with null chars to use with cmpb. */
li r5,-1 /* MASK = 0xffffffffffffffff. */
lwz r12,0(r4) /* Load word from memory. */
+#ifdef __LITTLE_ENDIAN__
+ slw r5,r5,r6
+#else
srw r5,r5,r6 /* MASK = MASK >> padding. */
+#endif
orc r9,r12,r5 /* Mask bits that are not part of the string. */
cmpb r10,r9,r0 /* Check for null bytes in WORD1. */
cmpwi cr7,r10,0 /* If r10 == 0, no null's have been found. */
@@ -47,9 +51,6 @@ ENTRY (strlen)
cmpb r10,r12,r0
cmpwi cr7,r10,0
bne cr7,L(done)
- b L(loop) /* We branch here (rather than falling through)
- to skip the nops due to heavy alignment
- of the loop below. */
/* Main loop to look for the end of the string. Since it's a
small loop (< 8 instructions), align it to 32-bytes. */
@@ -86,9 +87,15 @@ L(loop):
0xff in the same position as the null byte in the original
word from the string. Use that to calculate the length. */
L(done):
- cntlzw r0,r10 /* Count leading zeroes before the match. */
+#ifdef __LITTLE_ENDIAN__
+ addi r9, r10, -1 /* Form a mask from trailing zeros. */
+ andc r9, r9, r10
+ popcntw r0, r9 /* Count the bits in the mask. */
+#else
+ cntlzw r0,r10 /* Count leading zeros before the match. */
+#endif
subf r5,r3,r4
- srwi r0,r0,3 /* Convert leading zeroes to bytes. */
+ srwi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r5,r0 /* Compute final length. */
blr
END (strlen)
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/strncmp.S b/libc/sysdeps/powerpc/powerpc32/power7/strncmp.S
index fdae44d26..10c9d251b 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/strncmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/strncmp.S
@@ -26,7 +26,7 @@
EALIGN (strncmp,5,0)
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -39,6 +39,7 @@ EALIGN (strncmp,5,0)
#define r7F7F r9 /* constant 0x7f7f7f7f */
#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */
#define rBITDIF r11 /* bits that differ in s1 & s2 words */
+#define rTMP r12
dcbt 0,rSTR1
nop
@@ -78,13 +79,45 @@ L(g1): add rTMP,rFEFE,rWORD1
/* OK. We've hit the end of the string. We need to be careful that
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ slwi rTMP, rTMP, 1
+ addi rTMP2, rTMP, -1
+ andc rTMP2, rTMP2, rTMP
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rldimi rTMP2, rWORD2, 24, 32
+ rldimi rTMP, rWORD1, 24, 32
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr
+ ori rRTN, rTMP2, 1
+ blr
+
+L(different):
+ lwz rWORD1, -4(rSTR1)
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rldimi rTMP2, rWORD2, 24, 32
+ rldimi rTMP, rWORD1, 24, 32
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr
+ ori rRTN, rTMP2, 1
+ blr
+#else
L(endstring):
and rTMP,r7F7F,rWORD1
beq cr1,L(equal)
add rTMP,rTMP,r7F7F
xor. rBITDIF,rWORD1,rWORD2
-
andc rNEG,rNEG,rTMP
blt L(highbit)
cntlzw rBITDIF,rBITDIF
@@ -92,28 +125,20 @@ L(endstring):
addi rNEG,rNEG,7
cmpw cr1,rNEG,rBITDIF
sub rRTN,rWORD1,rWORD2
- blt cr1,L(equal)
- srawi rRTN,rRTN,31
- ori rRTN,rRTN,1
- blr
+ bgelr cr1
L(equal):
li rRTN,0
blr
L(different):
- lwzu rWORD1,-4(rSTR1)
+ lwz rWORD1,-4(rSTR1)
xor. rBITDIF,rWORD1,rWORD2
sub rRTN,rWORD1,rWORD2
- blt L(highbit)
- srawi rRTN,rRTN,31
- ori rRTN,rRTN,1
- blr
+ bgelr
L(highbit):
- srwi rWORD2,rWORD2,24
- srwi rWORD1,rWORD1,24
- sub rRTN,rWORD1,rWORD2
+ ori rRTN, rWORD2, 1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
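
The rlwinm/rldimi/rlwimi triples in the little-endian L(endstring) and
L(different) blocks above byte-reverse each word (as their comments say) so
that an unsigned subtract orders the strings in memory order.  The same
32-bit reversal written with shifts and masks in C (illustrative; in practice
__builtin_bswap32 would do):

    #include <stdint.h>

    static uint32_t
    byte_reverse32 (uint32_t x)
    {
      uint32_t odd = x & 0x00ff00ffu;         /* bytes 0 and 2 */
      uint32_t even = x & 0xff00ff00u;        /* bytes 1 and 3 */
      return ((odd << 24) | (odd >> 8))       /* rotate left by 24 */
             | ((even << 8) | (even >> 24));  /* rotate left by 8 */
    }
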
diff --git a/libc/sysdeps/powerpc/powerpc32/power7/strnlen.S b/libc/sysdeps/powerpc/powerpc32/power7/strnlen.S
index ed088366a..eb52afd1a 100644
--- a/libc/sysdeps/powerpc/powerpc32/power7/strnlen.S
+++ b/libc/sysdeps/powerpc/powerpc32/power7/strnlen.S
@@ -28,51 +28,47 @@ ENTRY (__strnlen)
add r7,r3,r4 /* Calculate the last acceptable address. */
cmplwi r4,16
li r0,0 /* Word with null chars. */
+ addi r7,r7,-1
ble L(small_range)
- cmplw cr7,r3,r7 /* Is the address equal or less than r3? If
- it's equal or less, it means size is either 0
- or a negative number. */
- ble cr7,L(proceed)
-
- li r7,-1 /* Make r11 the biggest if r4 <= 0. */
-L(proceed):
rlwinm r6,r3,3,27,28 /* Calculate padding. */
lwz r12,0(r8) /* Load word from memory. */
cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */
+#ifdef __LITTLE_ENDIAN__
+ srw r10,r10,r6
+ slw r10,r10,r6
+#else
slw r10,r10,r6
srw r10,r10,r6
+#endif
cmplwi cr7,r10,0 /* If r10 == 0, no null's have been found. */
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,4
- cmplw cr6,r9,r7
- bge cr6,L(end_max)
-
+ clrrwi r7,r7,2 /* Address of last word. */
mtcrf 0x01,r8
/* Are we now aligned to a doubleword boundary? If so, skip to
the main loop. Otherwise, go through the alignment code. */
bt 29,L(loop_setup)
- /* Handle DWORD2 of pair. */
+ /* Handle WORD2 of pair. */
lwzu r12,4(r8)
cmpb r10,r12,r0
cmplwi cr7,r10,0
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,4
- cmplw cr6,r9,r7
- bge cr6,L(end_max)
-
L(loop_setup):
- sub r5,r7,r9
+ /* The last word we want to read in the loop below is the one
+ containing the last byte of the string, ie. the word at
+ (s + size - 1) & ~3, or r7. The first word read is at
+ r8 + 4, we read 2 * cnt words, so the last word read will
+ be at r8 + 4 + 8 * cnt - 4. Solving for cnt gives
+ cnt = (r7 - r8) / 8 */
+ sub r5,r7,r8
srwi r6,r5,3 /* Number of loop iterations. */
mtctr r6 /* Setup the counter. */
- b L(loop)
- /* Main loop to look for the null byte backwards in the string. Since
+
+ /* Main loop to look for the null byte in the string. Since
it's a small loop (< 8 instructions), align it to 32-bytes. */
.p2align 5
L(loop):
@@ -88,15 +84,18 @@ L(loop):
cmplwi cr7,r5,0
bne cr7,L(found)
bdnz L(loop)
- /* We're here because the counter reached 0, and that means we
- didn't have any matches for null in the whole range. Just return
- the original size. */
- addi r9,r8,4
- cmplw cr6,r9,r7
- blt cr6,L(loop_small)
+
+ /* We may have one more word to read. */
+ cmplw cr6,r8,r7
+ beq cr6,L(end_max)
+
+ lwzu r12,4(r8)
+ cmpb r10,r12,r0
+ cmplwi cr6,r10,0
+ bne cr6,L(done)
L(end_max):
- sub r3,r7,r3
+ mr r3,r4
blr
/* OK, one (or both) of the words contains a null byte. Check
@@ -121,49 +120,56 @@ L(found):
We need to make sure the null char is *before* the end of the
range. */
L(done):
- cntlzw r0,r10 /* Count leading zeroes before the match. */
- srwi r0,r0,3 /* Convert leading zeroes to bytes. */
- add r9,r8,r0
- sub r6,r9,r3 /* Length until the match. */
- cmplw r9,r7
- bgt L(end_max)
- mr r3,r6
- blr
-
- .align 4
-L(zero):
- li r3,0
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r10,-1
+ andc r0,r0,r10
+ popcntw r0,r0
+#else
+ cntlzw r0,r10 /* Count leading zeros before the match. */
+#endif
+ sub r3,r8,r3
+ srwi r0,r0,3 /* Convert leading/trailing zeros to bytes. */
+ add r3,r3,r0 /* Length until the match. */
+ cmplw r3,r4
+ blelr
+ mr r3,r4
blr
-/* Deals with size <= 32. */
+/* Deals with size <= 16. */
.align 4
L(small_range):
cmplwi r4,0
- beq L(zero)
+ beq L(end_max)
+
+ clrrwi r7,r7,2 /* Address of last word. */
rlwinm r6,r3,3,27,28 /* Calculate padding. */
lwz r12,0(r8) /* Load word from memory. */
cmpb r10,r12,r0 /* Check for null bytes in WORD1. */
+#ifdef __LITTLE_ENDIAN__
+ srw r10,r10,r6
+ slw r10,r10,r6
+#else
slw r10,r10,r6
srw r10,r10,r6
+#endif
cmplwi cr7,r10,0
bne cr7,L(done)
- addi r9,r8,4
- cmplw r9,r7
- bge L(end_max)
- b L(loop_small)
+ cmplw r8,r7
+ beq L(end_max)
.p2align 5
L(loop_small):
lwzu r12,4(r8)
cmpb r10,r12,r0
- addi r9,r8,4
cmplwi cr6,r10,0
bne cr6,L(done)
- cmplw r9,r7
- bge L(end_max)
- b L(loop_small)
+ cmplw r8,r7
+ bne L(loop_small)
+ mr r3,r4
+ blr
+
END (__strnlen)
weak_alias (__strnlen, strnlen)
libc_hidden_builtin_def (strnlen)
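
The new comment above derives the loop trip count; restated in C (a sketch,
not glibc code, with made-up names):

    #include <stddef.h>
    #include <stdint.h>

    /* On its i-th pass (1-based) the unrolled loop reads the words at
       p + 8*i - 4 and p + 8*i, where p is r8 at loop entry.  The last read
       must not go past last_word = (s + size - 1) & ~3, so the trip count
       is the largest cnt with p + 8*cnt <= last_word.  */
    static size_t
    trip_count (uintptr_t p, uintptr_t last_word)
    {
      return (last_word - p) / 8;
    }
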
diff --git a/libc/sysdeps/powerpc/powerpc32/setjmp-common.S b/libc/sysdeps/powerpc/powerpc32/setjmp-common.S
index 60b0026fa..3fb65b5f7 100644
--- a/libc/sysdeps/powerpc/powerpc32/setjmp-common.S
+++ b/libc/sysdeps/powerpc/powerpc32/setjmp-common.S
@@ -24,6 +24,11 @@
# include <jmpbuf-offsets.h>
#endif
+#if defined __SPE__ || (defined __NO_FPRS__ && !defined _SOFT_FLOAT)
+# define SAVE_GP(N) evstdd r##N,((JB_FPRS+((N)-14)*2)*4)(3)
+#else
+# define SAVE_GP(N) stw r##N,((JB_GPRS+(N)-14)*4)(3)
+#endif
ENTRY (__sigsetjmp)
@@ -35,31 +40,31 @@ ENTRY (__sigsetjmp)
stw r1,(JB_GPR1*4)(3)
#endif
mflr r0
- stw r14,((JB_GPRS+0)*4)(3)
+ SAVE_GP (14)
#ifdef PTR_MANGLE
PTR_MANGLE2 (r0, r10)
li r10,0
#endif
stw r0,(JB_LR*4)(3)
- stw r15,((JB_GPRS+1)*4)(3)
+ SAVE_GP (15)
mfcr r0
- stw r16,((JB_GPRS+2)*4)(3)
+ SAVE_GP (16)
stw r0,(JB_CR*4)(3)
- stw r17,((JB_GPRS+3)*4)(3)
- stw r18,((JB_GPRS+4)*4)(3)
- stw r19,((JB_GPRS+5)*4)(3)
- stw r20,((JB_GPRS+6)*4)(3)
- stw r21,((JB_GPRS+7)*4)(3)
- stw r22,((JB_GPRS+8)*4)(3)
- stw r23,((JB_GPRS+9)*4)(3)
- stw r24,((JB_GPRS+10)*4)(3)
- stw r25,((JB_GPRS+11)*4)(3)
- stw r26,((JB_GPRS+12)*4)(3)
- stw r27,((JB_GPRS+13)*4)(3)
- stw r28,((JB_GPRS+14)*4)(3)
- stw r29,((JB_GPRS+15)*4)(3)
- stw r30,((JB_GPRS+16)*4)(3)
- stw r31,((JB_GPRS+17)*4)(3)
+ SAVE_GP (17)
+ SAVE_GP (18)
+ SAVE_GP (19)
+ SAVE_GP (20)
+ SAVE_GP (21)
+ SAVE_GP (22)
+ SAVE_GP (23)
+ SAVE_GP (24)
+ SAVE_GP (25)
+ SAVE_GP (26)
+ SAVE_GP (27)
+ SAVE_GP (28)
+ SAVE_GP (29)
+ SAVE_GP (30)
+ SAVE_GP (31)
#if defined NOT_IN_libc && defined IS_IN_rtld
li r3,0
blr
diff --git a/libc/sysdeps/powerpc/powerpc32/setjmp.S b/libc/sysdeps/powerpc/powerpc32/setjmp.S
index 8a8cf0d6e..49b64ecf0 100644
--- a/libc/sysdeps/powerpc/powerpc32/setjmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/setjmp.S
@@ -25,7 +25,7 @@
#else /* !NOT_IN_libc */
/* Build a versioned object for libc. */
-default_symbol_version (__vmx__sigsetjmp,__sigsetjmp,GLIBC_2.3.4)
+versioned_symbol (libc, __vmx__sigsetjmp, __sigsetjmp, GLIBC_2_3_4)
# define __sigsetjmp __vmx__sigsetjmp
# define __sigjmp_save __vmx__sigjmp_save
# include "setjmp-common.S"
@@ -35,7 +35,7 @@ default_symbol_version (__vmx__sigsetjmp,__sigsetjmp,GLIBC_2.3.4)
# undef __sigsetjmp
# undef __sigjmp_save
# undef JB_SIZE
-symbol_version (__novmx__sigsetjmp,__sigsetjmp,GLIBC_2.0)
+compat_symbol (libc, __novmx__sigsetjmp, __sigsetjmp, GLIBC_2_0)
# define __sigsetjmp __novmx__sigsetjmp
# define __sigjmp_save __novmx__sigjmp_save
# include "setjmp-common.S"
diff --git a/libc/sysdeps/powerpc/powerpc32/stackguard-macros.h b/libc/sysdeps/powerpc/powerpc32/stackguard-macros.h
index 839f6a4b9..b3d0af830 100644
--- a/libc/sysdeps/powerpc/powerpc32/stackguard-macros.h
+++ b/libc/sysdeps/powerpc/powerpc32/stackguard-macros.h
@@ -2,3 +2,13 @@
#define STACK_CHK_GUARD \
({ uintptr_t x; asm ("lwz %0,-28680(2)" : "=r" (x)); x; })
+
+#define POINTER_CHK_GUARD \
+ ({ \
+ uintptr_t x; \
+ asm ("lwz %0,%1(2)" \
+ : "=r" (x) \
+ : "i" (offsetof (tcbhead_t, pointer_guard) - TLS_TCB_OFFSET - sizeof (tcbhead_t)) \
+ ); \
+ x; \
+ })
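
The new POINTER_CHK_GUARD macro only fetches the per-thread pointer guard
from the TCB; the usual way such a guard is consumed is to XOR-mangle
sensitive saved pointers so that a corrupted value cannot be aimed without
knowing the guard.  A hypothetical illustration (this is not glibc's
PTR_MANGLE definition):

    #include <stdint.h>

    /* `guard' would come from POINTER_CHK_GUARD in real code; applying the
       same XOR again recovers the original pointer.  */
    static uintptr_t
    mangle_pointer (uintptr_t ptr, uintptr_t guard)
    {
      return ptr ^ guard;
    }
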
diff --git a/libc/sysdeps/powerpc/powerpc32/stpcpy.S b/libc/sysdeps/powerpc/powerpc32/stpcpy.S
index 03c6dddc3..7e106e0e6 100644
--- a/libc/sysdeps/powerpc/powerpc32/stpcpy.S
+++ b/libc/sysdeps/powerpc/powerpc32/stpcpy.S
@@ -62,7 +62,22 @@ L(g2): add rTMP, rFEFE, rWORD
mr rALT, rWORD
/* We've hit the end of the string. Do the rest byte-by-byte. */
-L(g1): rlwinm. rTMP, rALT, 8, 24, 31
+L(g1):
+#ifdef __LITTLE_ENDIAN__
+ rlwinm. rTMP, rALT, 0, 24, 31
+ stbu rALT, 4(rDEST)
+ beqlr-
+ rlwinm. rTMP, rALT, 24, 24, 31
+ stbu rTMP, 1(rDEST)
+ beqlr-
+ rlwinm. rTMP, rALT, 16, 24, 31
+ stbu rTMP, 1(rDEST)
+ beqlr-
+ rlwinm rTMP, rALT, 8, 24, 31
+ stbu rTMP, 1(rDEST)
+ blr
+#else
+ rlwinm. rTMP, rALT, 8, 24, 31
stbu rTMP, 4(rDEST)
beqlr-
rlwinm. rTMP, rALT, 16, 24, 31
@@ -73,6 +88,7 @@ L(g1): rlwinm. rTMP, rALT, 8, 24, 31
beqlr-
stbu rALT, 1(rDEST)
blr
+#endif
/* Oh well. In this case, we just do a byte-by-byte copy. */
.align 4
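
The #ifdef block above (and the matching one in strcpy.S further down) stores
the final word one byte at a time in memory order, stopping after the NUL;
the only endian difference is which end of the register holds the
lowest-addressed byte.  A C sketch (illustration only, helper name made up):

    #include <stdint.h>

    /* Store the bytes of w to dst in memory order up to and including the
       terminating NUL.  w was fetched with a normal word load, so its byte
       order is the host's.  */
    static void
    store_tail (unsigned char *dst, uint32_t w)
    {
      unsigned i;
      for (i = 0; i < 4; i++)
        {
    #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
          unsigned char b = (unsigned char) (w >> (8 * i));       /* LSB first */
    #else
          unsigned char b = (unsigned char) (w >> (24 - 8 * i));  /* MSB first */
    #endif
          dst[i] = b;
          if (b == 0)
            break;
        }
    }
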
diff --git a/libc/sysdeps/powerpc/powerpc32/strchr.S b/libc/sysdeps/powerpc/powerpc32/strchr.S
index c9952eecc..605056577 100644
--- a/libc/sysdeps/powerpc/powerpc32/strchr.S
+++ b/libc/sysdeps/powerpc/powerpc32/strchr.S
@@ -36,6 +36,8 @@ ENTRY (strchr)
#define rIGN r10 /* number of bits we should ignore in the first word */
#define rMASK r11 /* mask with the bits to ignore set to 0 */
#define rTMP3 r12
+#define rTMP4 rIGN
+#define rTMP5 rMASK
rlwimi rCHR, rCHR, 8, 16, 23
@@ -49,64 +51,93 @@ ENTRY (strchr)
addi r7F7F, r7F7F, 0x7f7f
/* Test the first (partial?) word. */
lwz rWORD, 0(rSTR)
+#ifdef __LITTLE_ENDIAN__
+ slw rMASK, rMASK, rIGN
+#else
srw rMASK, rMASK, rIGN
+#endif
orc rWORD, rWORD, rMASK
add rTMP1, rFEFE, rWORD
nor rTMP2, r7F7F, rWORD
- and. rTMP1, rTMP1, rTMP2
+ and. rTMP4, rTMP1, rTMP2
xor rTMP3, rCHR, rWORD
orc rTMP3, rTMP3, rMASK
b L(loopentry)
/* The loop. */
-L(loop):lwzu rWORD, 4(rSTR)
- and. rTMP1, rTMP1, rTMP2
+L(loop):
+ lwzu rWORD, 4(rSTR)
+ and. rTMP5, rTMP1, rTMP2
/* Test for 0. */
- add rTMP1, rFEFE, rWORD
- nor rTMP2, r7F7F, rWORD
+ add rTMP1, rFEFE, rWORD /* x - 0x01010101. */
+ nor rTMP2, r7F7F, rWORD /* ~(x | 0x7f7f7f7f) == ~x & 0x80808080. */
bne L(foundit)
- and. rTMP1, rTMP1, rTMP2
+ and. rTMP4, rTMP1, rTMP2 /* (x - 0x01010101) & ~x & 0x80808080. */
/* Start test for the bytes we're looking for. */
xor rTMP3, rCHR, rWORD
L(loopentry):
add rTMP1, rFEFE, rTMP3
nor rTMP2, r7F7F, rTMP3
beq L(loop)
+
/* There is a zero byte in the word, but there may also be a matching byte (either
before or after the zero byte). In fact, we may be looking for a
- zero byte, in which case we return a match. We guess that this hasn't
- happened, though. */
-L(missed):
- and. rTMP1, rTMP1, rTMP2
+ zero byte, in which case we return a match. */
+ and. rTMP5, rTMP1, rTMP2
li rRTN, 0
beqlr
-/* It did happen. Decide which one was first...
- I'm not sure if this is actually faster than a sequence of
- rotates, compares, and branches (we use it anyway because it's shorter). */
+/* At this point:
+ rTMP5 bytes are 0x80 for each match of c, 0 otherwise.
+ rTMP4 bytes are 0x80 for each match of 0, 0 otherwise.
+ But there may be false matches in the next most significant byte from
+ a true match due to carries. This means we need to recalculate the
+ matches using a longer method for big-endian. */
+#ifdef __LITTLE_ENDIAN__
+ addi rTMP1, rTMP5, -1
+ andc rTMP1, rTMP1, rTMP5
+ cntlzw rCLZB, rTMP1
+ addi rTMP2, rTMP4, -1
+ andc rTMP2, rTMP2, rTMP4
+ cmplw rTMP1, rTMP2
+ bgtlr
+ subfic rCLZB, rCLZB, 32-7
+#else
+/* I think we could reduce this by two instructions by keeping the "nor"
+ results from the loop for reuse here. See strlen.S tail. Similarly
+ one instruction could be pruned from L(foundit). */
and rFEFE, r7F7F, rWORD
- or rMASK, r7F7F, rWORD
+ or rTMP5, r7F7F, rWORD
and rTMP1, r7F7F, rTMP3
- or rIGN, r7F7F, rTMP3
+ or rTMP4, r7F7F, rTMP3
add rFEFE, rFEFE, r7F7F
add rTMP1, rTMP1, r7F7F
- nor rWORD, rMASK, rFEFE
- nor rTMP2, rIGN, rTMP1
+ nor rWORD, rTMP5, rFEFE
+ nor rTMP2, rTMP4, rTMP1
+ cntlzw rCLZB, rTMP2
cmplw rWORD, rTMP2
bgtlr
- cntlzw rCLZB, rTMP2
+#endif
srwi rCLZB, rCLZB, 3
add rRTN, rSTR, rCLZB
blr
L(foundit):
+#ifdef __LITTLE_ENDIAN__
+ addi rTMP1, rTMP5, -1
+ andc rTMP1, rTMP1, rTMP5
+ cntlzw rCLZB, rTMP1
+ subfic rCLZB, rCLZB, 32-7-32
+ srawi rCLZB, rCLZB, 3
+#else
and rTMP1, r7F7F, rTMP3
- or rIGN, r7F7F, rTMP3
+ or rTMP4, r7F7F, rTMP3
add rTMP1, rTMP1, r7F7F
- nor rTMP2, rIGN, rTMP1
+ nor rTMP2, rTMP4, rTMP1
cntlzw rCLZB, rTMP2
subi rSTR, rSTR, 4
srwi rCLZB, rCLZB, 3
+#endif
add rRTN, rSTR, rCLZB
blr
END (strchr)
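
On little-endian the code above isolates the bits below the lowest 0x80
marker and converts their count into a byte index; the
subfic rCLZB,rCLZB,32-7 / srwi rCLZB,rCLZB,3 pair is the arithmetic sketched
below (the POWER7 variants reach the same index with popcntw instead of
cntlzw).  Illustrative C with a made-up name and a GCC builtin:

    #include <stdint.h>

    /* Byte index (0-3, memory order on little-endian) of the lowest 0x80
       marker in t; caller guarantees t != 0 and that markers occur only in
       bit 7 of each byte.  */
    static unsigned
    first_marker_byte (uint32_t t)
    {
      uint32_t below = (t - 1) & ~t;               /* 8k+7 trailing ones */
      return (25u - __builtin_clz (below)) / 8;    /* ((32-7) - clz) >> 3 */
    }
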
diff --git a/libc/sysdeps/powerpc/powerpc32/strcmp.S b/libc/sysdeps/powerpc/powerpc32/strcmp.S
index 297ca3c1b..91d60c905 100644
--- a/libc/sysdeps/powerpc/powerpc32/strcmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/strcmp.S
@@ -24,7 +24,7 @@
EALIGN (strcmp, 4, 0)
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -34,6 +34,7 @@ EALIGN (strcmp, 4, 0)
#define r7F7F r8 /* constant 0x7f7f7f7f */
#define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f) */
#define rBITDIF r10 /* bits that differ in s1 & s2 words */
+#define rTMP r11
or rTMP, rSTR2, rSTR1
@@ -56,10 +57,45 @@ L(g1): add rTMP, rFEFE, rWORD1
and. rTMP, rTMP, rNEG
cmpw cr1, rWORD1, rWORD2
beq+ L(g0)
-L(endstring):
+
/* OK. We've hit the end of the string. We need to be careful that
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ addi rTMP2, rTMP, -1
+ andc rTMP2, rTMP2, rTMP
+ rlwimi rTMP2, rTMP2, 1, 0, 30
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rlwimi rTMP2, rWORD2, 24, 0, 7
+ rlwimi rTMP, rWORD1, 24, 0, 7
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr+
+ ori rRTN, rTMP2, 1
+ blr
+
+L(different):
+ lwz rWORD1, -4(rSTR1)
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rlwimi rTMP2, rWORD2, 24, 0, 7
+ rlwimi rTMP, rWORD1, 24, 0, 7
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr+
+ ori rRTN, rTMP2, 1
+ blr
+
+#else
+L(endstring):
and rTMP, r7F7F, rWORD1
beq cr1, L(equal)
add rTMP, rTMP, r7F7F
@@ -84,7 +120,7 @@ L(different):
L(highbit):
ori rRTN, rWORD2, 1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
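
The little-endian L(endstring) above masks both words off just past the first
NUL in rWORD1, byte-reverses them, and lets an unsigned comparison produce
the sign.  The same steps in C, assuming a little-endian host and that w1 is
known to contain the terminating NUL (illustration only, not the asm's exact
register dance):

    #include <stdint.h>

    static int
    final_word_cmp (uint32_t w1, uint32_t w2)
    {
      /* Cheap zero-byte test; any spurious 0x80 markers land above the
         first real one, which is harmless since only the lowest is used.  */
      uint32_t z = (w1 + 0xfefefeffu) & ~(w1 | 0x7f7f7f7fu);
      uint32_t keep = (z - 1) & ~z;          /* everything below the marker */
      keep |= keep << 1;                     /* ...plus the NUL byte itself */
      w1 = __builtin_bswap32 (w1 & keep);    /* memory order -> numeric order */
      w2 = __builtin_bswap32 (w2 & keep);
      return (w1 > w2) - (w1 < w2);
    }
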
diff --git a/libc/sysdeps/powerpc/powerpc32/strcpy.S b/libc/sysdeps/powerpc/powerpc32/strcpy.S
index 4ae577dbb..e938cc42a 100644
--- a/libc/sysdeps/powerpc/powerpc32/strcpy.S
+++ b/libc/sysdeps/powerpc/powerpc32/strcpy.S
@@ -62,7 +62,22 @@ L(g2): add rTMP, rFEFE, rWORD
mr rALT, rWORD
/* We've hit the end of the string. Do the rest byte-by-byte. */
-L(g1): rlwinm. rTMP, rALT, 8, 24, 31
+L(g1):
+#ifdef __LITTLE_ENDIAN__
+ rlwinm. rTMP, rALT, 0, 24, 31
+ stb rALT, 4(rDEST)
+ beqlr-
+ rlwinm. rTMP, rALT, 24, 24, 31
+ stb rTMP, 5(rDEST)
+ beqlr-
+ rlwinm. rTMP, rALT, 16, 24, 31
+ stb rTMP, 6(rDEST)
+ beqlr-
+ rlwinm rTMP, rALT, 8, 24, 31
+ stb rTMP, 7(rDEST)
+ blr
+#else
+ rlwinm. rTMP, rALT, 8, 24, 31
stb rTMP, 4(rDEST)
beqlr-
rlwinm. rTMP, rALT, 16, 24, 31
@@ -73,6 +88,7 @@ L(g1): rlwinm. rTMP, rALT, 8, 24, 31
beqlr-
stb rALT, 7(rDEST)
blr
+#endif
/* Oh well. In this case, we just do a byte-by-byte copy. */
.align 4
diff --git a/libc/sysdeps/powerpc/powerpc32/strlen.S b/libc/sysdeps/powerpc/powerpc32/strlen.S
index 9a6eafc38..a7153ed7a 100644
--- a/libc/sysdeps/powerpc/powerpc32/strlen.S
+++ b/libc/sysdeps/powerpc/powerpc32/strlen.S
@@ -29,7 +29,12 @@
1 is subtracted you get a value in the range 0x00-0x7f, none of which
have their high bit set. The expression here is
(x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when
- there were no 0x00 bytes in the word.
+ there were no 0x00 bytes in the word. You get 0x80 in bytes that
+ match, but possibly false 0x80 matches in the next more significant
+ byte to a true match due to carries. For little-endian this is
+ of no consequence since the least significant match is the one
+ we're interested in, but big-endian needs method 2 to find which
+ byte matches.
2) Given a word 'x', we can test to see _which_ byte was zero by
calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).
@@ -72,7 +77,7 @@
ENTRY (strlen)
-#define rTMP1 r0
+#define rTMP4 r0
#define rRTN r3 /* incoming STR arg, outgoing result */
#define rSTR r4 /* current string position */
#define rPADN r5 /* number of padding bits we prepend to the
@@ -82,9 +87,9 @@ ENTRY (strlen)
#define rWORD1 r8 /* current string word */
#define rWORD2 r9 /* next string word */
#define rMASK r9 /* mask for first string word */
-#define rTMP2 r10
-#define rTMP3 r11
-#define rTMP4 r12
+#define rTMP1 r10
+#define rTMP2 r11
+#define rTMP3 r12
clrrwi rSTR, rRTN, 2
@@ -93,15 +98,20 @@ ENTRY (strlen)
lwz rWORD1, 0(rSTR)
li rMASK, -1
addi r7F7F, r7F7F, 0x7f7f
-/* That's the setup done, now do the first pair of words.
- We make an exception and use method (2) on the first two words, to reduce
- overhead. */
+/* We use method (2) on the first two words, because rFEFE isn't
+ required which reduces setup overhead. Also gives a faster return
+ for small strings on big-endian due to needing to recalculate with
+ method (2) anyway. */
+#ifdef __LITTLE_ENDIAN__
+ slw rMASK, rMASK, rPADN
+#else
srw rMASK, rMASK, rPADN
+#endif
and rTMP1, r7F7F, rWORD1
or rTMP2, r7F7F, rWORD1
add rTMP1, rTMP1, r7F7F
- nor rTMP1, rTMP2, rTMP1
- and. rWORD1, rTMP1, rMASK
+ nor rTMP3, rTMP2, rTMP1
+ and. rTMP3, rTMP3, rMASK
mtcrf 0x01, rRTN
bne L(done0)
lis rFEFE, -0x101
@@ -110,11 +120,12 @@ ENTRY (strlen)
bt 29, L(loop)
/* Handle second word of pair. */
+/* Perhaps use method (1) here for little-endian, saving one instruction? */
lwzu rWORD1, 4(rSTR)
and rTMP1, r7F7F, rWORD1
or rTMP2, r7F7F, rWORD1
add rTMP1, rTMP1, r7F7F
- nor. rWORD1, rTMP2, rTMP1
+ nor. rTMP3, rTMP2, rTMP1
bne L(done0)
/* The loop. */
@@ -128,28 +139,52 @@ L(loop):
add rTMP3, rFEFE, rWORD2
nor rTMP4, r7F7F, rWORD2
bne L(done1)
- and. rTMP1, rTMP3, rTMP4
+ and. rTMP3, rTMP3, rTMP4
beq L(loop)
+#ifndef __LITTLE_ENDIAN__
and rTMP1, r7F7F, rWORD2
add rTMP1, rTMP1, r7F7F
- andc rWORD1, rTMP4, rTMP1
+ andc rTMP3, rTMP4, rTMP1
b L(done0)
L(done1):
and rTMP1, r7F7F, rWORD1
subi rSTR, rSTR, 4
add rTMP1, rTMP1, r7F7F
- andc rWORD1, rTMP2, rTMP1
+ andc rTMP3, rTMP2, rTMP1
/* When we get to here, rSTR points to the first word in the string that
- contains a zero byte, and the most significant set bit in rWORD1 is in that
- byte. */
+ contains a zero byte, and rTMP3 has 0x80 for bytes that are zero,
+ and 0x00 otherwise. */
L(done0):
- cntlzw rTMP3, rWORD1
+ cntlzw rTMP3, rTMP3
subf rTMP1, rRTN, rSTR
srwi rTMP3, rTMP3, 3
add rRTN, rTMP1, rTMP3
blr
+#else
+
+L(done0):
+ addi rTMP1, rTMP3, -1 /* Form a mask from trailing zeros. */
+ andc rTMP1, rTMP1, rTMP3
+ cntlzw rTMP1, rTMP1 /* Count bits not in the mask. */
+ subf rTMP3, rRTN, rSTR
+ subfic rTMP1, rTMP1, 32-7
+ srwi rTMP1, rTMP1, 3
+ add rRTN, rTMP1, rTMP3
+ blr
+
+L(done1):
+ addi rTMP3, rTMP1, -1
+ andc rTMP3, rTMP3, rTMP1
+ cntlzw rTMP3, rTMP3
+ subf rTMP1, rRTN, rSTR
+ subfic rTMP3, rTMP3, 32-7-32
+ srawi rTMP3, rTMP3, 3
+ add rRTN, rTMP1, rTMP3
+ blr
+#endif
+
END (strlen)
libc_hidden_builtin_def (strlen)
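
The expanded comment above is the crux of most of the little-endian changes
in this patch: method (1) is cheaper, but its carry can leak a marker into
the next more significant byte, which only matters when the wanted match is
the most significant one (the big-endian case).  A self-contained
demonstration (illustrative):

    #include <assert.h>
    #include <stdint.h>

    int
    main (void)
    {
      uint32_t x = 0x01000000u;  /* bytes, most significant first: 01 00 00 00 */
      /* Method (1): (x + 0xfefefeff) & ~(x | 0x7f7f7f7f).  */
      uint32_t fast = (x + 0xfefefeffu) & ~(x | 0x7f7f7f7fu);
      /* Method (2): ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).  */
      uint32_t exact = ~(((x & 0x7f7f7f7fu) + 0x7f7f7f7fu) | x | 0x7f7f7f7fu);
      assert (fast == 0x80808080u);   /* spurious marker in the 0x01 byte */
      assert (exact == 0x00808080u);  /* markers only in the zero bytes */
      return 0;
    }
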
diff --git a/libc/sysdeps/powerpc/powerpc32/strncmp.S b/libc/sysdeps/powerpc/powerpc32/strncmp.S
index fa345d293..e36a160a8 100644
--- a/libc/sysdeps/powerpc/powerpc32/strncmp.S
+++ b/libc/sysdeps/powerpc/powerpc32/strncmp.S
@@ -24,7 +24,7 @@
EALIGN (strncmp, 4, 0)
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -35,6 +35,7 @@ EALIGN (strncmp, 4, 0)
#define r7F7F r9 /* constant 0x7f7f7f7f */
#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f) */
#define rBITDIF r11 /* bits that differ in s1 & s2 words */
+#define rTMP r12
dcbt 0,rSTR1
or rTMP, rSTR2, rSTR1
@@ -73,12 +74,45 @@ L(g1): add rTMP, rFEFE, rWORD1
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ slwi rTMP, rTMP, 1
+ addi rTMP2, rTMP, -1
+ andc rTMP2, rTMP2, rTMP
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rlwimi rTMP2, rWORD2, 24, 0, 7
+ rlwimi rTMP, rWORD1, 24, 0, 7
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr+
+ ori rRTN, rTMP2, 1
+ blr
+
+L(different):
+ lwz rWORD1, -4(rSTR1)
+ rlwinm rTMP2, rWORD2, 8, 0xffffffff /* Byte reverse word. */
+ rlwinm rTMP, rWORD1, 8, 0xffffffff
+ rlwimi rTMP2, rWORD2, 24, 0, 7
+ rlwimi rTMP, rWORD1, 24, 0, 7
+ rlwimi rTMP2, rWORD2, 24, 16, 23
+ rlwimi rTMP, rWORD1, 24, 16, 23
+ xor. rBITDIF, rTMP, rTMP2
+ sub rRTN, rTMP, rTMP2
+ bgelr+
+ ori rRTN, rTMP2, 1
+ blr
+
+#else
L(endstring):
and rTMP, r7F7F, rWORD1
beq cr1, L(equal)
add rTMP, rTMP, r7F7F
xor. rBITDIF, rWORD1, rWORD2
-
andc rNEG, rNEG, rTMP
blt- L(highbit)
cntlzw rBITDIF, rBITDIF
@@ -86,28 +120,20 @@ L(endstring):
addi rNEG, rNEG, 7
cmpw cr1, rNEG, rBITDIF
sub rRTN, rWORD1, rWORD2
- blt- cr1, L(equal)
- srawi rRTN, rRTN, 31
- ori rRTN, rRTN, 1
- blr
+ bgelr+ cr1
L(equal):
li rRTN, 0
blr
L(different):
- lwzu rWORD1, -4(rSTR1)
+ lwz rWORD1, -4(rSTR1)
xor. rBITDIF, rWORD1, rWORD2
sub rRTN, rWORD1, rWORD2
- blt- L(highbit)
- srawi rRTN, rRTN, 31
- ori rRTN, rRTN, 1
- blr
+ bgelr+
L(highbit):
- srwi rWORD2, rWORD2, 24
- srwi rWORD1, rWORD1, 24
- sub rRTN, rWORD1, rWORD2
+ ori rRTN, rWORD2, 1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
diff --git a/libc/sysdeps/powerpc/powerpc64/__longjmp-common.S b/libc/sysdeps/powerpc/powerpc64/__longjmp-common.S
index 70c370439..4f1e3c88d 100644
--- a/libc/sysdeps/powerpc/powerpc64/__longjmp-common.S
+++ b/libc/sysdeps/powerpc/powerpc64/__longjmp-common.S
@@ -57,7 +57,7 @@ ENTRY (__longjmp)
beq L(no_vmx)
la r5,((JB_VRS)*8)(3)
andi. r6,r5,0xf
- lwz r0,((JB_VRSAVE)*8)(3)
+ lwz r0,((JB_VRSAVE)*8)(3) /* 32-bit VRSAVE. */
mtspr VRSAVE,r0
beq+ L(aligned_restore_vmx)
addi r6,r5,16
@@ -153,7 +153,7 @@ L(no_vmx):
lfd fp21,((JB_FPRS+7)*8)(r3)
ld r22,((JB_GPRS+8)*8)(r3)
lfd fp22,((JB_FPRS+8)*8)(r3)
- ld r0,(JB_CR*8)(r3)
+ lwz r0,((JB_CR*8)+4)(r3) /* 32-bit CR. */
ld r23,((JB_GPRS+9)*8)(r3)
lfd fp23,((JB_FPRS+9)*8)(r3)
ld r24,((JB_GPRS+10)*8)(r3)
diff --git a/libc/sysdeps/powerpc/powerpc64/dl-machine.h b/libc/sysdeps/powerpc/powerpc64/dl-machine.h
index 059fdafd5..18cf15738 100644
--- a/libc/sysdeps/powerpc/powerpc64/dl-machine.h
+++ b/libc/sysdeps/powerpc/powerpc64/dl-machine.h
@@ -561,6 +561,12 @@ elf_machine_rela (struct link_map *map,
Elf64_Addr *const reloc_addr = reloc_addr_arg;
const int r_type = ELF64_R_TYPE (reloc->r_info);
const Elf64_Sym *const refsym = sym;
+ union unaligned
+ {
+ uint16_t u2;
+ uint32_t u4;
+ uint64_t u8;
+ } __attribute__ ((__packed__));
if (r_type == R_PPC64_RELATIVE)
{
@@ -741,23 +747,11 @@ elf_machine_rela (struct link_map *map,
return;
case R_PPC64_UADDR64:
- /* We are big-endian. */
- ((char *) reloc_addr_arg)[0] = (value >> 56) & 0xff;
- ((char *) reloc_addr_arg)[1] = (value >> 48) & 0xff;
- ((char *) reloc_addr_arg)[2] = (value >> 40) & 0xff;
- ((char *) reloc_addr_arg)[3] = (value >> 32) & 0xff;
- ((char *) reloc_addr_arg)[4] = (value >> 24) & 0xff;
- ((char *) reloc_addr_arg)[5] = (value >> 16) & 0xff;
- ((char *) reloc_addr_arg)[6] = (value >> 8) & 0xff;
- ((char *) reloc_addr_arg)[7] = (value >> 0) & 0xff;
+ ((union unaligned *) reloc_addr)->u8 = value;
return;
case R_PPC64_UADDR32:
- /* We are big-endian. */
- ((char *) reloc_addr_arg)[0] = (value >> 24) & 0xff;
- ((char *) reloc_addr_arg)[1] = (value >> 16) & 0xff;
- ((char *) reloc_addr_arg)[2] = (value >> 8) & 0xff;
- ((char *) reloc_addr_arg)[3] = (value >> 0) & 0xff;
+ ((union unaligned *) reloc_addr)->u4 = value;
return;
case R_PPC64_ADDR32:
@@ -781,10 +775,8 @@ elf_machine_rela (struct link_map *map,
case R_PPC64_UADDR16:
if (dont_expect ((value + 0x8000) >= 0x10000))
_dl_reloc_overflow (map, "R_PPC64_UADDR16", reloc_addr, refsym);
- /* We are big-endian. */
- ((char *) reloc_addr_arg)[0] = (value >> 8) & 0xff;
- ((char *) reloc_addr_arg)[1] = (value >> 0) & 0xff;
- break;
+ ((union unaligned *) reloc_addr)->u2 = value;
+ return;
case R_PPC64_ADDR16_DS:
if (dont_expect ((value + 0x8000) >= 0x10000 || (value & 3) != 0))
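
The new union lets the compiler emit whatever unaligned, native-order store
the target needs (the packed attribute is what licenses the misaligned
access), replacing the hand-written big-endian byte stores and making the
R_PPC64_UADDR* cases endian-neutral.  A stand-alone sketch of the idiom
(illustration only):

    #include <stdint.h>

    union unaligned
    {
      uint16_t u2;
      uint32_t u4;
      uint64_t u8;
    } __attribute__ ((__packed__));

    /* Store a 32-bit value at a possibly misaligned address in host byte
       order, as the R_PPC64_UADDR32 case now does.  */
    static void
    store_u4 (void *addr, uint32_t value)
    {
      ((union unaligned *) addr)->u4 = value;
    }
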
diff --git a/libc/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S b/libc/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S
index 801af5d0a..45f71d7ac 100644
--- a/libc/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S
+++ b/libc/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S
@@ -19,8 +19,10 @@
#include <sysdep.h>
.section ".toc","aw"
+ .p2align 3
.LC0: /* 2**23 */
- .tc FD_4b000000_0[TC],0x4b00000000000000
+ .long 0x4b000000
+ .long 0x0
.section ".text"
EALIGN (__ceilf, 4, 0)
diff --git a/libc/sysdeps/powerpc/powerpc64/fpu/s_floorf.S b/libc/sysdeps/powerpc/powerpc64/fpu/s_floorf.S
index a0a22e7eb..e85b820b2 100644
--- a/libc/sysdeps/powerpc/powerpc64/fpu/s_floorf.S
+++ b/libc/sysdeps/powerpc/powerpc64/fpu/s_floorf.S
@@ -19,8 +19,10 @@
#include <sysdep.h>
.section ".toc","aw"
+ .p2align 3
.LC0: /* 2**23 */
- .tc FD_4b000000_0[TC],0x4b00000000000000
+ .long 0x4b000000
+ .long 0x0
.section ".text"
EALIGN (__floorf, 4, 0)
diff --git a/libc/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S b/libc/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S
index 876707c76..b1a2b8cd6 100644
--- a/libc/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S
+++ b/libc/sysdeps/powerpc/powerpc64/fpu/s_nearbyintf.S
@@ -26,8 +26,10 @@
/* float [fp1] nearbyintf(float [fp1]) */
.section ".toc","aw"
+ .p2align 3
.LC0: /* 2**23 */
- .tc FD_4b000000_0[TC],0x4b00000000000000
+ .long 0x4b000000
+ .long 0x0
.section ".text"
EALIGN (__nearbyintf, 4, 0)
diff --git a/libc/sysdeps/powerpc/powerpc64/fpu/s_rintf.S b/libc/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
index cb28ec748..188771742 100644
--- a/libc/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
+++ b/libc/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
@@ -19,8 +19,10 @@
#include <sysdep.h>
.section ".toc","aw"
+ .p2align 3
.LC0: /* 2**23 */
- .tc FD_4b000000_0[TC],0x4b00000000000000
+ .long 0x4b000000
+ .long 0x0
.section ".text"
EALIGN (__rintf, 4, 0)
diff --git a/libc/sysdeps/powerpc/powerpc64/fpu/s_roundf.S b/libc/sysdeps/powerpc/powerpc64/fpu/s_roundf.S
index 980a77bde..4f2c85163 100644
--- a/libc/sysdeps/powerpc/powerpc64/fpu/s_roundf.S
+++ b/libc/sysdeps/powerpc/powerpc64/fpu/s_roundf.S
@@ -19,10 +19,12 @@
#include <sysdep.h>
.section ".toc","aw"
+ .p2align 3
.LC0: /* 2**23 */
- .tc FD_4b000000_0[TC],0x4b00000000000000
+ .long 0x4b000000
.LC1: /* 0.5 */
- .tc FD_3f000000_0[TC],0x3f00000000000000
+ .long 0x3f000000
+
.section ".text"
/* float [fp1] roundf (float x [fp1])
diff --git a/libc/sysdeps/powerpc/powerpc64/fpu/s_truncf.S b/libc/sysdeps/powerpc/powerpc64/fpu/s_truncf.S
index 5ea5f3d04..b8fd05031 100644
--- a/libc/sysdeps/powerpc/powerpc64/fpu/s_truncf.S
+++ b/libc/sysdeps/powerpc/powerpc64/fpu/s_truncf.S
@@ -19,8 +19,10 @@
#include <sysdep.h>
.section ".toc","aw"
+ .p2align 3
.LC0: /* 2**23 */
- .tc FD_4b000000_0[TC],0x4b00000000000000
+ .long 0x4b000000
+ .long 0x0
.section ".text"
/* float [fp1] truncf (float x [fp1])
diff --git a/libc/sysdeps/powerpc/powerpc64/memcpy.S b/libc/sysdeps/powerpc/powerpc64/memcpy.S
index b8c4cc8b1..5fc7401c9 100644
--- a/libc/sysdeps/powerpc/powerpc64/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/memcpy.S
@@ -212,15 +212,28 @@ EALIGN (memcpy, 5, 0)
blt cr6,5f
srdi 7,6,16
bgt cr6,3f
+#ifdef __LITTLE_ENDIAN__
+ sth 7,0(3)
+#else
sth 6,0(3)
+#endif
b 7f
.align 4
3:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,24
+ stb 6,0(3)
+ sth 7,1(3)
+#else
stb 7,0(3)
sth 6,1(3)
+#endif
b 7f
.align 4
5:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,8
+#endif
stb 6,0(3)
7:
cmpldi cr1,10,16
@@ -328,7 +341,11 @@ EALIGN (memcpy, 5, 0)
ld 7,8(5)
subfic 9,10,64
beq 2f
+#ifdef __LITTLE_ENDIAN__
+ srd 0,6,10
+#else
sld 0,6,10
+#endif
cmpldi 11,1
mr 6,7
addi 4,4,-8
@@ -336,15 +353,25 @@ EALIGN (memcpy, 5, 0)
b 1f
2: addi 5,5,8
.align 4
+#ifdef __LITTLE_ENDIAN__
+0: srd 0,6,10
+ sld 8,7,9
+#else
0: sld 0,6,10
srd 8,7,9
+#endif
cmpldi 11,2
ld 6,8(5)
or 0,0,8
addi 11,11,-2
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srd 0,7,10
+1: sld 8,6,9
+#else
sld 0,7,10
1: srd 8,6,9
+#endif
or 0,0,8
beq 8f
ld 7,16(5)
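
When source and destination disagree on doubleword alignment, each stored
doubleword is built from the tail bytes of one aligned load and the head
bytes of the next; which end of the register those bytes occupy depends on
endianness, so the sld/srd directions swap, which is all the #ifdef blocks
above change.  One combine step in C (sketch; shift = 8 * misalignment,
assumed nonzero):

    #include <stdint.h>

    static uint64_t
    combine (uint64_t earlier, uint64_t later, unsigned shift)  /* 0 < shift < 64 */
    {
    #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
      return (earlier >> shift) | (later << (64 - shift));
    #else
      return (earlier << shift) | (later >> (64 - shift));
    #endif
    }
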
diff --git a/libc/sysdeps/powerpc/powerpc64/memset.S b/libc/sysdeps/powerpc/powerpc64/memset.S
index 6acf149c8..1027a592c 100644
--- a/libc/sysdeps/powerpc/powerpc64/memset.S
+++ b/libc/sysdeps/powerpc/powerpc64/memset.S
@@ -55,14 +55,14 @@ L(_memset):
/* Align to doubleword boundary. */
cmpldi cr5, rLEN, 31
- rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */
beq+ L(aligned2)
mtcrf 0x01, rMEMP0
subfic rALIGN, rALIGN, 8
cror 28,30,31 /* Detect odd word aligned. */
add rMEMP, rMEMP, rALIGN
sub rLEN, rLEN, rALIGN
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
bt 29, L(g4)
/* Process the even word of doubleword. */
bf+ 31, L(g2)
@@ -84,14 +84,14 @@ L(g0):
/* Handle the case of size < 31. */
L(aligned2):
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
L(aligned):
mtcrf 0x01, rLEN
ble cr5, L(medium)
/* Align to 32-byte boundary. */
andi. rALIGN, rMEMP, 0x18
subfic rALIGN, rALIGN, 0x20
- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
beq L(caligned)
mtcrf 0x01, rALIGN
add rMEMP, rMEMP, rALIGN
@@ -212,7 +212,7 @@ L(le4):
/* Memset of 0-31 bytes. */
.align 5
L(medium):
- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
cmpldi cr1, rLEN, 16
L(medium_tail2):
add rMEMP, rMEMP, rLEN
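
The insrdi sequence replicates the fill byte across a doubleword by doubling
its width at each step; in C that is three shifted ORs (or a single multiply
by 0x0101010101010101).  Sketch (illustration only):

    #include <stdint.h>

    static uint64_t
    replicate_byte (uint64_t c)
    {
      c &= 0xff;     /* memset uses only the low byte of its int argument */
      c |= c << 8;   /* byte     -> halfword    (insrdi rCHR,rCHR,8,48)  */
      c |= c << 16;  /* halfword -> word        (insrdi rCHR,rCHR,16,32) */
      c |= c << 32;  /* word     -> doubleword  (insrdi rCHR,rCHR,32,0)  */
      return c;      /* == 0x0101010101010101 * (original low byte) */
    }
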
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/memcmp.S b/libc/sysdeps/powerpc/powerpc64/power4/memcmp.S
index 69caedc9f..80d67c9aa 100644
--- a/libc/sysdeps/powerpc/powerpc64/power4/memcmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/power4/memcmp.S
@@ -1,4 +1,4 @@
-/* Optimized strcmp implementation for PowerPC64.
+/* Optimized memcmp implementation for PowerPC64.
Copyright (C) 2003-2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -18,13 +18,14 @@
#include <sysdep.h>
-/* int [r3] memcmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */
+/* int [r3] memcmp (const char *s1 [r3],
+ const char *s2 [r4],
+ size_t size [r5]) */
.machine power4
EALIGN (memcmp, 4, 0)
CALL_MCOUNT 3
-#define rTMP r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -35,107 +36,127 @@ EALIGN (memcmp, 4, 0)
#define rWORD4 r9 /* next word in s2 */
#define rWORD5 r10 /* next word in s1 */
#define rWORD6 r11 /* next word in s2 */
-#define rBITDIF r12 /* bits that differ in s1 & s2 words */
#define rWORD7 r30 /* next word in s1 */
#define rWORD8 r31 /* next word in s2 */
- xor rTMP, rSTR2, rSTR1
+ xor r0, rSTR2, rSTR1
cmpldi cr6, rN, 0
cmpldi cr1, rN, 12
- clrldi. rTMP, rTMP, 61
- clrldi rBITDIF, rSTR1, 61
- cmpldi cr5, rBITDIF, 0
+ clrldi. r0, r0, 61
+ clrldi r12, rSTR1, 61
+ cmpldi cr5, r12, 0
beq- cr6, L(zeroLength)
- dcbt 0,rSTR1
- dcbt 0,rSTR2
+ dcbt 0, rSTR1
+ dcbt 0, rSTR2
/* If less than 8 bytes or not aligned, use the unaligned
byte loop. */
blt cr1, L(bytealigned)
- std rWORD8,-8(r1)
- cfi_offset(rWORD8,-8)
- std rWORD7,-16(r1)
- cfi_offset(rWORD7,-16)
+ std rWORD8, -8(r1)
+ cfi_offset(rWORD8, -8)
+ std rWORD7, -16(r1)
+ cfi_offset(rWORD7, -16)
bne L(unaligned)
/* At this point we know both strings have the same alignment and the
- compare length is at least 8 bytes. rBITDIF contains the low order
+ compare length is at least 8 bytes. r12 contains the low order
3 bits of rSTR1 and cr5 contains the result of the logical compare
- of rBITDIF to 0. If rBITDIF == 0 then we are already double word
- aligned and can perform the DWaligned loop.
+ of r12 to 0. If r12 == 0 then we are already double word
+ aligned and can perform the DW aligned loop.
Otherwise we know the two strings have the same alignment (but not
- yet DW). So we can force the string addresses to the next lower DW
- boundary and special case this first DW word using shift left to
+ yet DW). So we force the string addresses to the next lower DW
+ boundary and special case this first DW using shift left to
eliminate bits preceding the first byte. Since we want to join the
- normal (DWaligned) compare loop, starting at the second double word,
+ normal (DW aligned) compare loop, starting at the second double word,
we need to adjust the length (rN) and special case the loop
- versioning for the first DW. This insures that the loop count is
- correct and the first DW (shifted) is in the expected resister pair. */
- .align 4
+ versioning for the first DW. This ensures that the loop count is
+ correct and the first DW (shifted) is in the expected register pair. */
+ .align 4
L(samealignment):
clrrdi rSTR1, rSTR1, 3
clrrdi rSTR2, rSTR2, 3
beq cr5, L(DWaligned)
- add rN, rN, rBITDIF
- sldi r11, rBITDIF, 3
- srdi rTMP, rN, 5 /* Divide by 32 */
- andi. rBITDIF, rN, 24 /* Get the DW remainder */
+ add rN, rN, r12
+ sldi rWORD6, r12, 3
+ srdi r0, rN, 5 /* Divide by 32 */
+ andi. r12, rN, 24 /* Get the DW remainder */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 0(rSTR1)
ld rWORD2, 0(rSTR2)
- cmpldi cr1, rBITDIF, 16
+#endif
+ cmpldi cr1, r12, 16
cmpldi cr7, rN, 32
clrldi rN, rN, 61
beq L(dPs4)
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
bgt cr1, L(dPs3)
beq cr1, L(dPs2)
/* Remainder is 8 */
- .align 3
+ .align 3
L(dsP1):
- sld rWORD5, rWORD1, r11
- sld rWORD6, rWORD2, r11
+ sld rWORD5, rWORD1, rWORD6
+ sld rWORD6, rWORD2, rWORD6
cmpld cr5, rWORD5, rWORD6
blt cr7, L(dP1x)
/* Do something useful in this cycle since we have to branch anyway. */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 8(rSTR1)
ld rWORD2, 8(rSTR2)
- cmpld cr0, rWORD1, rWORD2
+#endif
+ cmpld cr7, rWORD1, rWORD2
b L(dP1e)
/* Remainder is 16 */
- .align 4
+ .align 4
L(dPs2):
- sld rWORD5, rWORD1, r11
- sld rWORD6, rWORD2, r11
+ sld rWORD5, rWORD1, rWORD6
+ sld rWORD6, rWORD2, rWORD6
cmpld cr6, rWORD5, rWORD6
blt cr7, L(dP2x)
/* Do something useful in this cycle since we have to branch anyway. */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD7, 8(rSTR1)
ld rWORD8, 8(rSTR2)
+#endif
cmpld cr5, rWORD7, rWORD8
b L(dP2e)
/* Remainder is 24 */
- .align 4
+ .align 4
L(dPs3):
- sld rWORD3, rWORD1, r11
- sld rWORD4, rWORD2, r11
+ sld rWORD3, rWORD1, rWORD6
+ sld rWORD4, rWORD2, rWORD6
cmpld cr1, rWORD3, rWORD4
b L(dP3e)
/* Count is a multiple of 32, remainder is 0 */
- .align 4
+ .align 4
L(dPs4):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
- sld rWORD1, rWORD1, r11
- sld rWORD2, rWORD2, r11
- cmpld cr0, rWORD1, rWORD2
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+ sld rWORD1, rWORD1, rWORD6
+ sld rWORD2, rWORD2, rWORD6
+ cmpld cr7, rWORD1, rWORD2
b L(dP4e)
/* At this point we know both strings are double word aligned and the
compare length is at least 8 bytes. */
- .align 4
+ .align 4
L(DWaligned):
- andi. rBITDIF, rN, 24 /* Get the DW remainder */
- srdi rTMP, rN, 5 /* Divide by 32 */
- cmpldi cr1, rBITDIF, 16
+ andi. r12, rN, 24 /* Get the DW remainder */
+ srdi r0, rN, 5 /* Divide by 32 */
+ cmpldi cr1, r12, 16
cmpldi cr7, rN, 32
clrldi rN, rN, 61
beq L(dP4)
@@ -143,174 +164,343 @@ L(DWaligned):
beq cr1, L(dP2)
/* Remainder is 8 */
- .align 4
+ .align 4
L(dP1):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early
(8-15 byte compare), we want to use only volatile registers. This
means we can avoid restoring non-volatile registers since we did not
change any on the early exit path. The key here is the non-early
exit path only cares about the condition code (cr5), not about which
register pair was used. */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 0(rSTR1)
ld rWORD6, 0(rSTR2)
+#endif
cmpld cr5, rWORD5, rWORD6
blt cr7, L(dP1x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 8(rSTR1)
ld rWORD2, 8(rSTR2)
- cmpld cr0, rWORD1, rWORD2
+#endif
+ cmpld cr7, rWORD1, rWORD2
L(dP1e):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 16(rSTR1)
ld rWORD4, 16(rSTR2)
+#endif
cmpld cr1, rWORD3, rWORD4
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 24(rSTR1)
ld rWORD6, 24(rSTR2)
+#endif
cmpld cr6, rWORD5, rWORD6
- bne cr5, L(dLcr5)
- bne cr0, L(dLcr0)
+ bne cr5, L(dLcr5x)
+ bne cr7, L(dLcr7x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ldu rWORD7, 32(rSTR1)
ldu rWORD8, 32(rSTR2)
+#endif
bne cr1, L(dLcr1)
cmpld cr5, rWORD7, rWORD8
bdnz L(dLoop)
bne cr6, L(dLcr6)
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
- .align 3
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+ .align 3
L(dP1x):
sldi. r12, rN, 3
- bne cr5, L(dLcr5)
+ bne cr5, L(dLcr5x)
subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
bne L(d00)
li rRTN, 0
blr
/* Remainder is 16 */
- .align 4
+ .align 4
L(dP2):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 0(rSTR1)
ld rWORD6, 0(rSTR2)
+#endif
cmpld cr6, rWORD5, rWORD6
blt cr7, L(dP2x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD7, 8(rSTR1)
ld rWORD8, 8(rSTR2)
+#endif
cmpld cr5, rWORD7, rWORD8
L(dP2e):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 16(rSTR1)
ld rWORD2, 16(rSTR2)
- cmpld cr0, rWORD1, rWORD2
+#endif
+ cmpld cr7, rWORD1, rWORD2
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 24(rSTR1)
ld rWORD4, 24(rSTR2)
+#endif
cmpld cr1, rWORD3, rWORD4
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 8
addi rSTR2, rSTR2, 8
+#endif
bne cr6, L(dLcr6)
bne cr5, L(dLcr5)
b L(dLoop2)
/* Again we are on an early exit path (16-23 byte compare), we want to
only use volatile registers and avoid restoring non-volatile
registers. */
- .align 4
+ .align 4
L(dP2x):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 8(rSTR1)
ld rWORD4, 8(rSTR2)
- cmpld cr5, rWORD3, rWORD4
+#endif
+ cmpld cr1, rWORD3, rWORD4
sldi. r12, rN, 3
- bne cr6, L(dLcr6)
+ bne cr6, L(dLcr6x)
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 8
addi rSTR2, rSTR2, 8
- bne cr5, L(dLcr5)
+#endif
+ bne cr1, L(dLcr1x)
subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
bne L(d00)
li rRTN, 0
blr
/* Remainder is 24 */
- .align 4
+ .align 4
L(dP3):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 0(rSTR1)
ld rWORD4, 0(rSTR2)
+#endif
cmpld cr1, rWORD3, rWORD4
L(dP3e):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 8(rSTR1)
ld rWORD6, 8(rSTR2)
+#endif
cmpld cr6, rWORD5, rWORD6
blt cr7, L(dP3x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD7, 16(rSTR1)
ld rWORD8, 16(rSTR2)
+#endif
cmpld cr5, rWORD7, rWORD8
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 24(rSTR1)
ld rWORD2, 24(rSTR2)
- cmpld cr0, rWORD1, rWORD2
+#endif
+ cmpld cr7, rWORD1, rWORD2
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 16
addi rSTR2, rSTR2, 16
+#endif
bne cr1, L(dLcr1)
bne cr6, L(dLcr6)
b L(dLoop1)
/* Again we are on an early exit path (24-31 byte compare), we want to
only use volatile registers and avoid restoring non-volatile
registers. */
- .align 4
+ .align 4
L(dP3x):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 16(rSTR1)
ld rWORD2, 16(rSTR2)
- cmpld cr5, rWORD1, rWORD2
+#endif
+ cmpld cr7, rWORD1, rWORD2
sldi. r12, rN, 3
- bne cr1, L(dLcr1)
+ bne cr1, L(dLcr1x)
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 16
addi rSTR2, rSTR2, 16
- bne cr6, L(dLcr6)
+#endif
+ bne cr6, L(dLcr6x)
subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
- bne cr5, L(dLcr5)
+ bne cr7, L(dLcr7x)
bne L(d00)
li rRTN, 0
blr
/* Count is a multiple of 32, remainder is 0 */
- .align 4
+ .align 4
L(dP4):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 0(rSTR1)
ld rWORD2, 0(rSTR2)
- cmpld cr0, rWORD1, rWORD2
+#endif
+ cmpld cr7, rWORD1, rWORD2
L(dP4e):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 8(rSTR1)
ld rWORD4, 8(rSTR2)
+#endif
cmpld cr1, rWORD3, rWORD4
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 16(rSTR1)
ld rWORD6, 16(rSTR2)
+#endif
cmpld cr6, rWORD5, rWORD6
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ldu rWORD7, 24(rSTR1)
ldu rWORD8, 24(rSTR2)
+#endif
cmpld cr5, rWORD7, rWORD8
- bne cr0, L(dLcr0)
+ bne cr7, L(dLcr7)
bne cr1, L(dLcr1)
bdz- L(d24) /* Adjust CTR as we start with +4 */
/* This is the primary loop */
- .align 4
+ .align 4
L(dLoop):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 8(rSTR1)
ld rWORD2, 8(rSTR2)
+#endif
cmpld cr1, rWORD3, rWORD4
bne cr6, L(dLcr6)
L(dLoop1):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 16(rSTR1)
ld rWORD4, 16(rSTR2)
+#endif
cmpld cr6, rWORD5, rWORD6
bne cr5, L(dLcr5)
L(dLoop2):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 24(rSTR1)
ld rWORD6, 24(rSTR2)
+#endif
cmpld cr5, rWORD7, rWORD8
- bne cr0, L(dLcr0)
+ bne cr7, L(dLcr7)
L(dLoop3):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ldu rWORD7, 32(rSTR1)
ldu rWORD8, 32(rSTR2)
+#endif
bne- cr1, L(dLcr1)
- cmpld cr0, rWORD1, rWORD2
+ cmpld cr7, rWORD1, rWORD2
bdnz+ L(dLoop)
L(dL4):
@@ -320,7 +510,7 @@ L(dL4):
bne cr5, L(dLcr5)
cmpld cr5, rWORD7, rWORD8
L(d44):
- bne cr0, L(dLcr0)
+ bne cr7, L(dLcr7)
L(d34):
bne cr1, L(dLcr1)
L(d24):
@@ -329,60 +519,74 @@ L(d14):
sldi. r12, rN, 3
bne cr5, L(dLcr5)
L(d04):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
beq L(zeroLength)
/* At this point we have a remainder of 1 to 7 bytes to compare. Since
we are aligned it is safe to load the whole double word, and use
shift right double to eliminate bits beyond the compare length. */
L(d00):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 8(rSTR1)
ld rWORD2, 8(rSTR2)
+#endif
srd rWORD1, rWORD1, rN
srd rWORD2, rWORD2, rN
- cmpld cr5, rWORD1, rWORD2
- bne cr5, L(dLcr5x)
+ cmpld cr7, rWORD1, rWORD2
+ bne cr7, L(dLcr7x)
li rRTN, 0
blr
- .align 4
-L(dLcr0):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+
+ .align 4
+L(dLcr7):
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+L(dLcr7x):
li rRTN, 1
- bgtlr cr0
+ bgtlr cr7
li rRTN, -1
blr
- .align 4
+ .align 4
L(dLcr1):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+L(dLcr1x):
li rRTN, 1
bgtlr cr1
li rRTN, -1
blr
- .align 4
+ .align 4
L(dLcr6):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+L(dLcr6x):
li rRTN, 1
bgtlr cr6
li rRTN, -1
blr
- .align 4
+ .align 4
L(dLcr5):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
L(dLcr5x):
li rRTN, 1
bgtlr cr5
li rRTN, -1
blr
- .align 4
+ .align 4
L(bytealigned):
- mtctr rN /* Power4 wants mtctr 1st in dispatch group */
+ mtctr rN /* Power4 wants mtctr 1st in dispatch group */
+#if 0
+/* Huh? We've already branched on cr6! */
beq- cr6, L(zeroLength)
+#endif
/* We need to prime this loop. This loop is swing modulo scheduled
to avoid pipe delays. The dependent instruction latencies (load to
@@ -397,7 +601,7 @@ L(bytealigned):
lbz rWORD1, 0(rSTR1)
lbz rWORD2, 0(rSTR2)
bdz- L(b11)
- cmpld cr0, rWORD1, rWORD2
+ cmpld cr7, rWORD1, rWORD2
lbz rWORD3, 1(rSTR1)
lbz rWORD4, 1(rSTR2)
bdz- L(b12)
@@ -405,11 +609,11 @@ L(bytealigned):
lbzu rWORD5, 2(rSTR1)
lbzu rWORD6, 2(rSTR2)
bdz- L(b13)
- .align 4
+ .align 4
L(bLoop):
lbzu rWORD1, 1(rSTR1)
lbzu rWORD2, 1(rSTR2)
- bne- cr0, L(bLcr0)
+ bne- cr7, L(bLcr7)
cmpld cr6, rWORD5, rWORD6
bdz- L(b3i)
@@ -418,7 +622,7 @@ L(bLoop):
lbzu rWORD4, 1(rSTR2)
bne- cr1, L(bLcr1)
- cmpld cr0, rWORD1, rWORD2
+ cmpld cr7, rWORD1, rWORD2
bdz- L(b2i)
lbzu rWORD5, 1(rSTR1)
@@ -435,23 +639,23 @@ L(bLoop):
tested. In this case we must complete the pending operations
before returning. */
L(b1i):
- bne- cr0, L(bLcr0)
+ bne- cr7, L(bLcr7)
bne- cr1, L(bLcr1)
b L(bx56)
- .align 4
+ .align 4
L(b2i):
bne- cr6, L(bLcr6)
- bne- cr0, L(bLcr0)
+ bne- cr7, L(bLcr7)
b L(bx34)
- .align 4
+ .align 4
L(b3i):
bne- cr1, L(bLcr1)
bne- cr6, L(bLcr6)
b L(bx12)
- .align 4
-L(bLcr0):
+ .align 4
+L(bLcr7):
li rRTN, 1
- bgtlr cr0
+ bgtlr cr7
li rRTN, -1
blr
L(bLcr1):
@@ -466,14 +670,14 @@ L(bLcr6):
blr
L(b13):
- bne- cr0, L(bx12)
+ bne- cr7, L(bx12)
bne- cr1, L(bx34)
L(bx56):
sub rRTN, rWORD5, rWORD6
blr
nop
L(b12):
- bne- cr0, L(bx12)
+ bne- cr7, L(bx12)
L(bx34):
sub rRTN, rWORD3, rWORD4
blr
@@ -481,101 +685,106 @@ L(b11):
L(bx12):
sub rRTN, rWORD1, rWORD2
blr
- .align 4
-L(zeroLengthReturn):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ .align 4
L(zeroLength):
li rRTN, 0
blr
- .align 4
+ .align 4
/* At this point we know the strings have different alignment and the
- compare length is at least 8 bytes. rBITDIF contains the low order
+ compare length is at least 8 bytes. r12 contains the low order
3 bits of rSTR1 and cr5 contains the result of the logical compare
- of rBITDIF to 0. If rBITDIF == 0 then rStr1 is double word
+ of r12 to 0. If r12 == 0 then rStr1 is double word
aligned and can perform the DWunaligned loop.
Otherwise we know that rSTR1 is not yet DW aligned.
So we can force the string addresses to the next lower DW
- boundary and special case this first DW word using shift left to
+ boundary and special case this first DW using shift left to
eliminate bits preceding the first byte. Since we want to join the
normal (DWaligned) compare loop, starting at the second double word,
we need to adjust the length (rN) and special case the loop
- versioning for the first DW. This insures that the loop count is
+ versioning for the first DW. This ensures that the loop count is
correct and the first DW (shifted) is in the expected register pair.  */
-#define rSHL r29 /* Unaligned shift left count. */
-#define rSHR r28 /* Unaligned shift right count. */
-#define rB r27 /* Left rotation temp for rWORD2. */
-#define rD r26 /* Left rotation temp for rWORD4. */
-#define rF r25 /* Left rotation temp for rWORD6. */
-#define rH r24 /* Left rotation temp for rWORD8. */
-#define rA r0 /* Right rotation temp for rWORD2. */
-#define rC r12 /* Right rotation temp for rWORD4. */
-#define rE r0 /* Right rotation temp for rWORD6. */
-#define rG r12 /* Right rotation temp for rWORD8. */
+#define rSHL r29 /* Unaligned shift left count. */
+#define rSHR r28 /* Unaligned shift right count. */
+#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */
+#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */
+#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */
+#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. */
L(unaligned):
- std r29,-24(r1)
- cfi_offset(r29,-24)
+ std rSHL, -24(r1)
+ cfi_offset(rSHL, -24)
clrldi rSHL, rSTR2, 61
beq- cr6, L(duzeroLength)
- std r28,-32(r1)
- cfi_offset(r28,-32)
+ std rSHR, -32(r1)
+ cfi_offset(rSHR, -32)
beq cr5, L(DWunaligned)
- std r27,-40(r1)
- cfi_offset(r27,-40)
-/* Adjust the logical start of rSTR2 ro compensate for the extra bits
+ std rWORD8_SHIFT, -40(r1)
+ cfi_offset(rWORD8_SHIFT, -40)
+/* Adjust the logical start of rSTR2 to compensate for the extra bits
in the 1st rSTR1 DW. */
- sub r27, rSTR2, rBITDIF
+ sub rWORD8_SHIFT, rSTR2, r12
/* But do not attempt to address the DW before that DW that contains
the actual start of rSTR2. */
clrrdi rSTR2, rSTR2, 3
- std r26,-48(r1)
- cfi_offset(r26,-48)
-/* Compute the left/right shift counts for the unalign rSTR2,
+ std rWORD2_SHIFT, -48(r1)
+ cfi_offset(rWORD2_SHIFT, -48)
+/* Compute the left/right shift counts for the unaligned rSTR2,
compensating for the logical (DW aligned) start of rSTR1. */
- clrldi rSHL, r27, 61
+ clrldi rSHL, rWORD8_SHIFT, 61
clrrdi rSTR1, rSTR1, 3
- std r25,-56(r1)
- cfi_offset(r25,-56)
+ std rWORD4_SHIFT, -56(r1)
+ cfi_offset(rWORD4_SHIFT, -56)
sldi rSHL, rSHL, 3
- cmpld cr5, r27, rSTR2
- add rN, rN, rBITDIF
- sldi r11, rBITDIF, 3
- std r24,-64(r1)
- cfi_offset(r24,-64)
+ cmpld cr5, rWORD8_SHIFT, rSTR2
+ add rN, rN, r12
+ sldi rWORD6, r12, 3
+ std rWORD6_SHIFT, -64(r1)
+ cfi_offset(rWORD6_SHIFT, -64)
subfic rSHR, rSHL, 64
- srdi rTMP, rN, 5 /* Divide by 32 */
- andi. rBITDIF, rN, 24 /* Get the DW remainder */
+ srdi r0, rN, 5 /* Divide by 32 */
+ andi. r12, rN, 24 /* Get the DW remainder */
/* We normally need to load 2 DWs to start the unaligned rSTR2, but in
this special case those bits may be discarded anyway. Also we
must avoid loading a DW where none of the bits are part of rSTR2 as
this may cross a page boundary and cause a page fault. */
li rWORD8, 0
blt cr5, L(dus0)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD8, 0(rSTR2)
- la rSTR2, 8(rSTR2)
+ addi rSTR2, rSTR2, 8
+#endif
sld rWORD8, rWORD8, rSHL
L(dus0):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 0(rSTR1)
ld rWORD2, 0(rSTR2)
- cmpldi cr1, rBITDIF, 16
+#endif
+ cmpldi cr1, r12, 16
cmpldi cr7, rN, 32
- srd rG, rWORD2, rSHR
+ srd r12, rWORD2, rSHR
clrldi rN, rN, 61
beq L(duPs4)
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
- or rWORD8, rG, rWORD8
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+ or rWORD8, r12, rWORD8
bgt cr1, L(duPs3)
beq cr1, L(duPs2)
/* Remainder is 8 */
- .align 4
+ .align 4
L(dusP1):
- sld rB, rWORD2, rSHL
- sld rWORD7, rWORD1, r11
- sld rWORD8, rWORD8, r11
+ sld rWORD8_SHIFT, rWORD2, rSHL
+ sld rWORD7, rWORD1, rWORD6
+ sld rWORD8, rWORD8, rWORD6
bge cr7, L(duP1e)
/* At this point we exit early with the first double word compare
complete and remainder of 0 to 7 bytes. See L(du14) for details on
@@ -585,95 +794,133 @@ L(dusP1):
bne cr5, L(duLcr5)
cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD2, 8(rSTR2)
- srd rA, rWORD2, rSHR
+#endif
+ srd r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 16 */
- .align 4
+ .align 4
L(duPs2):
- sld rH, rWORD2, rSHL
- sld rWORD5, rWORD1, r11
- sld rWORD6, rWORD8, r11
+ sld rWORD6_SHIFT, rWORD2, rSHL
+ sld rWORD5, rWORD1, rWORD6
+ sld rWORD6, rWORD8, rWORD6
b L(duP2e)
/* Remainder is 24 */
- .align 4
+ .align 4
L(duPs3):
- sld rF, rWORD2, rSHL
- sld rWORD3, rWORD1, r11
- sld rWORD4, rWORD8, r11
+ sld rWORD4_SHIFT, rWORD2, rSHL
+ sld rWORD3, rWORD1, rWORD6
+ sld rWORD4, rWORD8, rWORD6
b L(duP3e)
/* Count is a multiple of 32, remainder is 0 */
- .align 4
+ .align 4
L(duPs4):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
- or rWORD8, rG, rWORD8
- sld rD, rWORD2, rSHL
- sld rWORD1, rWORD1, r11
- sld rWORD2, rWORD8, r11
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+ or rWORD8, r12, rWORD8
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ sld rWORD1, rWORD1, rWORD6
+ sld rWORD2, rWORD8, rWORD6
b L(duP4e)
/* At this point we know rSTR1 is double word aligned and the
compare length is at least 8 bytes. */
- .align 4
+ .align 4
L(DWunaligned):
- std r27,-40(r1)
- cfi_offset(r27,-40)
+ std rWORD8_SHIFT, -40(r1)
+ cfi_offset(rWORD8_SHIFT, -40)
clrrdi rSTR2, rSTR2, 3
- std r26,-48(r1)
- cfi_offset(r26,-48)
- srdi rTMP, rN, 5 /* Divide by 32 */
- std r25,-56(r1)
- cfi_offset(r25,-56)
- andi. rBITDIF, rN, 24 /* Get the DW remainder */
- std r24,-64(r1)
- cfi_offset(r24,-64)
+ std rWORD2_SHIFT, -48(r1)
+ cfi_offset(rWORD2_SHIFT, -48)
+ srdi r0, rN, 5 /* Divide by 32 */
+ std rWORD4_SHIFT, -56(r1)
+ cfi_offset(rWORD4_SHIFT, -56)
+ andi. r12, rN, 24 /* Get the DW remainder */
+ std rWORD6_SHIFT, -64(r1)
+ cfi_offset(rWORD6_SHIFT, -64)
sldi rSHL, rSHL, 3
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD6, 0(rSTR2)
ldu rWORD8, 8(rSTR2)
- cmpldi cr1, rBITDIF, 16
+#endif
+ cmpldi cr1, r12, 16
cmpldi cr7, rN, 32
clrldi rN, rN, 61
subfic rSHR, rSHL, 64
- sld rH, rWORD6, rSHL
+ sld rWORD6_SHIFT, rWORD6, rSHL
beq L(duP4)
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
bgt cr1, L(duP3)
beq cr1, L(duP2)
/* Remainder is 8 */
- .align 4
+ .align 4
L(duP1):
- srd rG, rWORD8, rSHR
+ srd r12, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ addi rSTR1, rSTR1, 8
+#else
ld rWORD7, 0(rSTR1)
- sld rB, rWORD8, rSHL
- or rWORD8, rG, rH
+#endif
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
blt cr7, L(duP1x)
L(duP1e):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 8(rSTR1)
ld rWORD2, 8(rSTR2)
+#endif
cmpld cr5, rWORD7, rWORD8
- srd rA, rWORD2, rSHR
- sld rD, rWORD2, rSHL
- or rWORD2, rA, rB
+ srd r0, rWORD2, rSHR
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 16(rSTR1)
ld rWORD4, 16(rSTR2)
- cmpld cr0, rWORD1, rWORD2
- srd rC, rWORD4, rSHR
- sld rF, rWORD4, rSHL
+#endif
+ cmpld cr7, rWORD1, rWORD2
+ srd r12, rWORD4, rSHR
+ sld rWORD4_SHIFT, rWORD4, rSHL
bne cr5, L(duLcr5)
- or rWORD4, rC, rD
+ or rWORD4, r12, rWORD2_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 24(rSTR1)
ld rWORD6, 24(rSTR2)
+#endif
cmpld cr1, rWORD3, rWORD4
- srd rE, rWORD6, rSHR
- sld rH, rWORD6, rSHL
- bne cr0, L(duLcr0)
- or rWORD6, rE, rF
+ srd r0, rWORD6, rSHR
+ sld rWORD6_SHIFT, rWORD6, rSHL
+ bne cr7, L(duLcr7)
+ or rWORD6, r0, rWORD4_SHIFT
cmpld cr6, rWORD5, rWORD6
b L(duLoop3)
- .align 4
+ .align 4
/* At this point we exit early with the first double word compare
complete and remainder of 0 to 7 bytes. See L(du14) for details on
how we handle the remaining bytes. */
@@ -683,186 +930,321 @@ L(duP1x):
bne cr5, L(duLcr5)
cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD2, 8(rSTR2)
- srd rA, rWORD2, rSHR
+#endif
+ srd r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 16 */
- .align 4
+ .align 4
L(duP2):
- srd rE, rWORD8, rSHR
+ srd r0, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ addi rSTR1, rSTR1, 8
+#else
ld rWORD5, 0(rSTR1)
- or rWORD6, rE, rH
- sld rH, rWORD8, rSHL
+#endif
+ or rWORD6, r0, rWORD6_SHIFT
+ sld rWORD6_SHIFT, rWORD8, rSHL
L(duP2e):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD7, 8(rSTR1)
ld rWORD8, 8(rSTR2)
+#endif
cmpld cr6, rWORD5, rWORD6
- srd rG, rWORD8, rSHR
- sld rB, rWORD8, rSHL
- or rWORD8, rG, rH
+ srd r12, rWORD8, rSHR
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
blt cr7, L(duP2x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 16(rSTR1)
ld rWORD2, 16(rSTR2)
+#endif
cmpld cr5, rWORD7, rWORD8
bne cr6, L(duLcr6)
- srd rA, rWORD2, rSHR
- sld rD, rWORD2, rSHL
- or rWORD2, rA, rB
+ srd r0, rWORD2, rSHR
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 24(rSTR1)
ld rWORD4, 24(rSTR2)
- cmpld cr0, rWORD1, rWORD2
+#endif
+ cmpld cr7, rWORD1, rWORD2
bne cr5, L(duLcr5)
- srd rC, rWORD4, rSHR
- sld rF, rWORD4, rSHL
- or rWORD4, rC, rD
+ srd r12, rWORD4, rSHR
+ sld rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 8
addi rSTR2, rSTR2, 8
+#endif
cmpld cr1, rWORD3, rWORD4
b L(duLoop2)
- .align 4
+ .align 4
L(duP2x):
cmpld cr5, rWORD7, rWORD8
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 8
addi rSTR2, rSTR2, 8
+#endif
bne cr6, L(duLcr6)
sldi. rN, rN, 3
bne cr5, L(duLcr5)
cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD2, 8(rSTR2)
- srd rA, rWORD2, rSHR
+#endif
+ srd r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 24 */
- .align 4
+ .align 4
L(duP3):
- srd rC, rWORD8, rSHR
+ srd r12, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ addi rSTR1, rSTR1, 8
+#else
ld rWORD3, 0(rSTR1)
- sld rF, rWORD8, rSHL
- or rWORD4, rC, rH
+#endif
+ sld rWORD4_SHIFT, rWORD8, rSHL
+ or rWORD4, r12, rWORD6_SHIFT
L(duP3e):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 8(rSTR1)
ld rWORD6, 8(rSTR2)
+#endif
cmpld cr1, rWORD3, rWORD4
- srd rE, rWORD6, rSHR
- sld rH, rWORD6, rSHL
- or rWORD6, rE, rF
+ srd r0, rWORD6, rSHR
+ sld rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD7, 16(rSTR1)
ld rWORD8, 16(rSTR2)
+#endif
cmpld cr6, rWORD5, rWORD6
bne cr1, L(duLcr1)
- srd rG, rWORD8, rSHR
- sld rB, rWORD8, rSHL
- or rWORD8, rG, rH
+ srd r12, rWORD8, rSHR
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
blt cr7, L(duP3x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 24(rSTR1)
ld rWORD2, 24(rSTR2)
+#endif
cmpld cr5, rWORD7, rWORD8
bne cr6, L(duLcr6)
- srd rA, rWORD2, rSHR
- sld rD, rWORD2, rSHL
- or rWORD2, rA, rB
+ srd r0, rWORD2, rSHR
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 16
addi rSTR2, rSTR2, 16
- cmpld cr0, rWORD1, rWORD2
+#endif
+ cmpld cr7, rWORD1, rWORD2
b L(duLoop1)
- .align 4
+ .align 4
L(duP3x):
+#ifndef __LITTLE_ENDIAN__
addi rSTR1, rSTR1, 16
addi rSTR2, rSTR2, 16
+#endif
+#if 0
+/* Huh? We've already branched on cr1! */
bne cr1, L(duLcr1)
+#endif
cmpld cr5, rWORD7, rWORD8
bne cr6, L(duLcr6)
sldi. rN, rN, 3
bne cr5, L(duLcr5)
cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD2, 8(rSTR2)
- srd rA, rWORD2, rSHR
+#endif
+ srd r0, rWORD2, rSHR
b L(dutrim)
/* Count is a multiple of 32, remainder is 0 */
- .align 4
+ .align 4
L(duP4):
- mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */
- srd rA, rWORD8, rSHR
+ mtctr r0 /* Power4 wants mtctr 1st in dispatch group */
+ srd r0, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ addi rSTR1, rSTR1, 8
+#else
ld rWORD1, 0(rSTR1)
- sld rD, rWORD8, rSHL
- or rWORD2, rA, rH
+#endif
+ sld rWORD2_SHIFT, rWORD8, rSHL
+ or rWORD2, r0, rWORD6_SHIFT
L(duP4e):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 8(rSTR1)
ld rWORD4, 8(rSTR2)
- cmpld cr0, rWORD1, rWORD2
- srd rC, rWORD4, rSHR
- sld rF, rWORD4, rSHL
- or rWORD4, rC, rD
+#endif
+ cmpld cr7, rWORD1, rWORD2
+ srd r12, rWORD4, rSHR
+ sld rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 16(rSTR1)
ld rWORD6, 16(rSTR2)
+#endif
cmpld cr1, rWORD3, rWORD4
- bne cr0, L(duLcr0)
- srd rE, rWORD6, rSHR
- sld rH, rWORD6, rSHL
- or rWORD6, rE, rF
+ bne cr7, L(duLcr7)
+ srd r0, rWORD6, rSHR
+ sld rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ldu rWORD7, 24(rSTR1)
ldu rWORD8, 24(rSTR2)
+#endif
cmpld cr6, rWORD5, rWORD6
bne cr1, L(duLcr1)
- srd rG, rWORD8, rSHR
- sld rB, rWORD8, rSHL
- or rWORD8, rG, rH
+ srd r12, rWORD8, rSHR
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
cmpld cr5, rWORD7, rWORD8
bdz- L(du24) /* Adjust CTR as we start with +4 */
/* This is the primary loop */
- .align 4
+ .align 4
L(duLoop):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD1, 8(rSTR1)
ld rWORD2, 8(rSTR2)
+#endif
cmpld cr1, rWORD3, rWORD4
bne cr6, L(duLcr6)
- srd rA, rWORD2, rSHR
- sld rD, rWORD2, rSHL
- or rWORD2, rA, rB
+ srd r0, rWORD2, rSHR
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
L(duLoop1):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD3, 16(rSTR1)
ld rWORD4, 16(rSTR2)
+#endif
cmpld cr6, rWORD5, rWORD6
bne cr5, L(duLcr5)
- srd rC, rWORD4, rSHR
- sld rF, rWORD4, rSHL
- or rWORD4, rC, rD
+ srd r12, rWORD4, rSHR
+ sld rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
L(duLoop2):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD5, 24(rSTR1)
ld rWORD6, 24(rSTR2)
+#endif
cmpld cr5, rWORD7, rWORD8
- bne cr0, L(duLcr0)
- srd rE, rWORD6, rSHR
- sld rH, rWORD6, rSHL
- or rWORD6, rE, rF
+ bne cr7, L(duLcr7)
+ srd r0, rWORD6, rSHR
+ sld rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
L(duLoop3):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
ldu rWORD7, 32(rSTR1)
ldu rWORD8, 32(rSTR2)
- cmpld cr0, rWORD1, rWORD2
+#endif
+ cmpld cr7, rWORD1, rWORD2
bne- cr1, L(duLcr1)
- srd rG, rWORD8, rSHR
- sld rB, rWORD8, rSHL
- or rWORD8, rG, rH
+ srd r12, rWORD8, rSHR
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
bdnz+ L(duLoop)
L(duL4):
+#if 0
+/* Huh? We've already branched on cr1! */
bne cr1, L(duLcr1)
+#endif
cmpld cr1, rWORD3, rWORD4
bne cr6, L(duLcr6)
cmpld cr6, rWORD5, rWORD6
bne cr5, L(duLcr5)
cmpld cr5, rWORD7, rWORD8
L(du44):
- bne cr0, L(duLcr0)
+ bne cr7, L(duLcr7)
L(du34):
bne cr1, L(duLcr1)
L(du24):
@@ -872,103 +1254,110 @@ L(du14):
bne cr5, L(duLcr5)
/* At this point we have a remainder of 1 to 7 bytes to compare. We use
shift right double to eliminate bits beyond the compare length.
- This allows the use of double word subtract to compute the final
- result.
However it may not be safe to load rWORD2 which may be beyond the
string length. So we compare the bit length of the remainder to
the right shift count (rSHR). If the bit count is less than or equal
we do not need to load rWORD2 (all significant bits are already in
- rB). */
+ rWORD8_SHIFT). */
cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA, 0
+ li r0, 0
ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
ld rWORD2, 8(rSTR2)
- srd rA, rWORD2, rSHR
- .align 4
+#endif
+ srd r0, rWORD2, rSHR
+ .align 4
L(dutrim):
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+#else
ld rWORD1, 8(rSTR1)
- ld rWORD8,-8(r1)
+#endif
+ ld rWORD8, -8(r1)
subfic rN, rN, 64 /* Shift count is 64 - (rN * 8). */
- or rWORD2, rA, rB
- ld rWORD7,-16(r1)
- ld r29,-24(r1)
+ or rWORD2, r0, rWORD8_SHIFT
+ ld rWORD7, -16(r1)
+ ld rSHL, -24(r1)
srd rWORD1, rWORD1, rN
srd rWORD2, rWORD2, rN
- ld r28,-32(r1)
- ld r27,-40(r1)
+ ld rSHR, -32(r1)
+ ld rWORD8_SHIFT, -40(r1)
li rRTN, 0
- cmpld cr0, rWORD1, rWORD2
- ld r26,-48(r1)
- ld r25,-56(r1)
- beq cr0, L(dureturn24)
+ cmpld cr7, rWORD1, rWORD2
+ ld rWORD2_SHIFT, -48(r1)
+ ld rWORD4_SHIFT, -56(r1)
+ beq cr7, L(dureturn24)
li rRTN, 1
- ld r24,-64(r1)
- bgtlr cr0
+ ld rWORD6_SHIFT, -64(r1)
+ bgtlr cr7
li rRTN, -1
blr
- .align 4
-L(duLcr0):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ .align 4
+L(duLcr7):
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
li rRTN, 1
- bgt cr0, L(dureturn29)
- ld r29,-24(r1)
- ld r28,-32(r1)
+ bgt cr7, L(dureturn29)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
li rRTN, -1
b L(dureturn27)
- .align 4
+ .align 4
L(duLcr1):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
li rRTN, 1
bgt cr1, L(dureturn29)
- ld r29,-24(r1)
- ld r28,-32(r1)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
li rRTN, -1
b L(dureturn27)
- .align 4
+ .align 4
L(duLcr6):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
li rRTN, 1
bgt cr6, L(dureturn29)
- ld r29,-24(r1)
- ld r28,-32(r1)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
li rRTN, -1
b L(dureturn27)
- .align 4
+ .align 4
L(duLcr5):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
li rRTN, 1
bgt cr5, L(dureturn29)
- ld r29,-24(r1)
- ld r28,-32(r1)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
li rRTN, -1
b L(dureturn27)
.align 3
L(duZeroReturn):
- li rRTN,0
+ li rRTN, 0
.align 4
L(dureturn):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
L(dureturn29):
- ld r29,-24(r1)
- ld r28,-32(r1)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
L(dureturn27):
- ld r27,-40(r1)
+ ld rWORD8_SHIFT, -40(r1)
L(dureturn26):
- ld r26,-48(r1)
+ ld rWORD2_SHIFT, -48(r1)
L(dureturn25):
- ld r25,-56(r1)
+ ld rWORD4_SHIFT, -56(r1)
L(dureturn24):
- ld r24,-64(r1)
+ ld rWORD6_SHIFT, -64(r1)
blr
L(duzeroLength):
- li rRTN,0
+ li rRTN, 0
blr
END (memcmp)
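
The little-endian support added above replaces each pair of ld loads with ldbrx (byte-reversed indexed loads).  The reason is that memcmp's result must follow memory order, and an unsigned doubleword compare only gives that directly when the byte at the lowest address ends up most significant in the register.  A minimal C sketch of the idea (editor's illustration, not part of the patch; load_be64 and memcmp_dw_sketch are made-up names):

    #include <stdint.h>
    #include <string.h>

    /* Model of what ld gives on big-endian and ldbrx gives on little-endian:
       the byte at the lowest address becomes the most significant byte.  */
    static uint64_t load_be64 (const unsigned char *p)
    {
      uint64_t v = 0;
      for (int i = 0; i < 8; i++)
        v = (v << 8) | p[i];
      return v;
    }

    /* Doubleword-at-a-time compare of two 8-byte aligned buffers.  */
    static int memcmp_dw_sketch (const void *s1, const void *s2, size_t n)
    {
      const unsigned char *p1 = s1, *p2 = s2;
      while (n >= 8)
        {
          uint64_t w1 = load_be64 (p1), w2 = load_be64 (p2);
          if (w1 != w2)
            return w1 > w2 ? 1 : -1;
          p1 += 8; p2 += 8; n -= 8;
        }
      return n ? memcmp (p1, p2, n) : 0;
    }
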
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/memcpy.S b/libc/sysdeps/powerpc/powerpc64/power4/memcpy.S
index 4317c7e78..f9a7260dc 100644
--- a/libc/sysdeps/powerpc/powerpc64/power4/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/power4/memcpy.S
@@ -214,15 +214,28 @@ EALIGN (memcpy, 5, 0)
blt cr6,5f
srdi 7,6,16
bgt cr6,3f
+#ifdef __LITTLE_ENDIAN__
+ sth 7,0(3)
+#else
sth 6,0(3)
+#endif
b 7f
.align 4
3:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,24
+ stb 6,0(3)
+ sth 7,1(3)
+#else
stb 7,0(3)
sth 6,1(3)
+#endif
b 7f
.align 4
5:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,8
+#endif
stb 6,0(3)
7:
cmpldi cr1,10,16
@@ -334,13 +347,23 @@ EALIGN (memcpy, 5, 0)
bf 30,1f
/* there are at least two DWs to copy */
+#ifdef __LITTLE_ENDIAN__
+ srd 0,6,10
+ sld 8,7,9
+#else
sld 0,6,10
srd 8,7,9
+#endif
or 0,0,8
ld 6,16(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srd 0,7,10
+ sld 8,6,9
+#else
sld 0,7,10
srd 8,6,9
+#endif
or 0,0,8
ld 7,24(5)
std 0,8(4)
@@ -349,8 +372,13 @@ EALIGN (memcpy, 5, 0)
blt cr6,8f /* if total DWs = 3, then bypass loop */
bf 31,4f
/* there is a third DW to copy */
+#ifdef __LITTLE_ENDIAN__
+ srd 0,6,10
+ sld 8,7,9
+#else
sld 0,6,10
srd 8,7,9
+#endif
or 0,0,8
std 0,0(4)
mr 6,7
@@ -361,8 +389,13 @@ EALIGN (memcpy, 5, 0)
b 4f
.align 4
1:
+#ifdef __LITTLE_ENDIAN__
+ srd 0,6,10
+ sld 8,7,9
+#else
sld 0,6,10
srd 8,7,9
+#endif
addi 5,5,16
or 0,0,8
bf 31,4f
@@ -373,23 +406,44 @@ EALIGN (memcpy, 5, 0)
addi 4,4,8
.align 4
/* copy 32 bytes at a time */
-4: sld 0,6,10
+4:
+#ifdef __LITTLE_ENDIAN__
+ srd 0,6,10
+ sld 8,7,9
+#else
+ sld 0,6,10
srd 8,7,9
+#endif
or 0,0,8
ld 6,0(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srd 0,7,10
+ sld 8,6,9
+#else
sld 0,7,10
srd 8,6,9
+#endif
or 0,0,8
ld 7,8(5)
std 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srd 0,6,10
+ sld 8,7,9
+#else
sld 0,6,10
srd 8,7,9
+#endif
or 0,0,8
ld 6,16(5)
std 0,16(4)
+#ifdef __LITTLE_ENDIAN__
+ srd 0,7,10
+ sld 8,6,9
+#else
sld 0,7,10
srd 8,6,9
+#endif
or 0,0,8
ld 7,24(5)
std 0,24(4)
@@ -399,8 +453,13 @@ EALIGN (memcpy, 5, 0)
.align 4
8:
/* calculate and store the final DW */
+#ifdef __LITTLE_ENDIAN__
+ srd 0,6,10
+ sld 8,7,9
+#else
sld 0,6,10
srd 8,7,9
+#endif
or 0,0,8
std 0,0(4)
3:
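
The little-endian blocks above swap each sld/srd pair.  A brief sketch of why (illustration only, not from the patch; merge_dw is a hypothetical helper name): the unaligned copy merges two adjacent source doublewords, and the bytes that come first in memory sit at the most significant end of a register on big-endian but at the least significant end on little-endian, so the shift directions reverse.

    #include <stdint.h>

    /* Merge two adjacent source doublewords into one aligned destination
       doubleword.  'first' is the doubleword at the lower address, 'second'
       the next one; misalign is the source misalignment in bytes (1..7).  */
    static uint64_t merge_dw (uint64_t first, uint64_t second, unsigned misalign)
    {
      unsigned s = misalign * 8;            /* 8 .. 56 bits */
    #ifdef __LITTLE_ENDIAN__
      return (first >> s) | (second << (64 - s));
    #else
      return (first << s) | (second >> (64 - s));
    #endif
    }
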
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/memset.S b/libc/sysdeps/powerpc/powerpc64/power4/memset.S
index dbecee8b9..ad0d38128 100644
--- a/libc/sysdeps/powerpc/powerpc64/power4/memset.S
+++ b/libc/sysdeps/powerpc/powerpc64/power4/memset.S
@@ -50,14 +50,14 @@ L(_memset):
/* Align to doubleword boundary. */
cmpldi cr5, rLEN, 31
- rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */
beq+ L(aligned2)
mtcrf 0x01, rMEMP0
subfic rALIGN, rALIGN, 8
cror 28,30,31 /* Detect odd word aligned. */
add rMEMP, rMEMP, rALIGN
sub rLEN, rLEN, rALIGN
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
bt 29, L(g4)
/* Process the even word of doubleword. */
bf+ 31, L(g2)
@@ -79,14 +79,14 @@ L(g0):
/* Handle the case of size < 31. */
L(aligned2):
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
L(aligned):
mtcrf 0x01, rLEN
ble cr5, L(medium)
/* Align to 32-byte boundary. */
andi. rALIGN, rMEMP, 0x18
subfic rALIGN, rALIGN, 0x20
- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
beq L(caligned)
mtcrf 0x01, rALIGN
add rMEMP, rMEMP, rALIGN
@@ -146,24 +146,24 @@ L(zloopstart):
L(getCacheAligned):
cmpldi cr1,rLEN,32
andi. rTMP,rMEMP,127
- blt cr1,L(handletail32)
- beq L(cacheAligned)
+ blt cr1,L(handletail32)
+ beq L(cacheAligned)
addi rMEMP,rMEMP,32
addi rLEN,rLEN,-32
- std rCHR,-32(rMEMP)
- std rCHR,-24(rMEMP)
- std rCHR,-16(rMEMP)
- std rCHR,-8(rMEMP)
- b L(getCacheAligned)
+ std rCHR,-32(rMEMP)
+ std rCHR,-24(rMEMP)
+ std rCHR,-16(rMEMP)
+ std rCHR,-8(rMEMP)
+ b L(getCacheAligned)
/* Now we are aligned to the cache line and can use dcbz. */
L(cacheAligned):
cmpld cr1,rLEN,rCLS
- blt cr1,L(handletail32)
+ blt cr1,L(handletail32)
dcbz 0,rMEMP
subf rLEN,rCLS,rLEN
- add rMEMP,rMEMP,rCLS
- b L(cacheAligned)
+ add rMEMP,rMEMP,rCLS
+ b L(cacheAligned)
/* We are here because the cache line size was set and was not 32-bytes
and the remainder (rLEN) is less than the actual cache line size.
@@ -200,7 +200,7 @@ L(le4):
/* Memset of 0-31 bytes. */
.align 5
L(medium):
- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
cmpldi cr1, rLEN, 16
L(medium_tail2):
add rMEMP, rMEMP, rLEN
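
The byte-replication steps above build the fill pattern up from a byte to a full doubleword so memset can store 8 bytes (or a cache line) at a time.  What the three insrdi steps compute, as a C sketch (illustration only, not from the patch; replicate_byte is a made-up name):

    #include <stdint.h>

    /* Byte -> halfword -> word -> doubleword fill pattern.  */
    static uint64_t replicate_byte (unsigned char c)
    {
      uint64_t v = c;
      v |= v << 8;    /* replicate byte to halfword */
      v |= v << 16;   /* replicate halfword to word */
      v |= v << 32;   /* replicate word to doubleword */
      return v;
    }
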
diff --git a/libc/sysdeps/powerpc/powerpc64/power4/strncmp.S b/libc/sysdeps/powerpc/powerpc64/power4/strncmp.S
index 1276e16a5..5d136cfa2 100644
--- a/libc/sysdeps/powerpc/powerpc64/power4/strncmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/power4/strncmp.S
@@ -25,7 +25,7 @@
EALIGN (strncmp, 4, 0)
CALL_MCOUNT 3
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -38,6 +38,7 @@ EALIGN (strncmp, 4, 0)
#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */
#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
#define rBITDIF r11 /* bits that differ in s1 & s2 words */
+#define rTMP r12
dcbt 0,rSTR1
or rTMP, rSTR2, rSTR1
@@ -79,12 +80,59 @@ L(g1): add rTMP, rFEFE, rWORD1
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ addi rTMP2, rTMP, -1
+ beq cr1, L(equal)
+ andc rTMP2, rTMP2, rTMP
+ rldimi rTMP2, rTMP2, 1, 0
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ cmpd cr1, rWORD1, rWORD2
+ beq cr1, L(equal)
+ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
+ neg rNEG, rBITDIF
+ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
+ cntlzd rNEG, rNEG /* bitcount of the bit. */
+ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
+ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
+ sld rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ sradi rRTN, rRTN, 63 /* must return an int. */
+ ori rRTN, rRTN, 1
+ blr
+L(equal):
+ li rRTN, 0
+ blr
+
+L(different):
+ ld rWORD1, -8(rSTR1)
+ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
+ neg rNEG, rBITDIF
+ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
+ cntlzd rNEG, rNEG /* bitcount of the bit. */
+ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
+ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
+ sld rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ sradi rRTN, rRTN, 63
+ ori rRTN, rRTN, 1
+ blr
+L(highbit):
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
+ blr
+
+#else
L(endstring):
and rTMP, r7F7F, rWORD1
beq cr1, L(equal)
add rTMP, rTMP, r7F7F
xor. rBITDIF, rWORD1, rWORD2
-
andc rNEG, rNEG, rTMP
blt- L(highbit)
cntlzd rBITDIF, rBITDIF
@@ -93,7 +141,7 @@ L(endstring):
cmpd cr1, rNEG, rBITDIF
sub rRTN, rWORD1, rWORD2
blt- cr1, L(equal)
- sradi rRTN, rRTN, 63
+ sradi rRTN, rRTN, 63 /* must return an int. */
ori rRTN, rRTN, 1
blr
L(equal):
@@ -101,7 +149,7 @@ L(equal):
blr
L(different):
- ldu rWORD1, -8(rSTR1)
+ ld rWORD1, -8(rSTR1)
xor. rBITDIF, rWORD1, rWORD2
sub rRTN, rWORD1, rWORD2
blt- L(highbit)
@@ -109,11 +157,10 @@ L(different):
ori rRTN, rRTN, 1
blr
L(highbit):
- srdi rWORD2, rWORD2, 56
- srdi rWORD1, rWORD1, 56
- sub rRTN, rWORD1, rWORD2
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
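
In the little-endian L(endstring)/L(different) paths above, the first byte that differs in memory order is the least significant differing byte of the loaded doublewords, so the code isolates the lowest set bit of the xor and works from there (using cntlzd on the isolated bit, presumably because a count-trailing-zeros instruction is not assumed on this target).  A C sketch of the same idea (illustration only, assuming GCC builtins; first_diff_byte_le is a made-up name and it omits the null-terminator masking done in L(endstring)):

    #include <stdint.h>

    /* Given two unequal doublewords loaded from little-endian memory,
       return the strcmp-style sign of the first differing byte pair.  */
    static int first_diff_byte_le (uint64_t w1, uint64_t w2)
    {
      uint64_t diff = w1 ^ w2;               /* caller ensures w1 != w2 */
      int k = __builtin_ctzll (diff) & ~7;   /* bit offset of first differing byte */
      unsigned char b1 = (w1 >> k) & 0xff;
      unsigned char b2 = (w2 >> k) & 0xff;
      return b1 < b2 ? -1 : 1;
    }
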
diff --git a/libc/sysdeps/powerpc/powerpc64/power6/memcpy.S b/libc/sysdeps/powerpc/powerpc64/power6/memcpy.S
index d6d242d29..e3f3d8a30 100644
--- a/libc/sysdeps/powerpc/powerpc64/power6/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/power6/memcpy.S
@@ -400,15 +400,28 @@ L(das_tail2):
blt cr6,5f
srdi 7,6,16
bgt cr6,3f
+#ifdef __LITTLE_ENDIAN__
+ sth 7,0(3)
+#else
sth 6,0(3)
+#endif
b 7f
.align 4
3:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,24
+ stb 6,0(3)
+ sth 7,1(3)
+#else
stb 7,0(3)
sth 6,1(3)
+#endif
b 7f
.align 4
5:
+#ifdef __LITTLE_ENDIAN__
+ rotlwi 6,6,8
+#endif
stb 6,0(3)
7:
cmpldi cr1,10,16
@@ -595,13 +608,24 @@ L(du1_do):
bf 30,L(du1_1dw)
/* there are at least two DWs to copy */
+ /* FIXME: can combine last shift and "or" into "rldimi" */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 8
+ sldi 8,7, 64-8
+#else
sldi 0,6, 8
srdi 8,7, 64-8
+#endif
or 0,0,8
ld 6,16(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 8
+ sldi 8,6, 64-8
+#else
sldi 0,7, 8
srdi 8,6, 64-8
+#endif
or 0,0,8
ld 7,24(5)
std 0,8(4)
@@ -610,8 +634,13 @@ L(du1_do):
blt cr6,L(du1_fini) /* if total DWs = 3, then bypass loop */
bf 31,L(du1_loop)
/* there is a third DW to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 8
+ sldi 8,7, 64-8
+#else
sldi 0,6, 8
srdi 8,7, 64-8
+#endif
or 0,0,8
std 0,0(4)
mr 6,7
@@ -622,8 +651,13 @@ L(du1_do):
b L(du1_loop)
.align 4
L(du1_1dw):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 8
+ sldi 8,7, 64-8
+#else
sldi 0,6, 8
srdi 8,7, 64-8
+#endif
addi 5,5,16
or 0,0,8
bf 31,L(du1_loop)
@@ -635,23 +669,43 @@ L(du1_1dw):
.align 4
/* copy 32 bytes at a time */
L(du1_loop):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 8
+ sldi 8,7, 64-8
+#else
sldi 0,6, 8
srdi 8,7, 64-8
+#endif
or 0,0,8
ld 6,0(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 8
+ sldi 8,6, 64-8
+#else
sldi 0,7, 8
srdi 8,6, 64-8
+#endif
or 0,0,8
ld 7,8(5)
std 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 8
+ sldi 8,7, 64-8
+#else
sldi 0,6, 8
srdi 8,7, 64-8
+#endif
or 0,0,8
ld 6,16(5)
std 0,16(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 8
+ sldi 8,6, 64-8
+#else
sldi 0,7, 8
srdi 8,6, 64-8
+#endif
or 0,0,8
ld 7,24(5)
std 0,24(4)
@@ -661,8 +715,13 @@ L(du1_loop):
.align 4
L(du1_fini):
/* calculate and store the final DW */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 8
+ sldi 8,7, 64-8
+#else
sldi 0,6, 8
srdi 8,7, 64-8
+#endif
or 0,0,8
std 0,0(4)
b L(du_done)
@@ -672,13 +731,23 @@ L(du2_do):
bf 30,L(du2_1dw)
/* there are at least two DWs to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 16
+ sldi 8,7, 64-16
+#else
sldi 0,6, 16
srdi 8,7, 64-16
+#endif
or 0,0,8
ld 6,16(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 16
+ sldi 8,6, 64-16
+#else
sldi 0,7, 16
srdi 8,6, 64-16
+#endif
or 0,0,8
ld 7,24(5)
std 0,8(4)
@@ -687,8 +756,13 @@ L(du2_do):
blt cr6,L(du2_fini) /* if total DWs = 3, then bypass loop */
bf 31,L(du2_loop)
/* there is a third DW to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 16
+ sldi 8,7, 64-16
+#else
sldi 0,6, 16
srdi 8,7, 64-16
+#endif
or 0,0,8
std 0,0(4)
mr 6,7
@@ -699,8 +773,13 @@ L(du2_do):
b L(du2_loop)
.align 4
L(du2_1dw):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 16
+ sldi 8,7, 64-16
+#else
sldi 0,6, 16
srdi 8,7, 64-16
+#endif
addi 5,5,16
or 0,0,8
bf 31,L(du2_loop)
@@ -712,23 +791,43 @@ L(du2_1dw):
.align 4
/* copy 32 bytes at a time */
L(du2_loop):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 16
+ sldi 8,7, 64-16
+#else
sldi 0,6, 16
srdi 8,7, 64-16
+#endif
or 0,0,8
ld 6,0(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 16
+ sldi 8,6, 64-16
+#else
sldi 0,7, 16
srdi 8,6, 64-16
+#endif
or 0,0,8
ld 7,8(5)
std 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 16
+ sldi 8,7, 64-16
+#else
sldi 0,6, 16
srdi 8,7, 64-16
+#endif
or 0,0,8
ld 6,16(5)
std 0,16(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 16
+ sldi 8,6, 64-16
+#else
sldi 0,7, 16
srdi 8,6, 64-16
+#endif
or 0,0,8
ld 7,24(5)
std 0,24(4)
@@ -738,8 +837,13 @@ L(du2_loop):
.align 4
L(du2_fini):
/* calculate and store the final DW */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 16
+ sldi 8,7, 64-16
+#else
sldi 0,6, 16
srdi 8,7, 64-16
+#endif
or 0,0,8
std 0,0(4)
b L(du_done)
@@ -749,13 +853,23 @@ L(du3_do):
bf 30,L(du3_1dw)
/* there are at least two DWs to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 24
+ sldi 8,7, 64-24
+#else
sldi 0,6, 24
srdi 8,7, 64-24
+#endif
or 0,0,8
ld 6,16(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 24
+ sldi 8,6, 64-24
+#else
sldi 0,7, 24
srdi 8,6, 64-24
+#endif
or 0,0,8
ld 7,24(5)
std 0,8(4)
@@ -764,8 +878,13 @@ L(du3_do):
blt cr6,L(du3_fini) /* if total DWs = 3, then bypass loop */
bf 31,L(du3_loop)
/* there is a third DW to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 24
+ sldi 8,7, 64-24
+#else
sldi 0,6, 24
srdi 8,7, 64-24
+#endif
or 0,0,8
std 0,0(4)
mr 6,7
@@ -776,8 +895,13 @@ L(du3_do):
b L(du3_loop)
.align 4
L(du3_1dw):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 24
+ sldi 8,7, 64-24
+#else
sldi 0,6, 24
srdi 8,7, 64-24
+#endif
addi 5,5,16
or 0,0,8
bf 31,L(du3_loop)
@@ -789,23 +913,43 @@ L(du3_1dw):
.align 4
/* copy 32 bytes at a time */
L(du3_loop):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 24
+ sldi 8,7, 64-24
+#else
sldi 0,6, 24
srdi 8,7, 64-24
+#endif
or 0,0,8
ld 6,0(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 24
+ sldi 8,6, 64-24
+#else
sldi 0,7, 24
srdi 8,6, 64-24
+#endif
or 0,0,8
ld 7,8(5)
std 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 24
+ sldi 8,7, 64-24
+#else
sldi 0,6, 24
srdi 8,7, 64-24
+#endif
or 0,0,8
ld 6,16(5)
std 0,16(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 24
+ sldi 8,6, 64-24
+#else
sldi 0,7, 24
srdi 8,6, 64-24
+#endif
or 0,0,8
ld 7,24(5)
std 0,24(4)
@@ -815,8 +959,13 @@ L(du3_loop):
.align 4
L(du3_fini):
/* calculate and store the final DW */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 24
+ sldi 8,7, 64-24
+#else
sldi 0,6, 24
srdi 8,7, 64-24
+#endif
or 0,0,8
std 0,0(4)
b L(du_done)
@@ -832,13 +981,23 @@ L(du4_dox):
bf 30,L(du4_1dw)
/* there are at least two DWs to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 32
+ sldi 8,7, 64-32
+#else
sldi 0,6, 32
srdi 8,7, 64-32
+#endif
or 0,0,8
ld 6,16(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 32
+ sldi 8,6, 64-32
+#else
sldi 0,7, 32
srdi 8,6, 64-32
+#endif
or 0,0,8
ld 7,24(5)
std 0,8(4)
@@ -847,8 +1006,13 @@ L(du4_dox):
blt cr6,L(du4_fini) /* if total DWs = 3, then bypass loop */
bf 31,L(du4_loop)
/* there is a third DW to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 32
+ sldi 8,7, 64-32
+#else
sldi 0,6, 32
srdi 8,7, 64-32
+#endif
or 0,0,8
std 0,0(4)
mr 6,7
@@ -859,8 +1023,13 @@ L(du4_dox):
b L(du4_loop)
.align 4
L(du4_1dw):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 32
+ sldi 8,7, 64-32
+#else
sldi 0,6, 32
srdi 8,7, 64-32
+#endif
addi 5,5,16
or 0,0,8
bf 31,L(du4_loop)
@@ -872,23 +1041,43 @@ L(du4_1dw):
.align 4
/* copy 32 bytes at a time */
L(du4_loop):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 32
+ sldi 8,7, 64-32
+#else
sldi 0,6, 32
srdi 8,7, 64-32
+#endif
or 0,0,8
ld 6,0(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 32
+ sldi 8,6, 64-32
+#else
sldi 0,7, 32
srdi 8,6, 64-32
+#endif
or 0,0,8
ld 7,8(5)
std 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 32
+ sldi 8,7, 64-32
+#else
sldi 0,6, 32
srdi 8,7, 64-32
+#endif
or 0,0,8
ld 6,16(5)
std 0,16(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 32
+ sldi 8,6, 64-32
+#else
sldi 0,7, 32
srdi 8,6, 64-32
+#endif
or 0,0,8
ld 7,24(5)
std 0,24(4)
@@ -898,8 +1087,13 @@ L(du4_loop):
.align 4
L(du4_fini):
/* calculate and store the final DW */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 32
+ sldi 8,7, 64-32
+#else
sldi 0,6, 32
srdi 8,7, 64-32
+#endif
or 0,0,8
std 0,0(4)
b L(du_done)
@@ -909,13 +1103,23 @@ L(du5_do):
bf 30,L(du5_1dw)
/* there are at least two DWs to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 40
+ sldi 8,7, 64-40
+#else
sldi 0,6, 40
srdi 8,7, 64-40
+#endif
or 0,0,8
ld 6,16(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 40
+ sldi 8,6, 64-40
+#else
sldi 0,7, 40
srdi 8,6, 64-40
+#endif
or 0,0,8
ld 7,24(5)
std 0,8(4)
@@ -924,8 +1128,13 @@ L(du5_do):
blt cr6,L(du5_fini) /* if total DWs = 3, then bypass loop */
bf 31,L(du5_loop)
/* there is a third DW to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 40
+ sldi 8,7, 64-40
+#else
sldi 0,6, 40
srdi 8,7, 64-40
+#endif
or 0,0,8
std 0,0(4)
mr 6,7
@@ -936,8 +1145,13 @@ L(du5_do):
b L(du5_loop)
.align 4
L(du5_1dw):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 40
+ sldi 8,7, 64-40
+#else
sldi 0,6, 40
srdi 8,7, 64-40
+#endif
addi 5,5,16
or 0,0,8
bf 31,L(du5_loop)
@@ -949,23 +1163,43 @@ L(du5_1dw):
.align 4
/* copy 32 bytes at a time */
L(du5_loop):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 40
+ sldi 8,7, 64-40
+#else
sldi 0,6, 40
srdi 8,7, 64-40
+#endif
or 0,0,8
ld 6,0(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 40
+ sldi 8,6, 64-40
+#else
sldi 0,7, 40
srdi 8,6, 64-40
+#endif
or 0,0,8
ld 7,8(5)
std 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 40
+ sldi 8,7, 64-40
+#else
sldi 0,6, 40
srdi 8,7, 64-40
+#endif
or 0,0,8
ld 6,16(5)
std 0,16(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 40
+ sldi 8,6, 64-40
+#else
sldi 0,7, 40
srdi 8,6, 64-40
+#endif
or 0,0,8
ld 7,24(5)
std 0,24(4)
@@ -975,8 +1209,13 @@ L(du5_loop):
.align 4
L(du5_fini):
/* calculate and store the final DW */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 40
+ sldi 8,7, 64-40
+#else
sldi 0,6, 40
srdi 8,7, 64-40
+#endif
or 0,0,8
std 0,0(4)
b L(du_done)
@@ -986,13 +1225,23 @@ L(du6_do):
bf 30,L(du6_1dw)
/* there are at least two DWs to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 48
+ sldi 8,7, 64-48
+#else
sldi 0,6, 48
srdi 8,7, 64-48
+#endif
or 0,0,8
ld 6,16(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 48
+ sldi 8,6, 64-48
+#else
sldi 0,7, 48
srdi 8,6, 64-48
+#endif
or 0,0,8
ld 7,24(5)
std 0,8(4)
@@ -1001,8 +1250,13 @@ L(du6_do):
blt cr6,L(du6_fini) /* if total DWs = 3, then bypass loop */
bf 31,L(du6_loop)
/* there is a third DW to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 48
+ sldi 8,7, 64-48
+#else
sldi 0,6, 48
srdi 8,7, 64-48
+#endif
or 0,0,8
std 0,0(4)
mr 6,7
@@ -1013,8 +1267,13 @@ L(du6_do):
b L(du6_loop)
.align 4
L(du6_1dw):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 48
+ sldi 8,7, 64-48
+#else
sldi 0,6, 48
srdi 8,7, 64-48
+#endif
addi 5,5,16
or 0,0,8
bf 31,L(du6_loop)
@@ -1026,23 +1285,43 @@ L(du6_1dw):
.align 4
/* copy 32 bytes at a time */
L(du6_loop):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 48
+ sldi 8,7, 64-48
+#else
sldi 0,6, 48
srdi 8,7, 64-48
+#endif
or 0,0,8
ld 6,0(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 48
+ sldi 8,6, 64-48
+#else
sldi 0,7, 48
srdi 8,6, 64-48
+#endif
or 0,0,8
ld 7,8(5)
std 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 48
+ sldi 8,7, 64-48
+#else
sldi 0,6, 48
srdi 8,7, 64-48
+#endif
or 0,0,8
ld 6,16(5)
std 0,16(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 48
+ sldi 8,6, 64-48
+#else
sldi 0,7, 48
srdi 8,6, 64-48
+#endif
or 0,0,8
ld 7,24(5)
std 0,24(4)
@@ -1052,8 +1331,13 @@ L(du6_loop):
.align 4
L(du6_fini):
/* calculate and store the final DW */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 48
+ sldi 8,7, 64-48
+#else
sldi 0,6, 48
srdi 8,7, 64-48
+#endif
or 0,0,8
std 0,0(4)
b L(du_done)
@@ -1063,13 +1347,23 @@ L(du7_do):
bf 30,L(du7_1dw)
/* there are at least two DWs to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 56
+ sldi 8,7, 64-56
+#else
sldi 0,6, 56
srdi 8,7, 64-56
+#endif
or 0,0,8
ld 6,16(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 56
+ sldi 8,6, 64-56
+#else
sldi 0,7, 56
srdi 8,6, 64-56
+#endif
or 0,0,8
ld 7,24(5)
std 0,8(4)
@@ -1078,8 +1372,13 @@ L(du7_do):
blt cr6,L(du7_fini) /* if total DWs = 3, then bypass loop */
bf 31,L(du7_loop)
/* there is a third DW to copy */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 56
+ sldi 8,7, 64-56
+#else
sldi 0,6, 56
srdi 8,7, 64-56
+#endif
or 0,0,8
std 0,0(4)
mr 6,7
@@ -1090,8 +1389,13 @@ L(du7_do):
b L(du7_loop)
.align 4
L(du7_1dw):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 56
+ sldi 8,7, 64-56
+#else
sldi 0,6, 56
srdi 8,7, 64-56
+#endif
addi 5,5,16
or 0,0,8
bf 31,L(du7_loop)
@@ -1103,23 +1407,43 @@ L(du7_1dw):
.align 4
/* copy 32 bytes at a time */
L(du7_loop):
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 56
+ sldi 8,7, 64-56
+#else
sldi 0,6, 56
srdi 8,7, 64-56
+#endif
or 0,0,8
ld 6,0(5)
std 0,0(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 56
+ sldi 8,6, 64-56
+#else
sldi 0,7, 56
srdi 8,6, 64-56
+#endif
or 0,0,8
ld 7,8(5)
std 0,8(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 56
+ sldi 8,7, 64-56
+#else
sldi 0,6, 56
srdi 8,7, 64-56
+#endif
or 0,0,8
ld 6,16(5)
std 0,16(4)
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,7, 56
+ sldi 8,6, 64-56
+#else
sldi 0,7, 56
srdi 8,6, 64-56
+#endif
or 0,0,8
ld 7,24(5)
std 0,24(4)
@@ -1129,8 +1453,13 @@ L(du7_loop):
.align 4
L(du7_fini):
/* calculate and store the final DW */
+#ifdef __LITTLE_ENDIAN__
+ srdi 0,6, 56
+ sldi 8,7, 64-56
+#else
sldi 0,6, 56
srdi 8,7, 64-56
+#endif
or 0,0,8
std 0,0(4)
b L(du_done)
diff --git a/libc/sysdeps/powerpc/powerpc64/power6/memset.S b/libc/sysdeps/powerpc/powerpc64/power6/memset.S
index 3e8ae2d25..d61988a19 100644
--- a/libc/sysdeps/powerpc/powerpc64/power6/memset.S
+++ b/libc/sysdeps/powerpc/powerpc64/power6/memset.S
@@ -47,14 +47,14 @@ L(_memset):
/* Align to doubleword boundary. */
cmpldi cr5, rLEN, 31
- rlwimi rCHR, rCHR, 8, 16, 23 /* Replicate byte to halfword. */
+ insrdi rCHR, rCHR, 8, 48 /* Replicate byte to halfword. */
beq+ L(aligned2)
mtcrf 0x01, rMEMP0
subfic rALIGN, rALIGN, 8
cror 28,30,31 /* Detect odd word aligned. */
add rMEMP, rMEMP, rALIGN
sub rLEN, rLEN, rALIGN
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
bt 29, L(g4)
/* Process the even word of doubleword. */
bf+ 31, L(g2)
@@ -76,14 +76,14 @@ L(g0):
/* Handle the case of size < 31. */
L(aligned2):
- rlwimi rCHR, rCHR, 16, 0, 15 /* Replicate halfword to word. */
+ insrdi rCHR, rCHR, 16, 32 /* Replicate halfword to word. */
L(aligned):
mtcrf 0x01, rLEN
ble cr5, L(medium)
/* Align to 32-byte boundary. */
andi. rALIGN, rMEMP, 0x18
subfic rALIGN, rALIGN, 0x20
- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
beq L(caligned)
mtcrf 0x01, rALIGN
add rMEMP, rMEMP, rALIGN
@@ -344,7 +344,7 @@ L(le4):
/* Memset of 0-31 bytes. */
.align 5
L(medium):
- insrdi rCHR,rCHR,32,0 /* Replicate word to double word. */
+ insrdi rCHR, rCHR, 32, 0 /* Replicate word to double word. */
cmpldi cr1, rLEN, 16
L(medium_tail2):
add rMEMP, rMEMP, rLEN
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S b/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S
index d0071c765..ebec0e0ba 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_finite.S
@@ -39,10 +39,8 @@ EALIGN (__finite, 4, 0)
stfd fp1,-16(r1) /* Transfer FP to GPR's. */
ori 2,2,0 /* Force a new dispatch group. */
-
- lhz r4,-16(r1) /* Fetch the upper portion of the high word of
- the FP value (where the exponent and sign bits
- are). */
+ lhz r4,-16+HISHORT(r1) /* Fetch the upper 16 bits of the FP value
+ (biased exponent and sign bit). */
clrlwi r4,r4,17 /* r4 = abs(r4). */
cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? */
bltlr cr7 /* LT means finite, other non-finite. */
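
The lhz change above fetches just the 16 bits holding the sign and biased exponent; HISHORT is presumably the endian-dependent offset of that halfword within the stored double.  The test itself is simply "biased exponent all ones means Inf or NaN".  A C sketch (illustration only, not from the patch; finite_sketch is a made-up name):

    #include <stdint.h>
    #include <string.h>

    static int finite_sketch (double x)
    {
      uint64_t bits;
      memcpy (&bits, &x, sizeof bits);        /* like stfd followed by a reload */
      unsigned hi = (bits >> 48) & 0x7fff;    /* top 16 bits, sign dropped */
      return hi < 0x7ff0;                     /* 0x7ffx means Inf or NaN */
    }
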
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S b/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S
index 1aea12383..8d088db5a 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/fpu/s_isinf.S
@@ -38,9 +38,8 @@ EALIGN (__isinf, 4, 0)
stfd fp1,-16(r1) /* Transfer FP to GPR's. */
ori 2,2,0 /* Force a new dispatch group. */
- lhz r4,-16(r1) /* Fetch the upper portion of the high word of
- the FP value (where the exponent and sign bits
- are). */
+ lhz r4,-16+HISHORT(r1) /* Fetch the upper 16 bits of the FP value
+ (biased exponent and sign bit). */
cmpwi cr7,r4,0x7ff0 /* r4 == 0x7ff0? */
li r3,1
beqlr cr7 /* EQ means INF, otherwise -INF. */
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/memchr.S b/libc/sysdeps/powerpc/powerpc64/power7/memchr.S
index 3416897f5..5076dd0c1 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/memchr.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/memchr.S
@@ -25,109 +25,112 @@ ENTRY (__memchr)
CALL_MCOUNT 2
dcbt 0,r3
clrrdi r8,r3,3
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
add r7,r3,r5 /* Calculate the last acceptable address. */
+ insrdi r4,r4,16,32
cmpldi r5,32
+ li r9, -1
+ rlwinm r6,r3,3,26,28 /* Calculate padding. */
insrdi r4,r4,32,0
+ addi r7,r7,-1
+#ifdef __LITTLE_ENDIAN__
+ sld r9,r9,r6
+#else
+ srd r9,r9,r6
+#endif
ble L(small_range)
- cmpld cr7,r3,r7 /* Compare the starting address (r3) with the
- ending address (r7). If (r3 >= r7),
- the size passed in was zero or negative. */
- ble cr7,L(proceed)
-
- li r7,-1 /* Artificially set our ending address (r7)
- such that we will exit early. */
-
-L(proceed):
- rlwinm r6,r3,3,26,28 /* Calculate padding. */
- cmpldi cr6,r6,0 /* cr6 == Do we have padding? */
ld r12,0(r8) /* Load doubleword from memory. */
- cmpb r10,r12,r4 /* Check for BYTEs in DWORD1. */
- beq cr6,L(proceed_no_padding)
- sld r10,r10,r6
- srd r10,r10,r6
-L(proceed_no_padding):
- cmpldi cr7,r10,0 /* Does r10 indicate we got a hit? */
+ cmpb r3,r12,r4 /* Check for BYTEs in DWORD1. */
+ and r3,r3,r9
+ clrldi r5,r7,61 /* Byte count - 1 in last dword. */
+ clrrdi r7,r7,3 /* Address of last doubleword. */
+ cmpldi cr7,r3,0 /* Does r3 indicate we got a hit? */
bne cr7,L(done)
- /* See if we are at the last acceptable address yet. */
- addi r9,r8,8
- cmpld cr6,r9,r7
- bge cr6,L(null)
-
mtcrf 0x01,r8
/* Are we now aligned to a quadword boundary? If so, skip to
the main loop. Otherwise, go through the alignment code. */
-
bt 28,L(loop_setup)
/* Handle DWORD2 of pair. */
ldu r12,8(r8)
- cmpb r10,r12,r4
- cmpldi cr7,r10,0
+ cmpb r3,r12,r4
+ cmpldi cr7,r3,0
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,8
- cmpld cr6,r9,r7
- bge cr6,L(null)
-
L(loop_setup):
- sub r5,r7,r9
- srdi r6,r5,4 /* Number of loop iterations. */
+ /* The last dword we want to read in the loop below is the one
+ containing the last byte of the string, ie. the dword at
+ (s + size - 1) & ~7, or r7. The first dword read is at
+ r8 + 8, we read 2 * cnt dwords, so the last dword read will
+ be at r8 + 8 + 16 * cnt - 8. Solving for cnt gives
+ cnt = (r7 - r8) / 16 */
+ sub r6,r7,r8
+ srdi r6,r6,4 /* Number of loop iterations. */
mtctr r6 /* Setup the counter. */
- b L(loop)
- /* Main loop to look for BYTE backwards in the string. Since
- it's a small loop (< 8 instructions), align it to 32-bytes. */
- .p2align 5
+
+ /* Main loop to look for BYTE in the string. Since
+ it's a small loop (8 instructions), align it to 32-bytes. */
+ .align 5
L(loop):
/* Load two doublewords, compare and merge in a
single register for speed. This is an attempt
to speed up the byte-checking process for bigger strings. */
ld r12,8(r8)
ldu r11,16(r8)
- cmpb r10,r12,r4
+ cmpb r3,r12,r4
cmpb r9,r11,r4
- or r5,r9,r10 /* Merge everything in one doubleword. */
- cmpldi cr7,r5,0
+ or r6,r9,r3 /* Merge everything in one doubleword. */
+ cmpldi cr7,r6,0
bne cr7,L(found)
bdnz L(loop)
- /* We're here because the counter reached 0, and that means we
- didn't have any matches for BYTE in the whole range. */
- subi r11,r7,8
- cmpld cr6,r8,r11
- blt cr6,L(loop_small)
- b L(null)
+ /* We may have one more dword to read. */
+ cmpld r8,r7
+ beqlr
+ ldu r12,8(r8)
+ cmpb r3,r12,r4
+ cmpldi cr6,r3,0
+ bne cr6,L(done)
+ blr
+
+ .align 4
+L(found):
/* OK, one (or both) of the doublewords contains BYTE. Check
the first doubleword and decrement the address in case the first
doubleword really contains BYTE. */
- .align 4
-L(found):
- cmpldi cr6,r10,0
+ cmpldi cr6,r3,0
addi r8,r8,-8
bne cr6,L(done)
/* BYTE must be in the second doubleword. Adjust the address
- again and move the result of cmpb to r10 so we can calculate the
+ again and move the result of cmpb to r3 so we can calculate the
pointer. */
- mr r10,r9
+ mr r3,r9
addi r8,r8,8
- /* r10 has the output of the cmpb instruction, that is, it contains
+ /* r3 has the output of the cmpb instruction, that is, it contains
0xff in the same position as BYTE in the original
doubleword from the string. Use that to calculate the pointer.
We need to make sure BYTE is *before* the end of the range. */
L(done):
- cntlzd r0,r10 /* Count leading zeroes before the match. */
- srdi r0,r0,3 /* Convert leading zeroes to bytes. */
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r3,-1
+ andc r0,r0,r3
+ popcntd r0,r0 /* Count trailing zeros. */
+#else
+ cntlzd r0,r3 /* Count leading zeros before the match. */
+#endif
+ cmpld r8,r7 /* Are we on the last dword? */
+ srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */
add r3,r8,r0
- cmpld r3,r7
- bge L(null)
+ cmpld cr7,r0,r5 /* If on the last dword, check byte offset. */
+ bnelr
+ blelr cr7
+ li r3,0
blr
.align 4
@@ -139,67 +142,44 @@ L(null):
.align 4
L(small_range):
cmpldi r5,0
- rlwinm r6,r3,3,26,28 /* Calculate padding. */
- beq L(null) /* This branch is for the cmpldi r5,0 above. */
+ beq L(null)
ld r12,0(r8) /* Load word from memory. */
- cmpldi cr6,r6,0 /* cr6 == Do we have padding? */
- cmpb r10,r12,r4 /* Check for BYTE in DWORD1. */
- /* If no padding, skip the shifts. */
- beq cr6,L(small_no_padding)
- sld r10,r10,r6
- srd r10,r10,r6
-L(small_no_padding):
- cmpldi cr7,r10,0
+ cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */
+ and r3,r3,r9
+ cmpldi cr7,r3,0
+ clrldi r5,r7,61 /* Byte count - 1 in last dword. */
+ clrrdi r7,r7,3 /* Address of last doubleword. */
+ cmpld r8,r7 /* Are we done already? */
bne cr7,L(done)
+ beqlr
- /* Are we done already? */
- addi r9,r8,8
- cmpld r9,r7
- bge L(null)
- /* If we're not done, drop through into loop_small. */
-
-L(loop_small): /* loop_small has been unrolled. */
ldu r12,8(r8)
- cmpb r10,r12,r4
- addi r9,r8,8
- cmpldi cr6,r10,0
- cmpld r9,r7
+ cmpb r3,r12,r4
+ cmpldi cr6,r3,0
+ cmpld r8,r7
bne cr6,L(done) /* Found something. */
- bge L(null) /* Hit end of string (length). */
+ beqlr /* Hit end of string (length). */
ldu r12,8(r8)
- cmpb r10,r12,r4
- addi r9,r8,8
- cmpldi cr6,r10,0
- cmpld r9,r7
- bne cr6,L(done) /* Found something. */
- bge L(null)
+ cmpb r3,r12,r4
+ cmpldi cr6,r3,0
+ cmpld r8,r7
+ bne cr6,L(done)
+ beqlr
ldu r12,8(r8)
- subi r11,r7,8
- cmpb r10,r12,r4
- cmpldi cr6,r10,0
- ori r2,r2,0 /* Force a dispatch group. */
+ cmpb r3,r12,r4
+ cmpldi cr6,r3,0
+ cmpld r8,r7
bne cr6,L(done)
+ beqlr
- cmpld r8,r11 /* At end of range? */
- bge L(null)
-
- /* For most cases we will never get here. Under some combinations of
- padding + length there is a leftover double that still needs to be
- checked. */
ldu r12,8(r8)
- cmpb r10,r12,r4
- addi r9,r8,8
- cmpldi cr6,r10,0
- cmpld r9,r7
- bne cr6,L(done) /* Found something. */
-
- /* Save a branch and exit directly. */
- li r3,0
+ cmpb r3,r12,r4
+ cmpldi cr6,r3,0
+ bne cr6,L(done)
blr
-
END (__memchr)
weak_alias (__memchr, memchr)
libc_hidden_builtin_def (memchr)
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/memcmp.S b/libc/sysdeps/powerpc/powerpc64/power7/memcmp.S
index f190c6461..6851cdc75 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/memcmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/memcmp.S
@@ -23,10 +23,9 @@
size_t size [r5]) */
.machine power7
-EALIGN (memcmp,4,0)
+EALIGN (memcmp, 4, 0)
CALL_MCOUNT 3
-#define rTMP r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -37,354 +36,557 @@ EALIGN (memcmp,4,0)
#define rWORD4 r9 /* next word in s2 */
#define rWORD5 r10 /* next word in s1 */
#define rWORD6 r11 /* next word in s2 */
-#define rBITDIF r12 /* bits that differ in s1 & s2 words */
#define rWORD7 r30 /* next word in s1 */
#define rWORD8 r31 /* next word in s2 */
- xor rTMP,rSTR2,rSTR1
- cmpldi cr6,rN,0
- cmpldi cr1,rN,12
- clrldi. rTMP,rTMP,61
- clrldi rBITDIF,rSTR1,61
- cmpldi cr5,rBITDIF,0
- beq- cr6,L(zeroLength)
- dcbt 0,rSTR1
- dcbt 0,rSTR2
+ xor r0, rSTR2, rSTR1
+ cmpldi cr6, rN, 0
+ cmpldi cr1, rN, 12
+ clrldi. r0, r0, 61
+ clrldi r12, rSTR1, 61
+ cmpldi cr5, r12, 0
+ beq- cr6, L(zeroLength)
+ dcbt 0, rSTR1
+ dcbt 0, rSTR2
/* If less than 8 bytes or not aligned, use the unaligned
byte loop. */
- blt cr1,L(bytealigned)
- std rWORD8,-8(r1)
- cfi_offset(rWORD8,-8)
- std rWORD7,-16(r1)
- cfi_offset(rWORD7,-16)
+ blt cr1, L(bytealigned)
+ std rWORD8, -8(r1)
+ cfi_offset(rWORD8, -8)
+ std rWORD7, -16(r1)
+ cfi_offset(rWORD7, -16)
bne L(unaligned)
/* At this point we know both strings have the same alignment and the
- compare length is at least 8 bytes. rBITDIF contains the low order
+ compare length is at least 8 bytes. r12 contains the low order
3 bits of rSTR1 and cr5 contains the result of the logical compare
- of rBITDIF to 0. If rBITDIF == 0 then we are already double word
- aligned and can perform the DWaligned loop.
+ of r12 to 0. If r12 == 0 then we are already double word
+ aligned and can perform the DW aligned loop.
Otherwise we know the two strings have the same alignment (but not
- yet DW). So we can force the string addresses to the next lower DW
- boundary and special case this first DW word using shift left to
+ yet DW). So we force the string addresses to the next lower DW
+ boundary and special case this first DW using shift left to
eliminate bits preceding the first byte. Since we want to join the
- normal (DWaligned) compare loop, starting at the second double word,
+ normal (DW aligned) compare loop, starting at the second double word,
we need to adjust the length (rN) and special case the loop
- versioning for the first DW. This insures that the loop count is
- correct and the first DW (shifted) is in the expected resister pair. */
+ versioning for the first DW. This ensures that the loop count is
+ correct and the first DW (shifted) is in the expected register pair. */
.align 4
L(samealignment):
- clrrdi rSTR1,rSTR1,3
- clrrdi rSTR2,rSTR2,3
- beq cr5,L(DWaligned)
- add rN,rN,rBITDIF
- sldi r11,rBITDIF,3
- srdi rTMP,rN,5 /* Divide by 32 */
- andi. rBITDIF,rN,24 /* Get the DW remainder */
- ld rWORD1,0(rSTR1)
- ld rWORD2,0(rSTR2)
- cmpldi cr1,rBITDIF,16
- cmpldi cr7,rN,32
- clrldi rN,rN,61
+ clrrdi rSTR1, rSTR1, 3
+ clrrdi rSTR2, rSTR2, 3
+ beq cr5, L(DWaligned)
+ add rN, rN, r12
+ sldi rWORD6, r12, 3
+ srdi r0, rN, 5 /* Divide by 32 */
+ andi. r12, rN, 24 /* Get the DW remainder */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 0(rSTR1)
+ ld rWORD2, 0(rSTR2)
+#endif
+ cmpldi cr1, r12, 16
+ cmpldi cr7, rN, 32
+ clrldi rN, rN, 61
beq L(dPs4)
- mtctr rTMP
- bgt cr1,L(dPs3)
- beq cr1,L(dPs2)
+ mtctr r0
+ bgt cr1, L(dPs3)
+ beq cr1, L(dPs2)
/* Remainder is 8 */
.align 3
L(dsP1):
- sld rWORD5,rWORD1,r11
- sld rWORD6,rWORD2,r11
- cmpld cr5,rWORD5,rWORD6
- blt cr7,L(dP1x)
+ sld rWORD5, rWORD1, rWORD6
+ sld rWORD6, rWORD2, rWORD6
+ cmpld cr5, rWORD5, rWORD6
+ blt cr7, L(dP1x)
/* Do something useful in this cycle since we have to branch anyway. */
- ld rWORD1,8(rSTR1)
- ld rWORD2,8(rSTR2)
- cmpld cr0,rWORD1,rWORD2
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
b L(dP1e)
/* Remainder is 16 */
.align 4
L(dPs2):
- sld rWORD5,rWORD1,r11
- sld rWORD6,rWORD2,r11
- cmpld cr6,rWORD5,rWORD6
- blt cr7,L(dP2x)
+ sld rWORD5, rWORD1, rWORD6
+ sld rWORD6, rWORD2, rWORD6
+ cmpld cr6, rWORD5, rWORD6
+ blt cr7, L(dP2x)
/* Do something useful in this cycle since we have to branch anyway. */
- ld rWORD7,8(rSTR1)
- ld rWORD8,8(rSTR2)
- cmpld cr5,rWORD7,rWORD8
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD7, 8(rSTR1)
+ ld rWORD8, 8(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
b L(dP2e)
/* Remainder is 24 */
.align 4
L(dPs3):
- sld rWORD3,rWORD1,r11
- sld rWORD4,rWORD2,r11
- cmpld cr1,rWORD3,rWORD4
+ sld rWORD3, rWORD1, rWORD6
+ sld rWORD4, rWORD2, rWORD6
+ cmpld cr1, rWORD3, rWORD4
b L(dP3e)
/* Count is a multiple of 32, remainder is 0 */
.align 4
L(dPs4):
- mtctr rTMP
- sld rWORD1,rWORD1,r11
- sld rWORD2,rWORD2,r11
- cmpld cr0,rWORD1,rWORD2
+ mtctr r0
+ sld rWORD1, rWORD1, rWORD6
+ sld rWORD2, rWORD2, rWORD6
+ cmpld cr7, rWORD1, rWORD2
b L(dP4e)
/* At this point we know both strings are double word aligned and the
compare length is at least 8 bytes. */
.align 4
L(DWaligned):
- andi. rBITDIF,rN,24 /* Get the DW remainder */
- srdi rTMP,rN,5 /* Divide by 32 */
- cmpldi cr1,rBITDIF,16
- cmpldi cr7,rN,32
- clrldi rN,rN,61
+ andi. r12, rN, 24 /* Get the DW remainder */
+ srdi r0, rN, 5 /* Divide by 32 */
+ cmpldi cr1, r12, 16
+ cmpldi cr7, rN, 32
+ clrldi rN, rN, 61
beq L(dP4)
- bgt cr1,L(dP3)
- beq cr1,L(dP2)
+ bgt cr1, L(dP3)
+ beq cr1, L(dP2)
/* Remainder is 8 */
.align 4
L(dP1):
- mtctr rTMP
+ mtctr r0
/* Normally we'd use rWORD7/rWORD8 here, but since we might exit early
(8-15 byte compare), we want to use only volatile registers. This
means we can avoid restoring non-volatile registers since we did not
change any on the early exit path. The key here is the non-early
exit path only cares about the condition code (cr5), not about which
register pair was used. */
- ld rWORD5,0(rSTR1)
- ld rWORD6,0(rSTR2)
- cmpld cr5,rWORD5,rWORD6
- blt cr7,L(dP1x)
- ld rWORD1,8(rSTR1)
- ld rWORD2,8(rSTR2)
- cmpld cr0,rWORD1,rWORD2
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 0(rSTR1)
+ ld rWORD6, 0(rSTR2)
+#endif
+ cmpld cr5, rWORD5, rWORD6
+ blt cr7, L(dP1x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
L(dP1e):
- ld rWORD3,16(rSTR1)
- ld rWORD4,16(rSTR2)
- cmpld cr1,rWORD3,rWORD4
- ld rWORD5,24(rSTR1)
- ld rWORD6,24(rSTR2)
- cmpld cr6,rWORD5,rWORD6
- bne cr5,L(dLcr5)
- bne cr0,L(dLcr0)
-
- ldu rWORD7,32(rSTR1)
- ldu rWORD8,32(rSTR2)
- bne cr1,L(dLcr1)
- cmpld cr5,rWORD7,rWORD8
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 16(rSTR1)
+ ld rWORD4, 16(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 24(rSTR1)
+ ld rWORD6, 24(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5x)
+ bne cr7, L(dLcr7x)
+
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ldu rWORD7, 32(rSTR1)
+ ldu rWORD8, 32(rSTR2)
+#endif
+ bne cr1, L(dLcr1)
+ cmpld cr5, rWORD7, rWORD8
bdnz L(dLoop)
- bne cr6,L(dLcr6)
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ bne cr6, L(dLcr6)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
.align 3
L(dP1x):
- sldi. r12,rN,3
- bne cr5,L(dLcr5)
- subfic rN,r12,64 /* Shift count is 64 - (rN * 8). */
+ sldi. r12, rN, 3
+ bne cr5, L(dLcr5x)
+ subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
bne L(d00)
- li rRTN,0
+ li rRTN, 0
blr
/* Remainder is 16 */
.align 4
L(dP2):
- mtctr rTMP
- ld rWORD5,0(rSTR1)
- ld rWORD6,0(rSTR2)
- cmpld cr6,rWORD5,rWORD6
- blt cr7,L(dP2x)
- ld rWORD7,8(rSTR1)
- ld rWORD8,8(rSTR2)
- cmpld cr5,rWORD7,rWORD8
+ mtctr r0
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 0(rSTR1)
+ ld rWORD6, 0(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+ blt cr7, L(dP2x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD7, 8(rSTR1)
+ ld rWORD8, 8(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
L(dP2e):
- ld rWORD1,16(rSTR1)
- ld rWORD2,16(rSTR2)
- cmpld cr0,rWORD1,rWORD2
- ld rWORD3,24(rSTR1)
- ld rWORD4,24(rSTR2)
- cmpld cr1,rWORD3,rWORD4
- addi rSTR1,rSTR1,8
- addi rSTR2,rSTR2,8
- bne cr6,L(dLcr6)
- bne cr5,L(dLcr5)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 16(rSTR1)
+ ld rWORD2, 16(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 24(rSTR1)
+ ld rWORD4, 24(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#endif
+ bne cr6, L(dLcr6)
+ bne cr5, L(dLcr5)
b L(dLoop2)
/* Again we are on an early exit path (16-23 byte compare), we want to
only use volatile registers and avoid restoring non-volatile
registers. */
.align 4
L(dP2x):
- ld rWORD3,8(rSTR1)
- ld rWORD4,8(rSTR2)
- cmpld cr5,rWORD3,rWORD4
- sldi. r12,rN,3
- bne cr6,L(dLcr6)
- addi rSTR1,rSTR1,8
- addi rSTR2,rSTR2,8
- bne cr5,L(dLcr5)
- subfic rN,r12,64 /* Shift count is 64 - (rN * 8). */
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 8(rSTR1)
+ ld rWORD4, 8(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+ sldi. r12, rN, 3
+ bne cr6, L(dLcr6x)
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#endif
+ bne cr1, L(dLcr1x)
+ subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
bne L(d00)
- li rRTN,0
+ li rRTN, 0
blr
/* Remainder is 24 */
.align 4
L(dP3):
- mtctr rTMP
- ld rWORD3,0(rSTR1)
- ld rWORD4,0(rSTR2)
- cmpld cr1,rWORD3,rWORD4
+ mtctr r0
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 0(rSTR1)
+ ld rWORD4, 0(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
L(dP3e):
- ld rWORD5,8(rSTR1)
- ld rWORD6,8(rSTR2)
- cmpld cr6,rWORD5,rWORD6
- blt cr7,L(dP3x)
- ld rWORD7,16(rSTR1)
- ld rWORD8,16(rSTR2)
- cmpld cr5,rWORD7,rWORD8
- ld rWORD1,24(rSTR1)
- ld rWORD2,24(rSTR2)
- cmpld cr0,rWORD1,rWORD2
- addi rSTR1,rSTR1,16
- addi rSTR2,rSTR2,16
- bne cr1,L(dLcr1)
- bne cr6,L(dLcr6)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 8(rSTR1)
+ ld rWORD6, 8(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+ blt cr7, L(dP3x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD7, 16(rSTR1)
+ ld rWORD8, 16(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 24(rSTR1)
+ ld rWORD2, 24(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 16
+ addi rSTR2, rSTR2, 16
+#endif
+ bne cr1, L(dLcr1)
+ bne cr6, L(dLcr6)
b L(dLoop1)
/* Again we are on an early exit path (24-31 byte compare), we want to
only use volatile registers and avoid restoring non-volatile
registers. */
.align 4
L(dP3x):
- ld rWORD1,16(rSTR1)
- ld rWORD2,16(rSTR2)
- cmpld cr5,rWORD1,rWORD2
- sldi. r12,rN,3
- bne cr1,L(dLcr1)
- addi rSTR1,rSTR1,16
- addi rSTR2,rSTR2,16
- bne cr6,L(dLcr6)
- subfic rN,r12,64 /* Shift count is 64 - (rN * 8). */
- bne cr5,L(dLcr5)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 16(rSTR1)
+ ld rWORD2, 16(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
+ sldi. r12, rN, 3
+ bne cr1, L(dLcr1x)
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 16
+ addi rSTR2, rSTR2, 16
+#endif
+ bne cr6, L(dLcr6x)
+ subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
+ bne cr7, L(dLcr7x)
bne L(d00)
- li rRTN,0
+ li rRTN, 0
blr
/* Count is a multiple of 32, remainder is 0 */
.align 4
L(dP4):
- mtctr rTMP
- ld rWORD1,0(rSTR1)
- ld rWORD2,0(rSTR2)
- cmpld cr0,rWORD1,rWORD2
+ mtctr r0
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 0(rSTR1)
+ ld rWORD2, 0(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
L(dP4e):
- ld rWORD3,8(rSTR1)
- ld rWORD4,8(rSTR2)
- cmpld cr1,rWORD3,rWORD4
- ld rWORD5,16(rSTR1)
- ld rWORD6,16(rSTR2)
- cmpld cr6,rWORD5,rWORD6
- ldu rWORD7,24(rSTR1)
- ldu rWORD8,24(rSTR2)
- cmpld cr5,rWORD7,rWORD8
- bne cr0,L(dLcr0)
- bne cr1,L(dLcr1)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 8(rSTR1)
+ ld rWORD4, 8(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 16(rSTR1)
+ ld rWORD6, 16(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ldu rWORD7, 24(rSTR1)
+ ldu rWORD8, 24(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
+ bne cr7, L(dLcr7)
+ bne cr1, L(dLcr1)
bdz- L(d24) /* Adjust CTR as we start with +4 */
/* This is the primary loop */
.align 4
L(dLoop):
- ld rWORD1,8(rSTR1)
- ld rWORD2,8(rSTR2)
- cmpld cr1,rWORD3,rWORD4
- bne cr6,L(dLcr6)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+ bne cr6, L(dLcr6)
L(dLoop1):
- ld rWORD3,16(rSTR1)
- ld rWORD4,16(rSTR2)
- cmpld cr6,rWORD5,rWORD6
- bne cr5,L(dLcr5)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 16(rSTR1)
+ ld rWORD4, 16(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5)
L(dLoop2):
- ld rWORD5,24(rSTR1)
- ld rWORD6,24(rSTR2)
- cmpld cr5,rWORD7,rWORD8
- bne cr0,L(dLcr0)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 24(rSTR1)
+ ld rWORD6, 24(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
+ bne cr7, L(dLcr7)
L(dLoop3):
- ldu rWORD7,32(rSTR1)
- ldu rWORD8,32(rSTR2)
- bne cr1,L(dLcr1)
- cmpld cr0,rWORD1,rWORD2
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ldu rWORD7, 32(rSTR1)
+ ldu rWORD8, 32(rSTR2)
+#endif
+ bne cr1, L(dLcr1)
+ cmpld cr7, rWORD1, rWORD2
bdnz L(dLoop)
L(dL4):
- cmpld cr1,rWORD3,rWORD4
- bne cr6,L(dLcr6)
- cmpld cr6,rWORD5,rWORD6
- bne cr5,L(dLcr5)
- cmpld cr5,rWORD7,rWORD8
+ cmpld cr1, rWORD3, rWORD4
+ bne cr6, L(dLcr6)
+ cmpld cr6, rWORD5, rWORD6
+ bne cr5, L(dLcr5)
+ cmpld cr5, rWORD7, rWORD8
L(d44):
- bne cr0,L(dLcr0)
+ bne cr7, L(dLcr7)
L(d34):
- bne cr1,L(dLcr1)
+ bne cr1, L(dLcr1)
L(d24):
- bne cr6,L(dLcr6)
+ bne cr6, L(dLcr6)
L(d14):
- sldi. r12,rN,3
- bne cr5,L(dLcr5)
+ sldi. r12, rN, 3
+ bne cr5, L(dLcr5)
L(d04):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
- subfic rN,r12,64 /* Shift count is 64 - (rN * 8). */
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+ subfic rN, r12, 64 /* Shift count is 64 - (rN * 8). */
beq L(zeroLength)
/* At this point we have a remainder of 1 to 7 bytes to compare. Since
we are aligned it is safe to load the whole double word, and use
shift right double to eliminate bits beyond the compare length. */
L(d00):
- ld rWORD1,8(rSTR1)
- ld rWORD2,8(rSTR2)
- srd rWORD1,rWORD1,rN
- srd rWORD2,rWORD2,rN
- cmpld cr5,rWORD1,rWORD2
- bne cr5,L(dLcr5x)
- li rRTN,0
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+#endif
+ srd rWORD1, rWORD1, rN
+ srd rWORD2, rWORD2, rN
+ cmpld cr7, rWORD1, rWORD2
+ bne cr7, L(dLcr7x)
+ li rRTN, 0
blr
+
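
   [Editorial note, not part of the patch: a minimal C sketch of the
   L(d00) tail compare above, assuming the two doublewords were loaded in
   big-endian byte order (plain ld on big-endian, ldbrx on little-endian)
   and that 1 to 7 bytes remain; function and parameter names are
   invented.]

   #include <stdint.h>

   static int tail_compare (uint64_t w1, uint64_t w2, unsigned remainder)
   {
     /* remainder is 1..7 here, so the shift stays below 64.  */
     unsigned shift = 64 - 8 * remainder;   /* subfic rN, r12, 64 */
     w1 >>= shift;                          /* srd rWORD1, rWORD1, rN */
     w2 >>= shift;                          /* srd rWORD2, rWORD2, rN */
     if (w1 == w2)
       return 0;
     return w1 > w2 ? 1 : -1;               /* cmpld cr7 + bgtlr/li -1 */
   }
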
.align 4
-L(dLcr0):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
- li rRTN,1
- bgtlr cr0
- li rRTN,-1
+L(dLcr7):
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+L(dLcr7x):
+ li rRTN, 1
+ bgtlr cr7
+ li rRTN, -1
blr
.align 4
L(dLcr1):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
- li rRTN,1
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+L(dLcr1x):
+ li rRTN, 1
bgtlr cr1
- li rRTN,-1
+ li rRTN, -1
blr
.align 4
L(dLcr6):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
- li rRTN,1
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+L(dLcr6x):
+ li rRTN, 1
bgtlr cr6
- li rRTN,-1
+ li rRTN, -1
blr
.align 4
L(dLcr5):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
L(dLcr5x):
- li rRTN,1
+ li rRTN, 1
bgtlr cr5
- li rRTN,-1
+ li rRTN, -1
blr
.align 4
L(bytealigned):
mtctr rN
- beq cr6,L(zeroLength)
+#if 0
+/* Huh? We've already branched on cr6! */
+ beq cr6, L(zeroLength)
+#endif
/* We need to prime this loop. This loop is swing modulo scheduled
to avoid pipe delays. The dependent instruction latencies (load to
@@ -396,38 +598,38 @@ L(bytealigned):
So we must precondition some registers and condition codes so that
we don't exit the loop early on the first iteration. */
- lbz rWORD1,0(rSTR1)
- lbz rWORD2,0(rSTR2)
+ lbz rWORD1, 0(rSTR1)
+ lbz rWORD2, 0(rSTR2)
bdz L(b11)
- cmpld cr0,rWORD1,rWORD2
- lbz rWORD3,1(rSTR1)
- lbz rWORD4,1(rSTR2)
+ cmpld cr7, rWORD1, rWORD2
+ lbz rWORD3, 1(rSTR1)
+ lbz rWORD4, 1(rSTR2)
bdz L(b12)
- cmpld cr1,rWORD3,rWORD4
- lbzu rWORD5,2(rSTR1)
- lbzu rWORD6,2(rSTR2)
+ cmpld cr1, rWORD3, rWORD4
+ lbzu rWORD5, 2(rSTR1)
+ lbzu rWORD6, 2(rSTR2)
bdz L(b13)
.align 4
L(bLoop):
- lbzu rWORD1,1(rSTR1)
- lbzu rWORD2,1(rSTR2)
- bne cr0,L(bLcr0)
+ lbzu rWORD1, 1(rSTR1)
+ lbzu rWORD2, 1(rSTR2)
+ bne cr7, L(bLcr7)
- cmpld cr6,rWORD5,rWORD6
+ cmpld cr6, rWORD5, rWORD6
bdz L(b3i)
- lbzu rWORD3,1(rSTR1)
- lbzu rWORD4,1(rSTR2)
- bne cr1,L(bLcr1)
+ lbzu rWORD3, 1(rSTR1)
+ lbzu rWORD4, 1(rSTR2)
+ bne cr1, L(bLcr1)
- cmpld cr0,rWORD1,rWORD2
+ cmpld cr7, rWORD1, rWORD2
bdz L(b2i)
- lbzu rWORD5,1(rSTR1)
- lbzu rWORD6,1(rSTR2)
- bne cr6,L(bLcr6)
+ lbzu rWORD5, 1(rSTR1)
+ lbzu rWORD6, 1(rSTR2)
+ bne cr6, L(bLcr6)
- cmpld cr1,rWORD3,rWORD4
+ cmpld cr1, rWORD3, rWORD4
bdnz L(bLoop)
/* We speculatively load bytes before we have tested the previous
@@ -437,542 +639,727 @@ L(bLoop):
tested. In this case we must complete the pending operations
before returning. */
L(b1i):
- bne cr0,L(bLcr0)
- bne cr1,L(bLcr1)
+ bne cr7, L(bLcr7)
+ bne cr1, L(bLcr1)
b L(bx56)
.align 4
L(b2i):
- bne cr6,L(bLcr6)
- bne cr0,L(bLcr0)
+ bne cr6, L(bLcr6)
+ bne cr7, L(bLcr7)
b L(bx34)
.align 4
L(b3i):
- bne cr1,L(bLcr1)
- bne cr6,L(bLcr6)
+ bne cr1, L(bLcr1)
+ bne cr6, L(bLcr6)
b L(bx12)
.align 4
-L(bLcr0):
- li rRTN,1
- bgtlr cr0
- li rRTN,-1
+L(bLcr7):
+ li rRTN, 1
+ bgtlr cr7
+ li rRTN, -1
blr
L(bLcr1):
- li rRTN,1
+ li rRTN, 1
bgtlr cr1
- li rRTN,-1
+ li rRTN, -1
blr
L(bLcr6):
- li rRTN,1
+ li rRTN, 1
bgtlr cr6
- li rRTN,-1
+ li rRTN, -1
blr
L(b13):
- bne cr0,L(bx12)
- bne cr1,L(bx34)
+ bne cr7, L(bx12)
+ bne cr1, L(bx34)
L(bx56):
- sub rRTN,rWORD5,rWORD6
+ sub rRTN, rWORD5, rWORD6
blr
nop
L(b12):
- bne cr0,L(bx12)
+ bne cr7, L(bx12)
L(bx34):
- sub rRTN,rWORD3,rWORD4
+ sub rRTN, rWORD3, rWORD4
blr
L(b11):
L(bx12):
- sub rRTN,rWORD1,rWORD2
+ sub rRTN, rWORD1, rWORD2
blr
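
   [Editorial note, not part of the patch: the byte-granularity path from
   L(bytealigned) through L(bx12) computes the same result as the plain C
   loop below; the assembly merely unrolls it three ways across cr7, cr1
   and cr6 and issues each pair of loads before the previous compare is
   consumed, to hide load latency.  This is a reference model, not the
   implementation.]

   #include <stddef.h>

   static int memcmp_bytes (const unsigned char *s1, const unsigned char *s2,
                            size_t n)
   {
     for (size_t i = 0; i < n; i++)
       if (s1[i] != s2[i])
         /* Matches the sub rRTN, rWORDx, rWORDy returns above: the result
            is the difference of the first differing unsigned bytes.  */
         return (int) s1[i] - (int) s2[i];
     return 0;
   }
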
.align 4
-L(zeroLengthReturn):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
L(zeroLength):
- li rRTN,0
+ li rRTN, 0
blr
.align 4
/* At this point we know the strings have different alignment and the
- compare length is at least 8 bytes. rBITDIF contains the low order
+ compare length is at least 8 bytes. r12 contains the low order
3 bits of rSTR1 and cr5 contains the result of the logical compare
- of rBITDIF to 0. If rBITDIF == 0 then rStr1 is double word
+ of r12 to 0. If r12 == 0 then rStr1 is double word
aligned and can perform the DWunaligned loop.
Otherwise we know that rSTR1 is not already DW aligned yet.
So we can force the string addresses to the next lower DW
- boundary and special case this first DW word using shift left to
+ boundary and special case this first DW using shift left to
eliminate bits preceding the first byte. Since we want to join the
normal (DWaligned) compare loop, starting at the second double word,
we need to adjust the length (rN) and special case the loop
- versioning for the first DW. This insures that the loop count is
+ versioning for the first DW. This ensures that the loop count is
correct and the first DW (shifted) is in the expected register pair. */
-#define rSHL r29 /* Unaligned shift left count. */
-#define rSHR r28 /* Unaligned shift right count. */
-#define rB r27 /* Left rotation temp for rWORD2. */
-#define rD r26 /* Left rotation temp for rWORD4. */
-#define rF r25 /* Left rotation temp for rWORD6. */
-#define rH r24 /* Left rotation temp for rWORD8. */
-#define rA r0 /* Right rotation temp for rWORD2. */
-#define rC r12 /* Right rotation temp for rWORD4. */
-#define rE r0 /* Right rotation temp for rWORD6. */
-#define rG r12 /* Right rotation temp for rWORD8. */
+#define rSHL r29 /* Unaligned shift left count. */
+#define rSHR r28 /* Unaligned shift right count. */
+#define rWORD8_SHIFT r27 /* Left rotation temp for rWORD2. */
+#define rWORD2_SHIFT r26 /* Left rotation temp for rWORD4. */
+#define rWORD4_SHIFT r25 /* Left rotation temp for rWORD6. */
+#define rWORD6_SHIFT r24 /* Left rotation temp for rWORD8. */
L(unaligned):
- std r29,-24(r1)
- cfi_offset(r29,-24)
- clrldi rSHL,rSTR2,61
- beq cr6,L(duzeroLength)
- std r28,-32(r1)
- cfi_offset(r28,-32)
- beq cr5,L(DWunaligned)
- std r27,-40(r1)
- cfi_offset(r27,-40)
-/* Adjust the logical start of rSTR2 ro compensate for the extra bits
+ std rSHL, -24(r1)
+ cfi_offset(rSHL, -24)
+ clrldi rSHL, rSTR2, 61
+ beq cr6, L(duzeroLength)
+ std rSHR, -32(r1)
+ cfi_offset(rSHR, -32)
+ beq cr5, L(DWunaligned)
+ std rWORD8_SHIFT, -40(r1)
+ cfi_offset(rWORD8_SHIFT, -40)
+/* Adjust the logical start of rSTR2 to compensate for the extra bits
in the 1st rSTR1 DW. */
- sub r27,rSTR2,rBITDIF
+ sub rWORD8_SHIFT, rSTR2, r12
/* But do not attempt to address the DW before that DW that contains
the actual start of rSTR2. */
- clrrdi rSTR2,rSTR2,3
- std r26,-48(r1)
- cfi_offset(r26,-48)
+ clrrdi rSTR2, rSTR2, 3
+ std rWORD2_SHIFT, -48(r1)
+ cfi_offset(rWORD2_SHIFT, -48)
/* Compute the left/right shift counts for the unaligned rSTR2,
compensating for the logical (DW aligned) start of rSTR1. */
- clrldi rSHL,r27,61
- clrrdi rSTR1,rSTR1,3
- std r25,-56(r1)
- cfi_offset(r25,-56)
- sldi rSHL,rSHL,3
- cmpld cr5,r27,rSTR2
- add rN,rN,rBITDIF
- sldi r11,rBITDIF,3
- std r24,-64(r1)
- cfi_offset(r24,-64)
- subfic rSHR,rSHL,64
- srdi rTMP,rN,5 /* Divide by 32 */
- andi. rBITDIF,rN,24 /* Get the DW remainder */
+ clrldi rSHL, rWORD8_SHIFT, 61
+ clrrdi rSTR1, rSTR1, 3
+ std rWORD4_SHIFT, -56(r1)
+ cfi_offset(rWORD4_SHIFT, -56)
+ sldi rSHL, rSHL, 3
+ cmpld cr5, rWORD8_SHIFT, rSTR2
+ add rN, rN, r12
+ sldi rWORD6, r12, 3
+ std rWORD6_SHIFT, -64(r1)
+ cfi_offset(rWORD6_SHIFT, -64)
+ subfic rSHR, rSHL, 64
+ srdi r0, rN, 5 /* Divide by 32 */
+ andi. r12, rN, 24 /* Get the DW remainder */
/* We normally need to load 2 DWs to start the unaligned rSTR2, but in
this special case those bits may be discarded anyway. Also we
must avoid loading a DW where none of the bits are part of rSTR2 as
this may cross a page boundary and cause a page fault. */
- li rWORD8,0
- blt cr5,L(dus0)
- ld rWORD8,0(rSTR2)
- la rSTR2,8(rSTR2)
- sld rWORD8,rWORD8,rSHL
+ li rWORD8, 0
+ blt cr5, L(dus0)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD8, 0(rSTR2)
+ addi rSTR2, rSTR2, 8
+#endif
+ sld rWORD8, rWORD8, rSHL
L(dus0):
- ld rWORD1,0(rSTR1)
- ld rWORD2,0(rSTR2)
- cmpldi cr1,rBITDIF,16
- cmpldi cr7,rN,32
- srd rG,rWORD2,rSHR
- clrldi rN,rN,61
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 0(rSTR1)
+ ld rWORD2, 0(rSTR2)
+#endif
+ cmpldi cr1, r12, 16
+ cmpldi cr7, rN, 32
+ srd r12, rWORD2, rSHR
+ clrldi rN, rN, 61
beq L(duPs4)
- mtctr rTMP
- or rWORD8,rG,rWORD8
- bgt cr1,L(duPs3)
- beq cr1,L(duPs2)
+ mtctr r0
+ or rWORD8, r12, rWORD8
+ bgt cr1, L(duPs3)
+ beq cr1, L(duPs2)
/* Remainder is 8 */
.align 4
L(dusP1):
- sld rB,rWORD2,rSHL
- sld rWORD7,rWORD1,r11
- sld rWORD8,rWORD8,r11
- bge cr7,L(duP1e)
+ sld rWORD8_SHIFT, rWORD2, rSHL
+ sld rWORD7, rWORD1, rWORD6
+ sld rWORD8, rWORD8, rWORD6
+ bge cr7, L(duP1e)
/* At this point we exit early with the first double word compare
complete and remainder of 0 to 7 bytes. See L(du14) for details on
how we handle the remaining bytes. */
- cmpld cr5,rWORD7,rWORD8
- sldi. rN,rN,3
- bne cr5,L(duLcr5)
- cmpld cr7,rN,rSHR
+ cmpld cr5, rWORD7, rWORD8
+ sldi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- ld rWORD2,8(rSTR2)
- srd rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD2, 8(rSTR2)
+#endif
+ srd r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 16 */
.align 4
L(duPs2):
- sld rH,rWORD2,rSHL
- sld rWORD5,rWORD1,r11
- sld rWORD6,rWORD8,r11
+ sld rWORD6_SHIFT, rWORD2, rSHL
+ sld rWORD5, rWORD1, rWORD6
+ sld rWORD6, rWORD8, rWORD6
b L(duP2e)
/* Remainder is 24 */
.align 4
L(duPs3):
- sld rF,rWORD2,rSHL
- sld rWORD3,rWORD1,r11
- sld rWORD4,rWORD8,r11
+ sld rWORD4_SHIFT, rWORD2, rSHL
+ sld rWORD3, rWORD1, rWORD6
+ sld rWORD4, rWORD8, rWORD6
b L(duP3e)
/* Count is a multiple of 32, remainder is 0 */
.align 4
L(duPs4):
- mtctr rTMP
- or rWORD8,rG,rWORD8
- sld rD,rWORD2,rSHL
- sld rWORD1,rWORD1,r11
- sld rWORD2,rWORD8,r11
+ mtctr r0
+ or rWORD8, r12, rWORD8
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ sld rWORD1, rWORD1, rWORD6
+ sld rWORD2, rWORD8, rWORD6
b L(duP4e)
/* At this point we know rSTR1 is double word aligned and the
compare length is at least 8 bytes. */
.align 4
L(DWunaligned):
- std r27,-40(r1)
- cfi_offset(r27,-40)
- clrrdi rSTR2,rSTR2,3
- std r26,-48(r1)
- cfi_offset(r26,-48)
- srdi rTMP,rN,5 /* Divide by 32 */
- std r25,-56(r1)
- cfi_offset(r25,-56)
- andi. rBITDIF,rN,24 /* Get the DW remainder */
- std r24,-64(r1)
- cfi_offset(r24,-64)
- sldi rSHL,rSHL,3
- ld rWORD6,0(rSTR2)
- ldu rWORD8,8(rSTR2)
- cmpldi cr1,rBITDIF,16
- cmpldi cr7,rN,32
- clrldi rN,rN,61
- subfic rSHR,rSHL,64
- sld rH,rWORD6,rSHL
+ std rWORD8_SHIFT, -40(r1)
+ cfi_offset(rWORD8_SHIFT, -40)
+ clrrdi rSTR2, rSTR2, 3
+ std rWORD2_SHIFT, -48(r1)
+ cfi_offset(rWORD2_SHIFT, -48)
+ srdi r0, rN, 5 /* Divide by 32 */
+ std rWORD4_SHIFT, -56(r1)
+ cfi_offset(rWORD4_SHIFT, -56)
+ andi. r12, rN, 24 /* Get the DW remainder */
+ std rWORD6_SHIFT, -64(r1)
+ cfi_offset(rWORD6_SHIFT, -64)
+ sldi rSHL, rSHL, 3
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD6, 0(rSTR2)
+ ldu rWORD8, 8(rSTR2)
+#endif
+ cmpldi cr1, r12, 16
+ cmpldi cr7, rN, 32
+ clrldi rN, rN, 61
+ subfic rSHR, rSHL, 64
+ sld rWORD6_SHIFT, rWORD6, rSHL
beq L(duP4)
- mtctr rTMP
- bgt cr1,L(duP3)
- beq cr1,L(duP2)
+ mtctr r0
+ bgt cr1, L(duP3)
+ beq cr1, L(duP2)
/* Remainder is 8 */
.align 4
L(duP1):
- srd rG,rWORD8,rSHR
- ld rWORD7,0(rSTR1)
- sld rB,rWORD8,rSHL
- or rWORD8,rG,rH
- blt cr7,L(duP1x)
+ srd r12, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ addi rSTR1, rSTR1, 8
+#else
+ ld rWORD7, 0(rSTR1)
+#endif
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
+ blt cr7, L(duP1x)
L(duP1e):
- ld rWORD1,8(rSTR1)
- ld rWORD2,8(rSTR2)
- cmpld cr5,rWORD7,rWORD8
- srd rA,rWORD2,rSHR
- sld rD,rWORD2,rSHL
- or rWORD2,rA,rB
- ld rWORD3,16(rSTR1)
- ld rWORD4,16(rSTR2)
- cmpld cr0,rWORD1,rWORD2
- srd rC,rWORD4,rSHR
- sld rF,rWORD4,rSHL
- bne cr5,L(duLcr5)
- or rWORD4,rC,rD
- ld rWORD5,24(rSTR1)
- ld rWORD6,24(rSTR2)
- cmpld cr1,rWORD3,rWORD4
- srd rE,rWORD6,rSHR
- sld rH,rWORD6,rSHL
- bne cr0,L(duLcr0)
- or rWORD6,rE,rF
- cmpld cr6,rWORD5,rWORD6
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
+ srd r0, rWORD2, rSHR
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 16(rSTR1)
+ ld rWORD4, 16(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
+ srd r12, rWORD4, rSHR
+ sld rWORD4_SHIFT, rWORD4, rSHL
+ bne cr5, L(duLcr5)
+ or rWORD4, r12, rWORD2_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 24(rSTR1)
+ ld rWORD6, 24(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+ srd r0, rWORD6, rSHR
+ sld rWORD6_SHIFT, rWORD6, rSHL
+ bne cr7, L(duLcr7)
+ or rWORD6, r0, rWORD4_SHIFT
+ cmpld cr6, rWORD5, rWORD6
b L(duLoop3)
.align 4
/* At this point we exit early with the first double word compare
complete and remainder of 0 to 7 bytes. See L(du14) for details on
how we handle the remaining bytes. */
L(duP1x):
- cmpld cr5,rWORD7,rWORD8
- sldi. rN,rN,3
- bne cr5,L(duLcr5)
- cmpld cr7,rN,rSHR
+ cmpld cr5, rWORD7, rWORD8
+ sldi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- ld rWORD2,8(rSTR2)
- srd rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD2, 8(rSTR2)
+#endif
+ srd r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 16 */
.align 4
L(duP2):
- srd rE,rWORD8,rSHR
- ld rWORD5,0(rSTR1)
- or rWORD6,rE,rH
- sld rH,rWORD8,rSHL
+ srd r0, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ addi rSTR1, rSTR1, 8
+#else
+ ld rWORD5, 0(rSTR1)
+#endif
+ or rWORD6, r0, rWORD6_SHIFT
+ sld rWORD6_SHIFT, rWORD8, rSHL
L(duP2e):
- ld rWORD7,8(rSTR1)
- ld rWORD8,8(rSTR2)
- cmpld cr6,rWORD5,rWORD6
- srd rG,rWORD8,rSHR
- sld rB,rWORD8,rSHL
- or rWORD8,rG,rH
- blt cr7,L(duP2x)
- ld rWORD1,16(rSTR1)
- ld rWORD2,16(rSTR2)
- cmpld cr5,rWORD7,rWORD8
- bne cr6,L(duLcr6)
- srd rA,rWORD2,rSHR
- sld rD,rWORD2,rSHL
- or rWORD2,rA,rB
- ld rWORD3,24(rSTR1)
- ld rWORD4,24(rSTR2)
- cmpld cr0,rWORD1,rWORD2
- bne cr5,L(duLcr5)
- srd rC,rWORD4,rSHR
- sld rF,rWORD4,rSHL
- or rWORD4,rC,rD
- addi rSTR1,rSTR1,8
- addi rSTR2,rSTR2,8
- cmpld cr1,rWORD3,rWORD4
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD7, 8(rSTR1)
+ ld rWORD8, 8(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+ srd r12, rWORD8, rSHR
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
+ blt cr7, L(duP2x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 16(rSTR1)
+ ld rWORD2, 16(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ srd r0, rWORD2, rSHR
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 24(rSTR1)
+ ld rWORD4, 24(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
+ bne cr5, L(duLcr5)
+ srd r12, rWORD4, rSHR
+ sld rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#endif
+ cmpld cr1, rWORD3, rWORD4
b L(duLoop2)
.align 4
L(duP2x):
- cmpld cr5,rWORD7,rWORD8
- addi rSTR1,rSTR1,8
- addi rSTR2,rSTR2,8
- bne cr6,L(duLcr6)
- sldi. rN,rN,3
- bne cr5,L(duLcr5)
- cmpld cr7,rN,rSHR
+ cmpld cr5, rWORD7, rWORD8
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#endif
+ bne cr6, L(duLcr6)
+ sldi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- ld rWORD2,8(rSTR2)
- srd rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD2, 8(rSTR2)
+#endif
+ srd r0, rWORD2, rSHR
b L(dutrim)
/* Remainder is 24 */
.align 4
L(duP3):
- srd rC,rWORD8,rSHR
- ld rWORD3,0(rSTR1)
- sld rF,rWORD8,rSHL
- or rWORD4,rC,rH
+ srd r12, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ addi rSTR1, rSTR1, 8
+#else
+ ld rWORD3, 0(rSTR1)
+#endif
+ sld rWORD4_SHIFT, rWORD8, rSHL
+ or rWORD4, r12, rWORD6_SHIFT
L(duP3e):
- ld rWORD5,8(rSTR1)
- ld rWORD6,8(rSTR2)
- cmpld cr1,rWORD3,rWORD4
- srd rE,rWORD6,rSHR
- sld rH,rWORD6,rSHL
- or rWORD6,rE,rF
- ld rWORD7,16(rSTR1)
- ld rWORD8,16(rSTR2)
- cmpld cr6,rWORD5,rWORD6
- bne cr1,L(duLcr1)
- srd rG,rWORD8,rSHR
- sld rB,rWORD8,rSHL
- or rWORD8,rG,rH
- blt cr7,L(duP3x)
- ld rWORD1,24(rSTR1)
- ld rWORD2,24(rSTR2)
- cmpld cr5,rWORD7,rWORD8
- bne cr6,L(duLcr6)
- srd rA,rWORD2,rSHR
- sld rD,rWORD2,rSHL
- or rWORD2,rA,rB
- addi rSTR1,rSTR1,16
- addi rSTR2,rSTR2,16
- cmpld cr0,rWORD1,rWORD2
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 8(rSTR1)
+ ld rWORD6, 8(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+ srd r0, rWORD6, rSHR
+ sld rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD7, 16(rSTR1)
+ ld rWORD8, 16(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+ bne cr1, L(duLcr1)
+ srd r12, rWORD8, rSHR
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
+ blt cr7, L(duP3x)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 24(rSTR1)
+ ld rWORD2, 24(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ srd r0, rWORD2, rSHR
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 16
+ addi rSTR2, rSTR2, 16
+#endif
+ cmpld cr7, rWORD1, rWORD2
b L(duLoop1)
.align 4
L(duP3x):
- addi rSTR1,rSTR1,16
- addi rSTR2,rSTR2,16
- bne cr1,L(duLcr1)
- cmpld cr5,rWORD7,rWORD8
- bne cr6,L(duLcr6)
- sldi. rN,rN,3
- bne cr5,L(duLcr5)
- cmpld cr7,rN,rSHR
+#ifndef __LITTLE_ENDIAN__
+ addi rSTR1, rSTR1, 16
+ addi rSTR2, rSTR2, 16
+#endif
+#if 0
+/* Huh? We've already branched on cr1! */
+ bne cr1, L(duLcr1)
+#endif
+ cmpld cr5, rWORD7, rWORD8
+ bne cr6, L(duLcr6)
+ sldi. rN, rN, 3
+ bne cr5, L(duLcr5)
+ cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- ld rWORD2,8(rSTR2)
- srd rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD2, 8(rSTR2)
+#endif
+ srd r0, rWORD2, rSHR
b L(dutrim)
/* Count is a multiple of 32, remainder is 0 */
.align 4
L(duP4):
- mtctr rTMP
- srd rA,rWORD8,rSHR
- ld rWORD1,0(rSTR1)
- sld rD,rWORD8,rSHL
- or rWORD2,rA,rH
+ mtctr r0
+ srd r0, rWORD8, rSHR
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ addi rSTR1, rSTR1, 8
+#else
+ ld rWORD1, 0(rSTR1)
+#endif
+ sld rWORD2_SHIFT, rWORD8, rSHL
+ or rWORD2, r0, rWORD6_SHIFT
L(duP4e):
- ld rWORD3,8(rSTR1)
- ld rWORD4,8(rSTR2)
- cmpld cr0,rWORD1,rWORD2
- srd rC,rWORD4,rSHR
- sld rF,rWORD4,rSHL
- or rWORD4,rC,rD
- ld rWORD5,16(rSTR1)
- ld rWORD6,16(rSTR2)
- cmpld cr1,rWORD3,rWORD4
- bne cr0,L(duLcr0)
- srd rE,rWORD6,rSHR
- sld rH,rWORD6,rSHL
- or rWORD6,rE,rF
- ldu rWORD7,24(rSTR1)
- ldu rWORD8,24(rSTR2)
- cmpld cr6,rWORD5,rWORD6
- bne cr1,L(duLcr1)
- srd rG,rWORD8,rSHR
- sld rB,rWORD8,rSHL
- or rWORD8,rG,rH
- cmpld cr5,rWORD7,rWORD8
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 8(rSTR1)
+ ld rWORD4, 8(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
+ srd r12, rWORD4, rSHR
+ sld rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 16(rSTR1)
+ ld rWORD6, 16(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+ bne cr7, L(duLcr7)
+ srd r0, rWORD6, rSHR
+ sld rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ldu rWORD7, 24(rSTR1)
+ ldu rWORD8, 24(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+ bne cr1, L(duLcr1)
+ srd r12, rWORD8, rSHR
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
+ cmpld cr5, rWORD7, rWORD8
bdz L(du24) /* Adjust CTR as we start with +4 */
/* This is the primary loop */
.align 4
L(duLoop):
- ld rWORD1,8(rSTR1)
- ld rWORD2,8(rSTR2)
- cmpld cr1,rWORD3,rWORD4
- bne cr6,L(duLcr6)
- srd rA,rWORD2,rSHR
- sld rD,rWORD2,rSHL
- or rWORD2,rA,rB
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD1, 8(rSTR1)
+ ld rWORD2, 8(rSTR2)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+ bne cr6, L(duLcr6)
+ srd r0, rWORD2, rSHR
+ sld rWORD2_SHIFT, rWORD2, rSHL
+ or rWORD2, r0, rWORD8_SHIFT
L(duLoop1):
- ld rWORD3,16(rSTR1)
- ld rWORD4,16(rSTR2)
- cmpld cr6,rWORD5,rWORD6
- bne cr5,L(duLcr5)
- srd rC,rWORD4,rSHR
- sld rF,rWORD4,rSHL
- or rWORD4,rC,rD
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD3, 0, rSTR1
+ ldbrx rWORD4, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD3, 16(rSTR1)
+ ld rWORD4, 16(rSTR2)
+#endif
+ cmpld cr6, rWORD5, rWORD6
+ bne cr5, L(duLcr5)
+ srd r12, rWORD4, rSHR
+ sld rWORD4_SHIFT, rWORD4, rSHL
+ or rWORD4, r12, rWORD2_SHIFT
L(duLoop2):
- ld rWORD5,24(rSTR1)
- ld rWORD6,24(rSTR2)
- cmpld cr5,rWORD7,rWORD8
- bne cr0,L(duLcr0)
- srd rE,rWORD6,rSHR
- sld rH,rWORD6,rSHL
- or rWORD6,rE,rF
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD5, 0, rSTR1
+ ldbrx rWORD6, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD5, 24(rSTR1)
+ ld rWORD6, 24(rSTR2)
+#endif
+ cmpld cr5, rWORD7, rWORD8
+ bne cr7, L(duLcr7)
+ srd r0, rWORD6, rSHR
+ sld rWORD6_SHIFT, rWORD6, rSHL
+ or rWORD6, r0, rWORD4_SHIFT
L(duLoop3):
- ldu rWORD7,32(rSTR1)
- ldu rWORD8,32(rSTR2)
- cmpld cr0,rWORD1,rWORD2
- bne- cr1,L(duLcr1)
- srd rG,rWORD8,rSHR
- sld rB,rWORD8,rSHL
- or rWORD8,rG,rH
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD7, 0, rSTR1
+ ldbrx rWORD8, 0, rSTR2
+ addi rSTR1, rSTR1, 8
+ addi rSTR2, rSTR2, 8
+#else
+ ldu rWORD7, 32(rSTR1)
+ ldu rWORD8, 32(rSTR2)
+#endif
+ cmpld cr7, rWORD1, rWORD2
+ bne cr1, L(duLcr1)
+ srd r12, rWORD8, rSHR
+ sld rWORD8_SHIFT, rWORD8, rSHL
+ or rWORD8, r12, rWORD6_SHIFT
bdnz L(duLoop)
L(duL4):
- bne cr1,L(duLcr1)
- cmpld cr1,rWORD3,rWORD4
- bne cr6,L(duLcr6)
- cmpld cr6,rWORD5,rWORD6
- bne cr5,L(duLcr5)
- cmpld cr5,rWORD7,rWORD8
+#if 0
+/* Huh? We've already branched on cr1! */
+ bne cr1, L(duLcr1)
+#endif
+ cmpld cr1, rWORD3, rWORD4
+ bne cr6, L(duLcr6)
+ cmpld cr6, rWORD5, rWORD6
+ bne cr5, L(duLcr5)
+ cmpld cr5, rWORD7, rWORD8
L(du44):
- bne cr0,L(duLcr0)
+ bne cr7, L(duLcr7)
L(du34):
- bne cr1,L(duLcr1)
+ bne cr1, L(duLcr1)
L(du24):
- bne cr6,L(duLcr6)
+ bne cr6, L(duLcr6)
L(du14):
- sldi. rN,rN,3
- bne cr5,L(duLcr5)
+ sldi. rN, rN, 3
+ bne cr5, L(duLcr5)
/* At this point we have a remainder of 1 to 7 bytes to compare. We use
shift right double to eliminate bits beyond the compare length.
- This allows the use of double word subtract to compute the final
- result.
However it may not be safe to load rWORD2 which may be beyond the
string length. So we compare the bit length of the remainder to
the right shift count (rSHR). If the bit count is less than or equal
we do not need to load rWORD2 (all significant bits are already in
- rB). */
- cmpld cr7,rN,rSHR
+ rWORD8_SHIFT). */
+ cmpld cr7, rN, rSHR
beq L(duZeroReturn)
- li rA,0
- ble cr7,L(dutrim)
- ld rWORD2,8(rSTR2)
- srd rA,rWORD2,rSHR
+ li r0, 0
+ ble cr7, L(dutrim)
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD2, 0, rSTR2
+ addi rSTR2, rSTR2, 8
+#else
+ ld rWORD2, 8(rSTR2)
+#endif
+ srd r0, rWORD2, rSHR
.align 4
L(dutrim):
- ld rWORD1,8(rSTR1)
- ld rWORD8,-8(r1)
- subfic rN,rN,64 /* Shift count is 64 - (rN * 8). */
- or rWORD2,rA,rB
- ld rWORD7,-16(r1)
- ld r29,-24(r1)
- srd rWORD1,rWORD1,rN
- srd rWORD2,rWORD2,rN
- ld r28,-32(r1)
- ld r27,-40(r1)
- li rRTN,0
- cmpld cr0,rWORD1,rWORD2
- ld r26,-48(r1)
- ld r25,-56(r1)
- beq cr0,L(dureturn24)
- li rRTN,1
- ld r24,-64(r1)
- bgtlr cr0
- li rRTN,-1
+#ifdef __LITTLE_ENDIAN__
+ ldbrx rWORD1, 0, rSTR1
+#else
+ ld rWORD1, 8(rSTR1)
+#endif
+ ld rWORD8, -8(r1)
+ subfic rN, rN, 64 /* Shift count is 64 - (rN * 8). */
+ or rWORD2, r0, rWORD8_SHIFT
+ ld rWORD7, -16(r1)
+ ld rSHL, -24(r1)
+ srd rWORD1, rWORD1, rN
+ srd rWORD2, rWORD2, rN
+ ld rSHR, -32(r1)
+ ld rWORD8_SHIFT, -40(r1)
+ li rRTN, 0
+ cmpld cr7, rWORD1, rWORD2
+ ld rWORD2_SHIFT, -48(r1)
+ ld rWORD4_SHIFT, -56(r1)
+ beq cr7, L(dureturn24)
+ li rRTN, 1
+ ld rWORD6_SHIFT, -64(r1)
+ bgtlr cr7
+ li rRTN, -1
blr
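
   [Editorial note, not part of the patch: the srd/sld/or dance in the
   unaligned path reduces to the helper below, a sketch assuming rSTR2 is
   genuinely misaligned so rSHL is a multiple of 8 between 8 and 56, and
   that the words are in big-endian byte order (ld on BE, ldbrx on LE);
   names are invented.]

   #include <stdint.h>

   static uint64_t merge_unaligned (uint64_t prev_aligned, uint64_t next_aligned,
                                    unsigned shl)   /* 8 * (addr & 7), 8..56 */
   {
     unsigned shr = 64 - shl;                 /* subfic rSHR, rSHL, 64 */
     /* High part comes from the earlier aligned word shifted left
        (sld ..., rSHL), low part from the next aligned word shifted right
        (srd ..., rSHR); the or rebuilds one doubleword of rSTR2 that lines
        up with the aligned rSTR1 word it is compared against.  */
     return (prev_aligned << shl) | (next_aligned >> shr);
   }
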
.align 4
-L(duLcr0):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
- li rRTN,1
- bgt cr0,L(dureturn29)
- ld r29,-24(r1)
- ld r28,-32(r1)
- li rRTN,-1
+L(duLcr7):
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+ li rRTN, 1
+ bgt cr7, L(dureturn29)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
+ li rRTN, -1
b L(dureturn27)
.align 4
L(duLcr1):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
- li rRTN,1
- bgt cr1,L(dureturn29)
- ld r29,-24(r1)
- ld r28,-32(r1)
- li rRTN,-1
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+ li rRTN, 1
+ bgt cr1, L(dureturn29)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
+ li rRTN, -1
b L(dureturn27)
.align 4
L(duLcr6):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
- li rRTN,1
- bgt cr6,L(dureturn29)
- ld r29,-24(r1)
- ld r28,-32(r1)
- li rRTN,-1
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+ li rRTN, 1
+ bgt cr6, L(dureturn29)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
+ li rRTN, -1
b L(dureturn27)
.align 4
L(duLcr5):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
- li rRTN,1
- bgt cr5,L(dureturn29)
- ld r29,-24(r1)
- ld r28,-32(r1)
- li rRTN,-1
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
+ li rRTN, 1
+ bgt cr5, L(dureturn29)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
+ li rRTN, -1
b L(dureturn27)
.align 3
L(duZeroReturn):
- li rRTN,0
+ li rRTN, 0
.align 4
L(dureturn):
- ld rWORD8,-8(r1)
- ld rWORD7,-16(r1)
+ ld rWORD8, -8(r1)
+ ld rWORD7, -16(r1)
L(dureturn29):
- ld r29,-24(r1)
- ld r28,-32(r1)
+ ld rSHL, -24(r1)
+ ld rSHR, -32(r1)
L(dureturn27):
- ld r27,-40(r1)
+ ld rWORD8_SHIFT, -40(r1)
L(dureturn26):
- ld r26,-48(r1)
+ ld rWORD2_SHIFT, -48(r1)
L(dureturn25):
- ld r25,-56(r1)
+ ld rWORD4_SHIFT, -56(r1)
L(dureturn24):
- ld r24,-64(r1)
+ ld rWORD6_SHIFT, -64(r1)
blr
L(duzeroLength):
- li rRTN,0
+ li rRTN, 0
blr
END (memcmp)
libc_hidden_builtin_def (memcmp)
-weak_alias (memcmp,bcmp)
+weak_alias (memcmp, bcmp)
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/memcpy.S b/libc/sysdeps/powerpc/powerpc64/power7/memcpy.S
index 800a9f1bb..e8df75f59 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/memcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/memcpy.S
@@ -23,418 +23,361 @@
/* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]);
Returns 'dst'. */
+#define dst 11 /* Use r11 so r3 kept unchanged. */
+#define src 4
+#define cnt 5
+
.machine power7
EALIGN (memcpy, 5, 0)
CALL_MCOUNT 3
- cmpldi cr1,5,31
+ cmpldi cr1,cnt,31
neg 0,3
- std 3,-16(1)
- std 31,-8(1)
- cfi_offset(31,-8)
ble cr1, L(copy_LT_32) /* If move < 32 bytes use short move
code. */
- andi. 11,3,7 /* Check alignment of DST. */
-
-
- clrldi 10,4,61 /* Check alignment of SRC. */
- cmpld cr6,10,11 /* SRC and DST alignments match? */
- mr 12,4
- mr 31,5
+#ifdef __LITTLE_ENDIAN__
+/* In little-endian mode, power7 takes an alignment trap on any lxvd2x
+ or stxvd2x crossing a 32-byte boundary, so ensure the aligned_copy
+ loop is only used for quadword aligned copies. */
+ andi. 10,3,15
+ clrldi 11,4,60
+#else
+ andi. 10,3,7 /* Check alignment of DST. */
+ clrldi 11,4,61 /* Check alignment of SRC. */
+#endif
+ cmpld cr6,10,11 /* SRC and DST alignments match? */
+
+ mr dst,3
bne cr6,L(copy_GE_32_unaligned)
+ beq L(aligned_copy)
- srdi 9,5,3 /* Number of full quadwords remaining. */
-
- beq L(copy_GE_32_aligned_cont)
-
- clrldi 0,0,61
- mtcrf 0x01,0
- subf 31,0,5
-
- /* Get the SRC aligned to 8 bytes. */
-
-1: bf 31,2f
- lbz 6,0(12)
- addi 12,12,1
- stb 6,0(3)
- addi 3,3,1
-2: bf 30,4f
- lhz 6,0(12)
- addi 12,12,2
- sth 6,0(3)
- addi 3,3,2
-4: bf 29,0f
- lwz 6,0(12)
- addi 12,12,4
- stw 6,0(3)
- addi 3,3,4
-0:
- clrldi 10,12,61 /* Check alignment of SRC again. */
- srdi 9,31,3 /* Number of full doublewords remaining. */
-
-L(copy_GE_32_aligned_cont):
-
- clrldi 11,31,61
- mtcrf 0x01,9
-
- srdi 8,31,5
- cmpldi cr1,9,4
- cmpldi cr6,11,0
- mr 11,12
-
- /* Copy 1~3 doublewords so the main loop starts
- at a multiple of 32 bytes. */
+ mtocrf 0x01,0
+#ifdef __LITTLE_ENDIAN__
+ clrldi 0,0,60
+#else
+ clrldi 0,0,61
+#endif
- bf 30,1f
- ld 6,0(12)
- ld 7,8(12)
- addi 11,12,16
- mtctr 8
- std 6,0(3)
- std 7,8(3)
- addi 10,3,16
- bf 31,4f
- ld 0,16(12)
- std 0,16(3)
- blt cr1,3f
- addi 11,12,24
- addi 10,3,24
- b 4f
-
- .align 4
-1: /* Copy 1 doubleword and set the counter. */
- mr 10,3
- mtctr 8
- bf 31,4f
- ld 6,0(12)
- addi 11,12,8
- std 6,0(3)
- addi 10,3,8
-
-L(aligned_copy):
- /* Main aligned copy loop. Copies up to 128-bytes at a time. */
- .align 4
+/* Get the DST and SRC aligned to 8 bytes (16 for little-endian). */
+1:
+ bf 31,2f
+ lbz 6,0(src)
+ addi src,src,1
+ stb 6,0(dst)
+ addi dst,dst,1
+2:
+ bf 30,4f
+ lhz 6,0(src)
+ addi src,src,2
+ sth 6,0(dst)
+ addi dst,dst,2
4:
- /* check for any 32-byte or 64-byte lumps that are outside of a
- nice 128-byte range. R8 contains the number of 32-byte
- lumps, so drop this into the CR, and use the SO/EQ bits to help
- handle the 32- or 64- byte lumps. Then handle the rest with an
- unrolled 128-bytes-at-a-time copy loop. */
- mtocrf 1,8
- li 6,16 # 16() index
- li 7,32 # 32() index
- li 8,48 # 48() index
-
-L(aligned_32byte):
- /* if the SO bit (indicating a 32-byte lump) is not set, move along. */
- bns cr7,L(aligned_64byte)
- lxvd2x 6,0,11
- lxvd2x 7,11,6
- addi 11,11,32
- stxvd2x 6,0,10
- stxvd2x 7,10,6
- addi 10,10,32
-
-L(aligned_64byte):
- /* if the EQ bit (indicating a 64-byte lump) is not set, move along. */
- bne cr7,L(aligned_128setup)
- lxvd2x 6,0,11
- lxvd2x 7,11,6
- lxvd2x 8,11,7
- lxvd2x 9,11,8
- addi 11,11,64
- stxvd2x 6,0,10
- stxvd2x 7,10,6
- stxvd2x 8,10,7
- stxvd2x 9,10,8
- addi 10,10,64
-
-L(aligned_128setup):
- /* Set up for the 128-byte at a time copy loop. */
- srdi 8,31,7
- cmpdi 8,0 # Any 4x lumps left?
- beq 3f # if not, move along.
- lxvd2x 6,0,11
- lxvd2x 7,11,6
- mtctr 8 # otherwise, load the ctr and begin.
- li 8,48 # 48() index
+ bf 29,8f
+ lwz 6,0(src)
+ addi src,src,4
+ stw 6,0(dst)
+ addi dst,dst,4
+8:
+#ifdef __LITTLE_ENDIAN__
+ bf 28,16f
+ ld 6,0(src)
+ addi src,src,8
+ std 6,0(dst)
+ addi dst,dst,8
+16:
+#endif
+ subf cnt,0,cnt
+
+/* Main aligned copy loop. Copies 128 bytes at a time. */
+L(aligned_copy):
+ li 6,16
+ li 7,32
+ li 8,48
+ mtocrf 0x02,cnt
+ srdi 12,cnt,7
+ cmpdi 12,0
+ beq L(aligned_tail)
+ lxvd2x 6,0,src
+ lxvd2x 7,src,6
+ mtctr 12
b L(aligned_128loop)
+ .align 4
L(aligned_128head):
/* for the 2nd + iteration of this loop. */
- lxvd2x 6,0,11
- lxvd2x 7,11,6
+ lxvd2x 6,0,src
+ lxvd2x 7,src,6
L(aligned_128loop):
- lxvd2x 8,11,7
- lxvd2x 9,11,8
- stxvd2x 6,0,10
- addi 11,11,64
- stxvd2x 7,10,6
- stxvd2x 8,10,7
- stxvd2x 9,10,8
- lxvd2x 6,0,11
- lxvd2x 7,11,6
- addi 10,10,64
- lxvd2x 8,11,7
- lxvd2x 9,11,8
- addi 11,11,64
- stxvd2x 6,0,10
- stxvd2x 7,10,6
- stxvd2x 8,10,7
- stxvd2x 9,10,8
- addi 10,10,64
+ lxvd2x 8,src,7
+ lxvd2x 9,src,8
+ stxvd2x 6,0,dst
+ addi src,src,64
+ stxvd2x 7,dst,6
+ stxvd2x 8,dst,7
+ stxvd2x 9,dst,8
+ lxvd2x 6,0,src
+ lxvd2x 7,src,6
+ addi dst,dst,64
+ lxvd2x 8,src,7
+ lxvd2x 9,src,8
+ addi src,src,64
+ stxvd2x 6,0,dst
+ stxvd2x 7,dst,6
+ stxvd2x 8,dst,7
+ stxvd2x 9,dst,8
+ addi dst,dst,64
bdnz L(aligned_128head)
-3:
- /* Check for tail bytes. */
- rldicr 0,31,0,60
- mtcrf 0x01,31
- beq cr6,0f
-
-.L9:
- add 3,3,0
- add 12,12,0
-
- /* At this point we have a tail of 0-7 bytes and we know that the
- destination is doubleword-aligned. */
-4: /* Copy 4 bytes. */
- bf 29,2f
-
- lwz 6,0(12)
- addi 12,12,4
- stw 6,0(3)
- addi 3,3,4
-2: /* Copy 2 bytes. */
- bf 30,1f
-
- lhz 6,0(12)
- addi 12,12,2
- sth 6,0(3)
- addi 3,3,2
-1: /* Copy 1 byte. */
- bf 31,0f
-
- lbz 6,0(12)
- stb 6,0(3)
-0: /* Return original DST pointer. */
- ld 31,-8(1)
- ld 3,-16(1)
+L(aligned_tail):
+ mtocrf 0x01,cnt
+ bf 25,32f
+ lxvd2x 6,0,src
+ lxvd2x 7,src,6
+ lxvd2x 8,src,7
+ lxvd2x 9,src,8
+ addi src,src,64
+ stxvd2x 6,0,dst
+ stxvd2x 7,dst,6
+ stxvd2x 8,dst,7
+ stxvd2x 9,dst,8
+ addi dst,dst,64
+32:
+ bf 26,16f
+ lxvd2x 6,0,src
+ lxvd2x 7,src,6
+ addi src,src,32
+ stxvd2x 6,0,dst
+ stxvd2x 7,dst,6
+ addi dst,dst,32
+16:
+ bf 27,8f
+ lxvd2x 6,0,src
+ addi src,src,16
+ stxvd2x 6,0,dst
+ addi dst,dst,16
+8:
+ bf 28,4f
+ ld 6,0(src)
+ addi src,src,8
+ std 6,0(dst)
+ addi dst,dst,8
+4: /* Copies 4~7 bytes. */
+ bf 29,L(tail2)
+ lwz 6,0(src)
+ stw 6,0(dst)
+ bf 30,L(tail5)
+ lhz 7,4(src)
+ sth 7,4(dst)
+ bflr 31
+ lbz 8,6(src)
+ stb 8,6(dst)
+ /* Return original DST pointer. */
blr
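
   [Editorial note, not part of the patch: as a rough outline of the
   aligned path above (names invented, the 1/2/4/8-byte alignment prologue
   omitted, and memcpy used inside the model purely for illustration), the
   code streams 128-byte blocks and then lets the low bits of the residual
   count, the same bits mtocrf places in the CR and bf tests, select the
   64/32/16/8/4/2/1-byte tail pieces.]

   #include <stddef.h>
   #include <string.h>

   static void *memcpy_aligned_outline (void *dstp, const void *srcp, size_t cnt)
   {
     unsigned char *d = dstp;
     const unsigned char *s = srcp;

     /* The real code first copies 1/2/4 (and 8 on LE) bytes so that d is
        8- or 16-byte aligned; that prologue is skipped in this model.  */

     for (; cnt >= 128; cnt -= 128, s += 128, d += 128)
       memcpy (d, s, 128);            /* eight lxvd2x/stxvd2x pairs per pass */

     for (size_t piece = 64; piece >= 1; piece /= 2)
       if (cnt & piece)               /* one CR bit per power of two */
         {
           memcpy (d, s, piece);
           s += piece;
           d += piece;
         }
     return dstp;                     /* original DST pointer */
   }
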
- /* Handle copies of 0~31 bytes. */
- .align 4
+
+/* Handle copies of 0~31 bytes. */
+ .align 4
L(copy_LT_32):
- cmpldi cr6,5,8
- mr 12,4
- mtcrf 0x01,5
+ mr dst,3
+ cmpldi cr6,cnt,8
+ mtocrf 0x01,cnt
ble cr6,L(copy_LE_8)
/* At least 9 bytes to go. */
neg 8,4
- clrrdi 11,4,2
- andi. 0,8,3
- cmpldi cr1,5,16
- mr 10,5
+ andi. 0,8,3
+ cmpldi cr1,cnt,16
beq L(copy_LT_32_aligned)
- /* Force 4-bytes alignment for SRC. */
- mtocrf 0x01,0
- subf 10,0,5
-2: bf 30,1f
-
- lhz 6,0(12)
- addi 12,12,2
- sth 6,0(3)
- addi 3,3,2
-1: bf 31,L(end_4bytes_alignment)
-
- lbz 6,0(12)
- addi 12,12,1
- stb 6,0(3)
- addi 3,3,1
-
- .align 4
+ /* Force 4-byte alignment for SRC. */
+ mtocrf 0x01,0
+ subf cnt,0,cnt
+2:
+ bf 30,1f
+ lhz 6,0(src)
+ addi src,src,2
+ sth 6,0(dst)
+ addi dst,dst,2
+1:
+ bf 31,L(end_4bytes_alignment)
+ lbz 6,0(src)
+ addi src,src,1
+ stb 6,0(dst)
+ addi dst,dst,1
+
+ .align 4
L(end_4bytes_alignment):
- cmpldi cr1,10,16
- mtcrf 0x01,10
+ cmpldi cr1,cnt,16
+ mtocrf 0x01,cnt
L(copy_LT_32_aligned):
/* At least 6 bytes to go, and SRC is word-aligned. */
blt cr1,8f
/* Copy 16 bytes. */
- lwz 6,0(12)
- lwz 7,4(12)
- stw 6,0(3)
- lwz 8,8(12)
- stw 7,4(3)
- lwz 6,12(12)
- addi 12,12,16
- stw 8,8(3)
- stw 6,12(3)
- addi 3,3,16
+ lwz 6,0(src)
+ lwz 7,4(src)
+ stw 6,0(dst)
+ lwz 8,8(src)
+ stw 7,4(dst)
+ lwz 6,12(src)
+ addi src,src,16
+ stw 8,8(dst)
+ stw 6,12(dst)
+ addi dst,dst,16
8: /* Copy 8 bytes. */
- bf 28,4f
+ bf 28,L(tail4)
+ lwz 6,0(src)
+ lwz 7,4(src)
+ addi src,src,8
+ stw 6,0(dst)
+ stw 7,4(dst)
+ addi dst,dst,8
+
+ .align 4
+/* Copies 4~7 bytes. */
+L(tail4):
+ bf 29,L(tail2)
+ lwz 6,0(src)
+ stw 6,0(dst)
+ bf 30,L(tail5)
+ lhz 7,4(src)
+ sth 7,4(dst)
+ bflr 31
+ lbz 8,6(src)
+ stb 8,6(dst)
+ /* Return original DST pointer. */
+ blr
- lwz 6,0(12)
- lwz 7,4(12)
- addi 12,12,8
- stw 6,0(3)
- stw 7,4(3)
- addi 3,3,8
-4: /* Copy 4 bytes. */
- bf 29,2f
-
- lwz 6,0(12)
- addi 12,12,4
- stw 6,0(3)
- addi 3,3,4
-2: /* Copy 2-3 bytes. */
+ .align 4
+/* Copies 2~3 bytes. */
+L(tail2):
bf 30,1f
-
- lhz 6,0(12)
- sth 6,0(3)
- bf 31,0f
- lbz 7,2(12)
- stb 7,2(3)
- ld 3,-16(1)
+ lhz 6,0(src)
+ sth 6,0(dst)
+ bflr 31
+ lbz 7,2(src)
+ stb 7,2(dst)
blr
- .align 4
-1: /* Copy 1 byte. */
- bf 31,0f
+ .align 4
+L(tail5):
+ bflr 31
+ lbz 6,4(src)
+ stb 6,4(dst)
+ blr
- lbz 6,0(12)
- stb 6,0(3)
-0: /* Return original DST pointer. */
- ld 3,-16(1)
+ .align 4
+1:
+ bflr 31
+ lbz 6,0(src)
+ stb 6,0(dst)
+ /* Return original DST pointer. */
blr
- /* Handles copies of 0~8 bytes. */
- .align 4
+
+/* Handles copies of 0~8 bytes. */
+ .align 4
L(copy_LE_8):
- bne cr6,4f
+ bne cr6,L(tail4)
/* Though we could've used ld/std here, they are still
slow for unaligned cases. */
- lwz 6,0(4)
- lwz 7,4(4)
- stw 6,0(3)
- stw 7,4(3)
- ld 3,-16(1) /* Return original DST pointers. */
+ lwz 6,0(src)
+ lwz 7,4(src)
+ stw 6,0(dst)
+ stw 7,4(dst)
blr
- .align 4
-4: /* Copies 4~7 bytes. */
- bf 29,2b
-
- lwz 6,0(4)
- stw 6,0(3)
- bf 30,5f
- lhz 7,4(4)
- sth 7,4(3)
- bf 31,0f
- lbz 8,6(4)
- stb 8,6(3)
- ld 3,-16(1)
- blr
-
- .align 4
-5: /* Copy 1 byte. */
- bf 31,0f
-
- lbz 6,4(4)
- stb 6,4(3)
-
-0: /* Return original DST pointer. */
- ld 3,-16(1)
- blr
- /* Handle copies of 32+ bytes where DST is aligned (to quadword) but
- SRC is not. Use aligned quadword loads from SRC, shifted to realign
- the data, allowing for aligned DST stores. */
- .align 4
+/* Handle copies of 32+ bytes where DST is aligned (to quadword) but
+ SRC is not. Use aligned quadword loads from SRC, shifted to realign
+ the data, allowing for aligned DST stores. */
+ .align 4
L(copy_GE_32_unaligned):
- clrldi 0,0,60 /* Number of bytes until the 1st
- quadword. */
- andi. 11,3,15 /* Check alignment of DST (against
- quadwords). */
- srdi 9,5,4 /* Number of full quadwords remaining. */
+ clrldi 0,0,60 /* Number of bytes until the 1st dst quadword. */
+#ifndef __LITTLE_ENDIAN__
+ andi. 10,3,15 /* Check alignment of DST (against quadwords). */
+#endif
+ srdi 9,cnt,4 /* Number of full quadwords remaining. */
beq L(copy_GE_32_unaligned_cont)
- /* SRC is not quadword aligned, get it aligned. */
+ /* DST is not quadword aligned, get it aligned. */
- mtcrf 0x01,0
- subf 31,0,5
+ mtocrf 0x01,0
+ subf cnt,0,cnt
/* Vector instructions work best when proper alignment (16-bytes)
is present. Move 0~15 bytes as needed to get DST quadword-aligned. */
-1: /* Copy 1 byte. */
+1:
bf 31,2f
-
- lbz 6,0(12)
- addi 12,12,1
- stb 6,0(3)
- addi 3,3,1
-2: /* Copy 2 bytes. */
+ lbz 6,0(src)
+ addi src,src,1
+ stb 6,0(dst)
+ addi dst,dst,1
+2:
bf 30,4f
-
- lhz 6,0(12)
- addi 12,12,2
- sth 6,0(3)
- addi 3,3,2
-4: /* Copy 4 bytes. */
+ lhz 6,0(src)
+ addi src,src,2
+ sth 6,0(dst)
+ addi dst,dst,2
+4:
bf 29,8f
-
- lwz 6,0(12)
- addi 12,12,4
- stw 6,0(3)
- addi 3,3,4
-8: /* Copy 8 bytes. */
+ lwz 6,0(src)
+ addi src,src,4
+ stw 6,0(dst)
+ addi dst,dst,4
+8:
bf 28,0f
-
- ld 6,0(12)
- addi 12,12,8
- std 6,0(3)
- addi 3,3,8
+ ld 6,0(src)
+ addi src,src,8
+ std 6,0(dst)
+ addi dst,dst,8
0:
- clrldi 10,12,60 /* Check alignment of SRC. */
- srdi 9,31,4 /* Number of full quadwords remaining. */
+ srdi 9,cnt,4 /* Number of full quadwords remaining. */
/* The proper alignment is present, it is OK to copy the bytes now. */
L(copy_GE_32_unaligned_cont):
/* Setup two indexes to speed up the indexed vector operations. */
- clrldi 11,31,60
- li 6,16 /* Index for 16-bytes offsets. */
+ clrldi 10,cnt,60
+ li 6,16 /* Index for 16-bytes offsets. */
li 7,32 /* Index for 32-bytes offsets. */
- cmpldi cr1,11,0
- srdi 8,31,5 /* Setup the loop counter. */
- mr 10,3
- mr 11,12
- mtcrf 0x01,9
- cmpldi cr6,9,1
- lvsl 5,0,12
- lvx 3,0,12
- bf 31,L(setup_unaligned_loop)
-
- /* Copy another 16 bytes to align to 32-bytes due to the loop . */
- lvx 4,12,6
- vperm 6,3,4,5
- addi 11,12,16
- addi 10,3,16
- stvx 6,0,3
+ cmpldi cr1,10,0
+ srdi 8,cnt,5 /* Setup the loop counter. */
+ mtocrf 0x01,9
+ cmpldi cr6,9,1
+#ifdef __LITTLE_ENDIAN__
+ lvsr 5,0,src
+#else
+ lvsl 5,0,src
+#endif
+ lvx 3,0,src
+ li 0,0
+ bf 31,L(setup_unaligned_loop)
+
+ /* Copy another 16 bytes to align to 32-bytes due to the loop. */
+ lvx 4,src,6
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
+ vperm 6,3,4,5
+#endif
+ addi src,src,16
+ stvx 6,0,dst
+ addi dst,dst,16
vor 3,4,4
+ clrrdi 0,src,60
L(setup_unaligned_loop):
- mtctr 8
- ble cr6,L(end_unaligned_loop)
+ mtctr 8
+ ble cr6,L(end_unaligned_loop)
/* Copy 32 bytes at a time using vector instructions. */
- .align 4
+ .align 4
L(unaligned_loop):
/* Note: vr6/vr10 may contain data that was already copied,
@@ -442,62 +385,55 @@ L(unaligned_loop):
some portions again. This is faster than having unaligned
vector instructions though. */
- lvx 4,11,6 /* vr4 = r11+16. */
- vperm 6,3,4,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr6. */
- lvx 3,11,7 /* vr3 = r11+32. */
- vperm 10,4,3,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr10. */
- addi 11,11,32
- stvx 6,0,10
- stvx 10,10,6
- addi 10,10,32
-
+ lvx 4,src,6
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
+ vperm 6,3,4,5
+#endif
+ lvx 3,src,7
+#ifdef __LITTLE_ENDIAN__
+ vperm 10,3,4,5
+#else
+ vperm 10,4,3,5
+#endif
+ addi src,src,32
+ stvx 6,0,dst
+ stvx 10,dst,6
+ addi dst,dst,32
bdnz L(unaligned_loop)
- .align 4
+ clrrdi 0,src,60
+
+ .align 4
L(end_unaligned_loop):
/* Check for tail bytes. */
- rldicr 0,31,0,59
- mtcrf 0x01,31
- beq cr1,0f
+ mtocrf 0x01,cnt
+ beqlr cr1
- add 3,3,0
- add 12,12,0
+ add src,src,0
/* We have 1~15 tail bytes to copy, and DST is quadword aligned. */
-8: /* Copy 8 bytes. */
+ /* Copy 8 bytes. */
bf 28,4f
-
- lwz 6,0(12)
- lwz 7,4(12)
- addi 12,12,8
- stw 6,0(3)
- stw 7,4(3)
- addi 3,3,8
-4: /* Copy 4 bytes. */
- bf 29,2f
-
- lwz 6,0(12)
- addi 12,12,4
- stw 6,0(3)
- addi 3,3,4
-2: /* Copy 2~3 bytes. */
- bf 30,1f
-
- lhz 6,0(12)
- addi 12,12,2
- sth 6,0(3)
- addi 3,3,2
-1: /* Copy 1 byte. */
- bf 31,0f
-
- lbz 6,0(12)
- stb 6,0(3)
-0: /* Return original DST pointer. */
- ld 31,-8(1)
- ld 3,-16(1)
+ lwz 6,0(src)
+ lwz 7,4(src)
+ addi src,src,8
+ stw 6,0(dst)
+ stw 7,4(dst)
+ addi dst,dst,8
+4: /* Copy 4~7 bytes. */
+ bf 29,L(tail2)
+ lwz 6,0(src)
+ stw 6,0(dst)
+ bf 30,L(tail5)
+ lhz 7,4(src)
+ sth 7,4(dst)
+ bflr 31
+ lbz 8,6(src)
+ stb 8,6(dst)
+ /* Return original DST pointer. */
blr
END_GEN_TB (memcpy,TB_TOCLESS)
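
The lvsr/lvsl choice and the swapped vperm operands in the little-endian paths above exist because the permute control built by lvsl indexes bytes in big-endian order; using lvsr and reversing the vperm inputs produces the same "merge two aligned loads" result on LE. Below is a hedged C sketch of that shift-and-merge idea, using aligned 64-bit loads in place of the VMX registers; the helper name merge_copy64 is invented for the sketch and a little-endian host is assumed for the shift directions.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Copy NWORDS*8 bytes from a possibly misaligned SRC to an aligned DST
   using only aligned 64-bit loads, merging neighbouring words with
   shifts (the scalar analogue of lvx + vperm).  Little-endian host
   assumed.  */
static void
merge_copy64 (uint64_t *dst, const unsigned char *src, size_t nwords)
{
  size_t off = (uintptr_t) src & 7;     /* SRC misalignment in bytes */
  if (off == 0)
    {
      memcpy (dst, src, nwords * 8);    /* already aligned: nothing to merge */
      return;
    }
  const uint64_t *s = (const uint64_t *) (src - off);  /* aligned base */
  uint64_t prev = s[0];                 /* like the initial lvx 3,0,src */
  for (size_t i = 0; i < nwords; i++)
    {
      uint64_t next = s[i + 1];         /* like lvx 4,src,6 */
      /* vperm-style merge: upper bytes of PREV with lower bytes of NEXT. */
      dst[i] = (prev >> (8 * off)) | (next << (8 * (8 - off)));
      prev = next;                      /* like vor 3,4,4 */
    }
}

int
main (void)
{
  static uint64_t storage[9];           /* 8-byte aligned backing buffer */
  unsigned char *buf = (unsigned char *) storage;
  for (int i = 0; i < 72; i++)
    buf[i] = (unsigned char) i;
  uint64_t out[4];
  merge_copy64 (out, buf + 3, 4);       /* copy 32 bytes starting at buf+3 */
  printf ("%02x == %02x\n", (unsigned) (out[0] & 0xff), buf[3]);
  return 0;
}

The real loop streams 32 bytes per iteration with two vperm/stvx pairs, which is why the counter is set up from cnt >> 5 and an extra 16-byte copy is done first when the remaining length is an odd multiple of 16.
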
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/mempcpy.S b/libc/sysdeps/powerpc/powerpc64/power7/mempcpy.S
index f20be938d..b93ab7da5 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/mempcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/mempcpy.S
@@ -365,13 +365,21 @@ L(copy_GE_32_unaligned_cont):
mr 11,12
mtcrf 0x01,9
cmpldi cr6,9,1
- lvsl 5,0,12
+#ifdef __LITTLE_ENDIAN__
+ lvsr 5,0,12
+#else
+ lvsl 5,0,12
+#endif
lvx 3,0,12
bf 31,L(setup_unaligned_loop)
/* Copy another 16 bytes to align to 32-bytes due to the loop . */
lvx 4,12,6
- vperm 6,3,4,5
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
+ vperm 6,3,4,5
+#endif
addi 11,12,16
addi 10,3,16
stvx 6,0,3
@@ -391,11 +399,17 @@ L(unaligned_loop):
vector instructions though. */
lvx 4,11,6 /* vr4 = r11+16. */
- vperm 6,3,4,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr6. */
+#ifdef __LITTLE_ENDIAN__
+ vperm 6,4,3,5
+#else
+ vperm 6,3,4,5
+#endif
lvx 3,11,7 /* vr3 = r11+32. */
- vperm 10,4,3,5 /* Merge the correctly-aligned portions
- of vr3/vr4 into vr10. */
+#ifdef __LITTLE_ENDIAN__
+ vperm 10,3,4,5
+#else
+ vperm 10,4,3,5
+#endif
addi 11,11,32
stvx 6,0,10
stvx 10,10,6
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/memrchr.S b/libc/sysdeps/powerpc/powerpc64/power7/memrchr.S
index c49995210..a9e86cb19 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/memrchr.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/memrchr.S
@@ -23,118 +23,132 @@
.machine power7
ENTRY (__memrchr)
CALL_MCOUNT
- dcbt 0,r3
- mr r7,r3
- add r3,r7,r5 /* Calculate the last acceptable address. */
- cmpld cr7,r3,r7 /* Is the address equal or less than r3? */
+ add r7,r3,r5 /* Calculate the last acceptable address. */
+ neg r0,r7
+ addi r7,r7,-1
+ mr r10,r3
+ clrrdi r6,r7,7
+ li r9,3<<5
+ dcbt r9,r6,16 /* Stream hint, decreasing addresses. */
/* Replicate BYTE to doubleword. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
+ insrdi r4,r4,16,32
insrdi r4,r4,32,0
- bge cr7,L(proceed)
-
- li r3,-1 /* Make r11 the biggest if r4 <= 0. */
-L(proceed):
li r6,-8
- addi r9,r3,-1
- clrrdi r8,r9,3
- addi r8,r8,8
- neg r0,r3
+ li r9,-1
rlwinm r0,r0,3,26,28 /* Calculate padding. */
-
+ clrrdi r8,r7,3
+ srd r9,r9,r0
cmpldi r5,32
+ clrrdi r0,r10,3
ble L(small_range)
- ldbrx r12,r8,r6 /* Load reversed doubleword from memory. */
- cmpb r10,r12,r4 /* Check for BYTE in DWORD1. */
- sld r10,r10,r0
- srd r10,r10,r0
- cmpldi cr7,r10,0 /* If r10 == 0, no BYTEs have been found. */
+#ifdef __LITTLE_ENDIAN__
+ ldx r12,0,r8
+#else
+ ldbrx r12,0,r8 /* Load reversed doubleword from memory. */
+#endif
+ cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */
+ and r3,r3,r9
+ cmpldi cr7,r3,0 /* If r3 == 0, no BYTEs have been found. */
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,-8
- cmpld cr6,r9,r7
- ble cr6,L(null)
-
mtcrf 0x01,r8
- /* Are we now aligned to a doubleword boundary? If so, skip to
+ /* Are we now aligned to a quadword boundary? If so, skip to
the main loop. Otherwise, go through the alignment code. */
- mr r8,r9
- bt 28,L(loop_setup)
+ bf 28,L(loop_setup)
/* Handle DWORD2 of pair. */
+#ifdef __LITTLE_ENDIAN__
+ ldx r12,r8,r6
+#else
ldbrx r12,r8,r6
- cmpb r10,r12,r4
- cmpldi cr7,r10,0
- bne cr7,L(done)
-
- /* Are we done already. */
+#endif
addi r8,r8,-8
- cmpld cr6,r8,r7
- ble cr6,L(null)
+ cmpb r3,r12,r4
+ cmpldi cr7,r3,0
+ bne cr7,L(done)
L(loop_setup):
- li r0,-16
- sub r5,r8,r7
- srdi r9,r5,4 /* Number of loop iterations. */
+ /* The last dword we want to read in the loop below is the one
+ containing the first byte of the string, ie. the dword at
+ s & ~7, or r0. The first dword read is at r8 - 8, we
+ read 2 * cnt dwords, so the last dword read will be at
+ r8 - 8 - 16 * cnt + 8. Solving for cnt gives
+ cnt = (r8 - r0) / 16 */
+ sub r5,r8,r0
+ addi r8,r8,-8
+ srdi r9,r5,4 /* Number of loop iterations. */
mtctr r9 /* Setup the counter. */
- b L(loop)
- /* Main loop to look for BYTE backwards in the string. Since it's a
- small loop (< 8 instructions), align it to 32-bytes. */
- .p2align 5
+
+ /* Main loop to look for BYTE backwards in the string.
+ FIXME: Investigate whether 32 byte align helps with this
+ 9 instruction loop. */
+ .align 5
L(loop):
/* Load two doublewords, compare and merge in a
single register for speed. This is an attempt
to speed up the byte-checking process for bigger strings. */
- ldbrx r12,r8,r6
- ldbrx r11,r8,r0
- addi r8,r8,-8
- cmpb r10,r12,r4
+#ifdef __LITTLE_ENDIAN__
+ ldx r12,0,r8
+ ldx r11,r8,r6
+#else
+ ldbrx r12,0,r8
+ ldbrx r11,r8,r6
+#endif
+ cmpb r3,r12,r4
cmpb r9,r11,r4
- or r5,r9,r10 /* Merge everything in one doubleword. */
+ or r5,r9,r3 /* Merge everything in one doubleword. */
cmpldi cr7,r5,0
bne cr7,L(found)
- addi r8,r8,-8
+ addi r8,r8,-16
bdnz L(loop)
- /* We're here because the counter reached 0, and that means we
- didn't have any matches for BYTE in the whole range. Just return
- the original range. */
- addi r8,r8,8
- cmpld cr6,r8,r7
- bgt cr6,L(loop_small)
- b L(null)
-
- /* OK, one (or both) of the words contains BYTE. Check
- the first word and decrement the address in case the first
- word really contains BYTE. */
+
+ /* We may have one more word to read. */
+ cmpld r8,r0
+ bnelr
+
+#ifdef __LITTLE_ENDIAN__
+ ldx r12,0,r8
+#else
+ ldbrx r12,0,r8
+#endif
+ cmpb r3,r12,r4
+ cmpldi cr7,r3,0
+ bne cr7,L(done)
+ blr
+
.align 4
L(found):
- cmpldi cr6,r10,0
- addi r8,r8,8
+ /* OK, one (or both) of the dwords contains BYTE. Check
+ the first dword. */
+ cmpldi cr6,r3,0
bne cr6,L(done)
/* BYTE must be in the second word. Adjust the address
- again and move the result of cmpb to r10 so we can calculate the
+ again and move the result of cmpb to r3 so we can calculate the
pointer. */
- mr r10,r9
+ mr r3,r9
addi r8,r8,-8
- /* r10 has the output of the cmpb instruction, that is, it contains
- 0xff in the same position as the BYTE in the original
+ /* r3 has the output of the cmpb instruction, that is, it contains
+ 0xff in the same position as BYTE in the original
word from the string. Use that to calculate the pointer.
We need to make sure BYTE is *before* the end of the
range. */
L(done):
- cntlzd r0,r10 /* Count leading zeroes before the match. */
- srdi r6,r0,3 /* Convert leading zeroes to bytes. */
- addi r0,r6,1
+ cntlzd r9,r3 /* Count leading zeros before the match. */
+ cmpld r8,r0 /* Are we on the last word? */
+ srdi r6,r9,3 /* Convert leading zeros to bytes. */
+ addi r0,r6,-7
sub r3,r8,r0
- cmpld r3,r7
- blt L(null)
+ cmpld cr7,r3,r10
+ bnelr
+ bgelr cr7
+ li r3,0
blr
.align 4
@@ -148,29 +162,35 @@ L(small_range):
cmpldi r5,0
beq L(null)
- ldbrx r12,r8,r6 /* Load reversed doubleword from memory. */
- cmpb r10,r12,r4 /* Check for BYTE in DWORD1. */
- sld r10,r10,r0
- srd r10,r10,r0
- cmpldi cr7,r10,0
+#ifdef __LITTLE_ENDIAN__
+ ldx r12,0,r8
+#else
+ ldbrx r12,0,r8 /* Load reversed doubleword from memory. */
+#endif
+ cmpb r3,r12,r4 /* Check for BYTE in DWORD1. */
+ and r3,r3,r9
+ cmpldi cr7,r3,0
bne cr7,L(done)
/* Are we done already? */
+ cmpld r8,r0
addi r8,r8,-8
- cmpld r8,r7
- ble L(null)
- b L(loop_small)
+ beqlr
- .p2align 5
+ .align 5
L(loop_small):
- ldbrx r12,r8,r6
- cmpb r10,r12,r4
- cmpldi cr6,r10,0
- bne cr6,L(done)
+#ifdef __LITTLE_ENDIAN__
+ ldx r12,0,r8
+#else
+ ldbrx r12,0,r8
+#endif
+ cmpb r3,r12,r4
+ cmpld r8,r0
+ cmpldi cr7,r3,0
+ bne cr7,L(done)
addi r8,r8,-8
- cmpld r8,r7
- ble L(null)
- b L(loop_small)
+ bne L(loop_small)
+ blr
END (__memrchr)
weak_alias (__memrchr, memrchr)
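
The core of the rewrite is the cmpb result mask: 0xff in every byte of the loaded doubleword that equals the replicated search byte, with ldbrx on big-endian versus a plain ldx on little-endian deciding how the tail turns that mask back into an address. A hedged C model of the mask and of picking the highest-address match follows; the helper names are invented and a little-endian host is assumed for the byte numbering.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint64_t
repl8 (unsigned char c)                 /* the insrdi replication sequence */
{
  uint64_t v = c;
  v |= v << 8;
  v |= v << 16;
  v |= v << 32;
  return v;
}

static uint64_t
cmpb_model (uint64_t a, uint64_t b)     /* 0xff in each byte where a == b */
{
  uint64_t m = 0;
  for (int i = 0; i < 8; i++)
    if (((a >> (8 * i)) & 0xff) == ((b >> (8 * i)) & 0xff))
      m |= 0xffULL << (8 * i);
  return m;
}

/* Memory offset of the highest-address matching byte, or -1 if none:
   7 - (leading zero bytes), which is what the cntlzd/srdi/addi tail
   computes before subtracting from the doubleword address.  */
static int
last_match (uint64_t mask)
{
  if (mask == 0)
    return -1;
  return 7 - __builtin_clzll (mask) / 8;
}

int
main (void)
{
  uint64_t word;
  memcpy (&word, "abcabcxy", 8);        /* byte i of WORD = offset i (LE host) */
  printf ("last 'c' at offset %d\n",
          last_match (cmpb_model (word, repl8 ('c'))));   /* prints 5 */
  return 0;
}
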
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/memset.S b/libc/sysdeps/powerpc/powerpc64/power7/memset.S
index b24cfa163..8b081e87c 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/memset.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/memset.S
@@ -32,8 +32,8 @@ L(_memset):
mr 10,3
/* Replicate byte to word. */
- rlwimi 4,4,8,16,23
- rlwimi 4,4,16,0,15
+ insrdi 4,4,8,48
+ insrdi 4,4,16,32
ble cr6,L(small) /* If length <= 8, use short copy code. */
neg 0,3
@@ -321,7 +321,7 @@ L(medium):
clrldi 0,0,62
beq L(medium_aligned)
- /* Force 4-bytes alignment for SRC. */
+ /* Force 4-bytes alignment for DST. */
mtocrf 0x01,0
subf 5,0,5
1: /* Copy 1 byte. */
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/rawmemchr.S b/libc/sysdeps/powerpc/powerpc64/power7/rawmemchr.S
index 50a33d8fa..547aed771 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/rawmemchr.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/rawmemchr.S
@@ -27,8 +27,8 @@ ENTRY (__rawmemchr)
clrrdi r8,r3,3 /* Align the address to doubleword boundary. */
/* Replicate byte to doubleword. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
+ insrdi r4,r4,16,32
insrdi r4,r4,32,0
/* Now r4 has a doubleword of c bytes. */
@@ -36,8 +36,13 @@ ENTRY (__rawmemchr)
rlwinm r6,r3,3,26,28 /* Calculate padding. */
ld r12,0(r8) /* Load doubleword from memory. */
cmpb r5,r12,r4 /* Compare each byte against c byte. */
+#ifdef __LITTLE_ENDIAN__
+ srd r5,r5,r6
+ sld r5,r5,r6
+#else
sld r5,r5,r6 /* Move left to discard ignored bits. */
srd r5,r5,r6 /* Bring the bits back as zeros. */
+#endif
cmpdi cr7,r5,0 /* If r5 == 0, no c bytes have been found. */
bne cr7,L(done)
@@ -91,8 +96,14 @@ L(loop):
doubleword from the string. Use that fact to find out what is
the position of the byte inside the string. */
L(done):
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r5,-1
+ andc r0,r0,r5
+ popcntd r0,r0 /* Count trailing zeros. */
+#else
cntlzd r0,r5 /* Count leading zeros before the match. */
- srdi r0,r0,3 /* Convert leading zeroes to bytes. */
+#endif
+ srdi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of the matching char. */
blr
END (__rawmemchr)
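
POWER7 has no count-trailing-zeros instruction, so the little-endian paths build one from popcntd: (mask - 1) & ~mask sets exactly the bits below the lowest set bit, its population count is the trailing-zero count, and dividing by 8 gives the byte offset of the first match. A small self-check of that identity, purely illustrative:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static unsigned
trailing_zeros_via_popcount (uint64_t mask)
{
  /* (mask - 1) & ~mask has a 1 in exactly the bit positions below the
     lowest set bit of MASK, so its population count is the number of
     trailing zeros.  MASK must be nonzero, as it is after the bne.  */
  return __builtin_popcountll ((mask - 1) & ~mask);
}

int
main (void)
{
  for (int byte = 0; byte < 8; byte++)
    {
      uint64_t mask = 0xffULL << (8 * byte);   /* cmpb match in byte BYTE */
      unsigned tz = trailing_zeros_via_popcount (mask);
      assert (tz == (unsigned) __builtin_ctzll (mask));
      assert (tz / 8 == (unsigned) byte);      /* srdi r0,r0,3 */
    }
  puts ("trailing-zero identity holds");
  return 0;
}
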
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/strchr.S b/libc/sysdeps/powerpc/powerpc64/power7/strchr.S
index 3ffe7a188..4679a158f 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/strchr.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/strchr.S
@@ -35,8 +35,8 @@ ENTRY (strchr)
beq cr7,L(null_match)
/* Replicate byte to doubleword. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
+ insrdi r4,r4,16,32
insrdi r4,r4,32,0
/* Now r4 has a doubleword of c bytes and r0 has
@@ -47,11 +47,17 @@ ENTRY (strchr)
/* Move the doublewords left and right to discard the bits that are
not part of the string and bring them back as zeros. */
-
+#ifdef __LITTLE_ENDIAN__
+ srd r10,r10,r6
+ srd r11,r11,r6
+ sld r10,r10,r6
+ sld r11,r11,r6
+#else
sld r10,r10,r6
sld r11,r11,r6
srd r10,r10,r6
srd r11,r11,r6
+#endif
or r5,r10,r11 /* OR the results to speed things up. */
cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes
have been found. */
@@ -108,15 +114,24 @@ L(loop):
mr r11,r7
addi r8,r8,8
- /* r5 has the output of the cmpb instruction, that is, it contains
+ /* r10/r11 have the output of the cmpb instructions, that is,
0xff in the same position as the c/null byte in the original
doubleword from the string. Use that to calculate the pointer. */
L(done):
- cntlzd r4,r10 /* Count leading zeroes before c matches. */
- cntlzd r0,r11 /* Count leading zeroes before null matches. */
- cmpld cr7,r4,r0
+#ifdef __LITTLE_ENDIAN__
+ addi r3,r10,-1
+ andc r3,r3,r10
+ popcntd r0,r3
+ addi r4,r11,-1
+ andc r4,r4,r11
+ cmpld cr7,r3,r4
bgt cr7,L(no_match)
- srdi r0,r4,3 /* Convert leading zeroes to bytes. */
+#else
+ cntlzd r0,r10 /* Count leading zeros before c matches. */
+ cmpld cr7,r11,r10
+ bgt cr7,L(no_match)
+#endif
+ srdi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of the matching c byte
or null in case c was not found. */
blr
@@ -135,9 +150,13 @@ L(null_match):
/* Move the doublewords left and right to discard the bits that are
not part of the string and bring them back as zeros. */
-
+#ifdef __LITTLE_ENDIAN__
+ srd r5,r5,r6
+ sld r5,r5,r6
+#else
sld r5,r5,r6
srd r5,r5,r6
+#endif
cmpdi cr7,r5,0 /* If r10 == 0, no c or null bytes
have been found. */
bne cr7,L(done_null)
@@ -192,7 +211,13 @@ L(loop_null):
0xff in the same position as the null byte in the original
doubleword from the string. Use that to calculate the pointer. */
L(done_null):
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r5,-1
+ andc r0,r0,r5
+ popcntd r0,r0
+#else
cntlzd r0,r5 /* Count leading zeros before the match. */
+#endif
srdi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of the matching null byte. */
blr
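
strchr's tail has to decide whether the first match of c or the first NUL comes earlier inside the doubleword. The little-endian path compares the (m - 1) & ~m forms of the two cmpb masks directly: a larger value means more trailing zeros, i.e. a later first hit, so only the winner's popcount is needed. A hedged C sketch with invented names:

#include <stdint.h>
#include <stdio.h>

/* Decide between the first c match and the first NUL inside one
   doubleword, given the two cmpb masks.  Byte 0 is the lowest address,
   as with a plain ldx on a little-endian machine.  */
static const char *
strchr_tail (const char *dword_base, uint64_t c_mask, uint64_t nul_mask)
{
  /* Bits strictly below the lowest set bit of each mask; an empty
     c_mask becomes all ones, i.e. "c matches infinitely late".  */
  uint64_t c_below = (c_mask - 1) & ~c_mask;
  uint64_t n_below = (nul_mask - 1) & ~nul_mask;
  if (c_below > n_below)                /* the NUL comes first: no match */
    return NULL;
  return dword_base + __builtin_popcountll (c_below) / 8;
}

int
main (void)
{
  /* "ab\0cd" padded with NULs: the real NUL sits at offset 2.  */
  static const char dword[8] = { 'a', 'b', 0, 'c', 'd', 0, 0, 0 };
  uint64_t nul_mask = 0xffULL << (8 * 2);
  uint64_t d_mask = 0xffULL << (8 * 4);   /* pretend cmpb matched 'd' */
  uint64_t b_mask = 0xffULL << (8 * 1);   /* pretend cmpb matched 'b' */
  printf ("%p\n", (void *) strchr_tail (dword, d_mask, nul_mask)); /* NULL */
  printf ("%c\n", *strchr_tail (dword, b_mask, nul_mask));         /* b */
  return 0;
}
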
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/strchrnul.S b/libc/sysdeps/powerpc/powerpc64/power7/strchrnul.S
index 9dbc51b0d..df457525e 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/strchrnul.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/strchrnul.S
@@ -27,8 +27,8 @@ ENTRY (__strchrnul)
clrrdi r8,r3,3 /* Align the address to doubleword boundary. */
/* Replicate byte to doubleword. */
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
+ insrdi r4,r4,8,48
+ insrdi r4,r4,16,32
insrdi r4,r4,32,0
rlwinm r6,r3,3,26,28 /* Calculate padding. */
@@ -44,10 +44,17 @@ ENTRY (__strchrnul)
/* Move the doublewords left and right to discard the bits that are
not part of the string and to bring them back as zeros. */
+#ifdef __LITTLE_ENDIAN__
+ srd r10,r10,r6
+ srd r9,r9,r6
+ sld r10,r10,r6
+ sld r9,r9,r6
+#else
sld r10,r10,r6
sld r9,r9,r6
srd r10,r10,r6
srd r9,r9,r6
+#endif
or r5,r9,r10 /* OR the results to speed things up. */
cmpdi cr7,r5,0 /* If r5 == 0, no c or null bytes
have been found. */
@@ -97,7 +104,7 @@ L(loop):
bne cr6,L(done)
/* The c/null byte must be in the second doubleword. Adjust the
- address again and move the result of cmpb to r10 so we can calculate
+ address again and move the result of cmpb to r5 so we can calculate
the pointer. */
mr r5,r10
addi r8,r8,8
@@ -106,7 +113,13 @@ L(loop):
0xff in the same position as the c/null byte in the original
doubleword from the string. Use that to calculate the pointer. */
L(done):
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r5,-1
+ andc r0,r0,r5
+ popcntd r0,r0
+#else
cntlzd r0,r5 /* Count leading zeros before the match. */
+#endif
srdi r0,r0,3 /* Convert leading zeros to bytes. */
add r3,r8,r0 /* Return address of matching c/null byte. */
blr
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/strlen.S b/libc/sysdeps/powerpc/powerpc64/power7/strlen.S
index 343216952..807ef1082 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/strlen.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/strlen.S
@@ -30,7 +30,11 @@ ENTRY (strlen)
with cmpb. */
li r5,-1 /* MASK = 0xffffffffffffffff. */
ld r12,0(r4) /* Load doubleword from memory. */
+#ifdef __LITTLE_ENDIAN__
+ sld r5,r5,r6
+#else
srd r5,r5,r6 /* MASK = MASK >> padding. */
+#endif
orc r9,r12,r5 /* Mask bits that are not part of the string. */
cmpb r10,r9,r0 /* Check for null bytes in DWORD1. */
cmpdi cr7,r10,0 /* If r10 == 0, no null's have been found. */
@@ -48,9 +52,6 @@ ENTRY (strlen)
cmpb r10,r12,r0
cmpdi cr7,r10,0
bne cr7,L(done)
- b L(loop) /* We branch here (rather than falling through)
- to skip the nops due to heavy alignment
- of the loop below. */
/* Main loop to look for the end of the string. Since it's a
small loop (< 8 instructions), align it to 32-bytes. */
@@ -87,9 +88,15 @@ L(loop):
0xff in the same position as the null byte in the original
doubleword from the string. Use that to calculate the length. */
L(done):
- cntlzd r0,r10 /* Count leading zeroes before the match. */
+#ifdef __LITTLE_ENDIAN__
+ addi r9, r10, -1 /* Form a mask from trailing zeros. */
+ andc r9, r9, r10
+ popcntd r0, r9 /* Count the bits in the mask. */
+#else
+ cntlzd r0,r10 /* Count leading zeros before the match. */
+#endif
subf r5,r3,r4
- srdi r0,r0,3 /* Convert leading zeroes to bytes. */
+ srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */
add r3,r5,r0 /* Compute final length. */
blr
END (strlen)
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/strncmp.S b/libc/sysdeps/powerpc/powerpc64/power7/strncmp.S
index 77ecad5ab..e618b010b 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/strncmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/strncmp.S
@@ -27,7 +27,7 @@
EALIGN (strncmp,5,0)
CALL_MCOUNT 3
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -40,6 +40,7 @@ EALIGN (strncmp,5,0)
#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */
#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
#define rBITDIF r11 /* bits that differ in s1 & s2 words */
+#define rTMP r12
dcbt 0,rSTR1
nop
@@ -83,12 +84,57 @@ L(g1): add rTMP,rFEFE,rWORD1
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ addi rTMP2, rTMP, -1
+ beq cr1, L(equal)
+ andc rTMP2, rTMP2, rTMP
+ rldimi rTMP2, rTMP2, 1, 0
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ cmpd cr1, rWORD1, rWORD2
+ beq cr1, L(equal)
+ cmpb rBITDIF, rWORD1, rWORD2 /* 0xff on equal bytes. */
+ addi rNEG, rBITDIF, 1
+ orc rNEG, rNEG, rBITDIF /* 0's below LS differing byte. */
+ sldi rNEG, rNEG, 8 /* 1's above LS differing byte. */
+ andc rWORD1, rWORD1, rNEG /* mask off MS bytes. */
+ andc rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt L(highbit)
+ sradi rRTN, rRTN, 63 /* must return an int. */
+ ori rRTN, rRTN, 1
+ blr
+L(equal):
+ li rRTN, 0
+ blr
+
+L(different):
+ ld rWORD1, -8(rSTR1)
+ cmpb rBITDIF, rWORD1, rWORD2 /* 0xff on equal bytes. */
+ addi rNEG, rBITDIF, 1
+ orc rNEG, rNEG, rBITDIF /* 0's below LS differing byte. */
+ sldi rNEG, rNEG, 8 /* 1's above LS differing byte. */
+ andc rWORD1, rWORD1, rNEG /* mask off MS bytes. */
+ andc rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt L(highbit)
+ sradi rRTN, rRTN, 63
+ ori rRTN, rRTN, 1
+ blr
+L(highbit):
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
+ blr
+
+#else
L(endstring):
and rTMP,r7F7F,rWORD1
beq cr1,L(equal)
add rTMP,rTMP,r7F7F
xor. rBITDIF,rWORD1,rWORD2
-
andc rNEG,rNEG,rTMP
blt L(highbit)
cntlzd rBITDIF,rBITDIF
@@ -97,7 +143,7 @@ L(endstring):
cmpd cr1,rNEG,rBITDIF
sub rRTN,rWORD1,rWORD2
blt cr1,L(equal)
- sradi rRTN,rRTN,63
+ sradi rRTN,rRTN,63 /* must return an int. */
ori rRTN,rRTN,1
blr
L(equal):
@@ -105,7 +151,7 @@ L(equal):
blr
L(different):
- ldu rWORD1,-8(rSTR1)
+ ld rWORD1,-8(rSTR1)
xor. rBITDIF,rWORD1,rWORD2
sub rRTN,rWORD1,rWORD2
blt L(highbit)
@@ -113,11 +159,10 @@ L(different):
ori rRTN,rRTN,1
blr
L(highbit):
- srdi rWORD2,rWORD2,56
- srdi rWORD1,rWORD1,56
- sub rRTN,rWORD1,rWORD2
+ sradi rRTN,rWORD2,63
+ ori rRTN,rRTN,1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
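
The little-endian L(endstring) above first builds a mask covering every byte up to and including the first NUL of WORD1 ((rTMP - 1) & ~rTMP reaches bit 6 of that byte and the rldimi extends it through bit 7), masks the gunk off both words, and only then locates the least significant differing byte. The zero-detect value can carry a spurious 0x80 into the byte just above a real NUL, but its lowest marker is always genuine, so the mask is safe. A hedged C rendering of that masking step; the assembly resolves the differing byte branch-free, the byte extraction below is only for clarity.

#include <stdint.h>
#include <stdio.h>

/* ZMASK is the zero-byte detect value with 0x80 set in each byte of W1
   that is zero (the "rTMP" register).  Little-endian word layout.  */
static int
endstring_le (uint64_t w1, uint64_t w2, uint64_t zmask)
{
  uint64_t m = (zmask - 1) & ~zmask;    /* bits below the first NUL marker */
  m |= m << 1;                          /* rldimi: extend through bit 7 */
  w1 &= m;                              /* mask off gunk beyond the NUL */
  w2 &= m;
  if (w1 == w2)
    return 0;
  uint64_t diff = w1 ^ w2;
  int byte = __builtin_ctzll (diff) / 8;        /* LS differing byte */
  unsigned a = (w1 >> (8 * byte)) & 0xff;
  unsigned b = (w2 >> (8 * byte)) & 0xff;
  return a < b ? -1 : 1;
}

int
main (void)
{
  /* "abc\0????" vs "abd\0????": the garbage beyond the NUL differs too. */
  uint64_t w1 = 0x11223344LL << 32 | 0x00636261;   /* 'a','b','c',0,... */
  uint64_t w2 = 0x55667788LL << 32 | 0x00646261;   /* 'a','b','d',0,... */
  uint64_t zmask = 0x80ULL << 24;                  /* NUL in byte 3 of w1 */
  printf ("%d\n", endstring_le (w1, w2, zmask));   /* -1: 'c' < 'd' */
  printf ("%d\n", endstring_le (w1, w1 ^ (0xffULL << 40), zmask)); /* 0 */
  return 0;
}

The same tail is reused, with only register-name differences, by the powerpc64 strcmp.S and strncmp.S changes later in this patch.
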
diff --git a/libc/sysdeps/powerpc/powerpc64/power7/strnlen.S b/libc/sysdeps/powerpc/powerpc64/power7/strnlen.S
index 37c7dbfe8..51591069d 100644
--- a/libc/sysdeps/powerpc/powerpc64/power7/strnlen.S
+++ b/libc/sysdeps/powerpc/powerpc64/power7/strnlen.S
@@ -24,33 +24,29 @@
ENTRY (__strnlen)
CALL_MCOUNT 2
dcbt 0,r3
- clrrdi r8,r3,3
+ clrrdi r8,r3,3
add r7,r3,r4 /* Calculate the last acceptable address. */
cmpldi r4,32
li r0,0 /* Doubleword with null chars. */
+ addi r7,r7,-1
+
/* If we have less than 33 bytes to search, skip to a faster code. */
ble L(small_range)
- cmpld cr7,r3,r7 /* Is the address equal or less than r3? If
- it's equal or less, it means size is either 0
- or a negative number. */
- ble cr7,L(proceed)
-
- li r7,-1 /* Make r11 the biggest if r4 <= 0. */
-L(proceed):
rlwinm r6,r3,3,26,28 /* Calculate padding. */
ld r12,0(r8) /* Load doubleword from memory. */
cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */
+#ifdef __LITTLE_ENDIAN__
+ srd r10,r10,r6
+ sld r10,r10,r6
+#else
sld r10,r10,r6
srd r10,r10,r6
+#endif
cmpldi cr7,r10,0 /* If r10 == 0, no null's have been found. */
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,8
- cmpld cr6,r9,r7
- bge cr6,L(end_max)
-
+ clrrdi r7,r7,3 /* Address of last doubleword. */
mtcrf 0x01,r8
/* Are we now aligned to a quadword boundary? If so, skip to
the main loop. Otherwise, go through the alignment code. */
@@ -63,17 +59,18 @@ L(proceed):
cmpldi cr7,r10,0
bne cr7,L(done)
- /* Are we done already? */
- addi r9,r8,8
- cmpld cr6,r9,r7
- bge cr6,L(end_max)
-
L(loop_setup):
- sub r5,r7,r9
+ /* The last dword we want to read in the loop below is the one
+ containing the last byte of the string, ie. the dword at
+ (s + size - 1) & ~7, or r7. The first dword read is at
+ r8 + 8, we read 2 * cnt dwords, so the last dword read will
+ be at r8 + 8 + 16 * cnt - 8. Solving for cnt gives
+ cnt = (r7 - r8) / 16 */
+ sub r5,r7,r8
srdi r6,r5,4 /* Number of loop iterations. */
mtctr r6 /* Setup the counter. */
- b L(loop)
- /* Main loop to look for the null byte backwards in the string. Since
+
+ /* Main loop to look for the null byte in the string. Since
it's a small loop (< 8 instructions), align it to 32-bytes. */
.p2align 5
L(loop):
@@ -89,15 +86,18 @@ L(loop):
cmpldi cr7,r5,0
bne cr7,L(found)
bdnz L(loop)
- /* We're here because the counter reached 0, and that means we
- didn't have any matches for null in the whole range. Just return
- the original size. */
- addi r9,r8,8
- cmpld cr6,r9,r7
- blt cr6,L(loop_small)
+
+ /* We may have one more dword to read. */
+ cmpld cr6,r8,r7
+ beq cr6,L(end_max)
+
+ ldu r12,8(r8)
+ cmpb r10,r12,r0
+ cmpldi cr6,r10,0
+ bne cr6,L(done)
L(end_max):
- sub r3,r7,r3
+ mr r3,r4
blr
/* OK, one (or both) of the doublewords contains a null byte. Check
@@ -119,52 +119,59 @@ L(found):
/* r10 has the output of the cmpb instruction, that is, it contains
0xff in the same position as the null byte in the original
doubleword from the string. Use that to calculate the length.
- We need to make sure the null char is *before* the start of the
- range (since we're going backwards). */
+ We need to make sure the null char is *before* the end of the
+ range. */
L(done):
- cntlzd r0,r10 /* Count leading zeroes before the match. */
- srdi r0,r0,3 /* Convert leading zeroes to bytes. */
- add r9,r8,r0
- sub r6,r9,r3 /* Length until the match. */
- cmpld r9,r7
- bgt L(end_max)
- mr r3,r6
- blr
-
- .align 4
-L(zero):
- li r3,0
+#ifdef __LITTLE_ENDIAN__
+ addi r0,r10,-1
+ andc r0,r0,r10
+ popcntd r0,r0
+#else
+ cntlzd r0,r10 /* Count leading zeros before the match. */
+#endif
+ sub r3,r8,r3
+ srdi r0,r0,3 /* Convert leading/trailing zeros to bytes. */
+ add r3,r3,r0 /* Length until the match. */
+ cmpld r3,r4
+ blelr
+ mr r3,r4
blr
/* Deals with size <= 32. */
.align 4
L(small_range):
cmpldi r4,0
- beq L(zero)
+ beq L(end_max)
+
+ clrrdi r7,r7,3 /* Address of last doubleword. */
rlwinm r6,r3,3,26,28 /* Calculate padding. */
- ld r12,0(r8) /* Load word from memory. */
+ ld r12,0(r8) /* Load doubleword from memory. */
cmpb r10,r12,r0 /* Check for null bytes in DWORD1. */
+#ifdef __LITTLE_ENDIAN__
+ srd r10,r10,r6
+ sld r10,r10,r6
+#else
sld r10,r10,r6
srd r10,r10,r6
+#endif
cmpldi cr7,r10,0
bne cr7,L(done)
- addi r9,r8,8
- cmpld r9,r7
- bge L(end_max)
- b L(loop_small)
+ cmpld r8,r7
+ beq L(end_max)
.p2align 5
L(loop_small):
ldu r12,8(r8)
cmpb r10,r12,r0
- addi r9,r8,8
cmpldi cr6,r10,0
bne cr6,L(done)
- cmpld r9,r7
- bge L(end_max)
- b L(loop_small)
+ cmpld r8,r7
+ bne L(loop_small)
+ mr r3,r4
+ blr
+
END (__strnlen)
weak_alias (__strnlen, strnlen)
libc_hidden_builtin_def (strnlen)
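
Instead of tracking an explicit end pointer, the rewritten tail derives the result arithmetically: the offset of the doubleword holding the NUL, plus the byte index of the NUL taken from the cmpb mask, clamped to the maxlen argument in r4. A hedged sketch with invented names, in the little-endian byte-index form:

#include <stdint.h>
#include <stdio.h>

/* Length from the doubleword that holds the NUL, clamped to MAXLEN.
   NUL_MASK is the cmpb result for that doubleword.  */
static size_t
strnlen_done (const char *s, size_t maxlen,
              const char *dword_addr, uint64_t nul_mask)
{
  /* popcntd of (m - 1) & ~m is the byte index of the first NUL,
     the same trailing-zero trick used by rawmemchr above.  */
  size_t idx = __builtin_popcountll ((nul_mask - 1) & ~nul_mask) / 8;
  size_t len = (size_t) (dword_addr - s) + idx;  /* sub/srdi/add in L(done) */
  return len <= maxlen ? len : maxlen;   /* cmpld r3,r4; blelr; mr r3,r4 */
}

int
main (void)
{
  static const char buf[16] = "powerpc\0garbage";
  uint64_t nul_mask = 0xffULL << (8 * 7);        /* NUL at byte 7 of buf */
  printf ("%zu %zu\n",
          strnlen_done (buf, 100, buf, nul_mask),   /* 7 */
          strnlen_done (buf, 5, buf, nul_mask));    /* 5, clamped */
  return 0;
}
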
diff --git a/libc/sysdeps/powerpc/powerpc64/setjmp-common.S b/libc/sysdeps/powerpc/powerpc64/setjmp-common.S
index 58ec61062..1829b9ab6 100644
--- a/libc/sysdeps/powerpc/powerpc64/setjmp-common.S
+++ b/libc/sysdeps/powerpc/powerpc64/setjmp-common.S
@@ -95,7 +95,7 @@ JUMPTARGET(GLUE(__sigsetjmp,_ent)):
mfcr r0
std r16,((JB_GPRS+2)*8)(3)
stfd fp16,((JB_FPRS+2)*8)(3)
- std r0,(JB_CR*8)(3)
+ stw r0,((JB_CR*8)+4)(3) /* 32-bit CR. */
std r17,((JB_GPRS+3)*8)(3)
stfd fp17,((JB_FPRS+3)*8)(3)
std r18,((JB_GPRS+4)*8)(3)
@@ -139,50 +139,46 @@ JUMPTARGET(GLUE(__sigsetjmp,_ent)):
la r5,((JB_VRS)*8)(3)
andi. r6,r5,0xf
mfspr r0,VRSAVE
- stw r0,((JB_VRSAVE)*8)(3)
+ stw r0,((JB_VRSAVE)*8)(3) /* 32-bit VRSAVE. */
addi r6,r5,16
beq+ L(aligned_save_vmx)
- lvsr v0,0,r5
- vspltisb v1,-1 /* set v1 to all 1's */
- vspltisb v2,0 /* set v2 to all 0's */
- vperm v3,v2,v1,v0 /* v3 contains shift mask with num all 1 bytes
- on left = misalignment */
+ lvsr v0,0,r5
+ lvsl v1,0,r5
+ addi r6,r5,-16
- /* Special case for v20 we need to preserve what is in save area
- below v20 before obliterating it */
- lvx v5,0,r5
- vperm v20,v20,v20,v0
- vsel v5,v5,v20,v3
- vsel v20,v20,v2,v3
- stvx v5,0,r5
+# define save_misaligned_vmx(savevr,prevvr,shiftvr,tmpvr,savegpr,addgpr) \
+ addi addgpr,addgpr,32; \
+ vperm tmpvr,prevvr,savevr,shiftvr; \
+ stvx tmpvr,0,savegpr
-# define save_2vmx_partial(savevr,prev_savevr,hivr,shiftvr,maskvr,savegpr,addgpr) \
- addi addgpr,addgpr,32; \
- vperm savevr,savevr,savevr,shiftvr; \
- vsel hivr,prev_savevr,savevr,maskvr; \
- stvx hivr,0,savegpr;
+ /*
+ * We have to be careful not to corrupt the data below v20 and
+ * above v31. To keep things simple we just rotate both ends in
+ * the opposite direction to our main permute so we can use
+ * the common macro.
+ */
- save_2vmx_partial(v21,v20,v5,v0,v3,r6,r5)
- save_2vmx_partial(v22,v21,v5,v0,v3,r5,r6)
- save_2vmx_partial(v23,v22,v5,v0,v3,r6,r5)
- save_2vmx_partial(v24,v23,v5,v0,v3,r5,r6)
- save_2vmx_partial(v25,v24,v5,v0,v3,r6,r5)
- save_2vmx_partial(v26,v25,v5,v0,v3,r5,r6)
- save_2vmx_partial(v27,v26,v5,v0,v3,r6,r5)
- save_2vmx_partial(v28,v27,v5,v0,v3,r5,r6)
- save_2vmx_partial(v29,v28,v5,v0,v3,r6,r5)
- save_2vmx_partial(v30,v29,v5,v0,v3,r5,r6)
+ /* load and rotate data below v20 */
+ lvx v2,0,r5
+ vperm v2,v2,v2,v1
+ save_misaligned_vmx(v20,v2,v0,v3,r5,r6)
+ save_misaligned_vmx(v21,v20,v0,v3,r6,r5)
+ save_misaligned_vmx(v22,v21,v0,v3,r5,r6)
+ save_misaligned_vmx(v23,v22,v0,v3,r6,r5)
+ save_misaligned_vmx(v24,v23,v0,v3,r5,r6)
+ save_misaligned_vmx(v25,v24,v0,v3,r6,r5)
+ save_misaligned_vmx(v26,v25,v0,v3,r5,r6)
+ save_misaligned_vmx(v27,v26,v0,v3,r6,r5)
+ save_misaligned_vmx(v28,v27,v0,v3,r5,r6)
+ save_misaligned_vmx(v29,v28,v0,v3,r6,r5)
+ save_misaligned_vmx(v30,v29,v0,v3,r5,r6)
+ save_misaligned_vmx(v31,v30,v0,v3,r6,r5)
+ /* load and rotate data above v31 */
+ lvx v2,0,r6
+ vperm v2,v2,v2,v1
+ save_misaligned_vmx(v2,v31,v0,v3,r5,r6)
- /* Special case for r31 we need to preserve what is in save area
- above v31 before obliterating it */
- addi r5,r5,32
- vperm v31,v31,v31,v0
- lvx v4,0,r5
- vsel v5,v30,v31,v3
- stvx v5,0,r6
- vsel v4,v31,v4,v3
- stvx v4,0,r5
b L(no_vmx)
L(aligned_save_vmx):
diff --git a/libc/sysdeps/powerpc/powerpc64/setjmp.S b/libc/sysdeps/powerpc/powerpc64/setjmp.S
index 667b9d12d..0a3b2fc02 100644
--- a/libc/sysdeps/powerpc/powerpc64/setjmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/setjmp.S
@@ -26,9 +26,9 @@
#else /* !NOT_IN_libc */
/* Build a versioned object for libc. */
-default_symbol_version (__vmxsetjmp, setjmp, GLIBC_2.3.4)
-default_symbol_version (__vmx_setjmp,_setjmp,GLIBC_2.3.4)
-default_symbol_version (__vmx__sigsetjmp,__sigsetjmp,GLIBC_2.3.4)
+versioned_symbol (libc, __vmxsetjmp, setjmp, GLIBC_2_3_4)
+versioned_symbol (libc, __vmx_setjmp, _setjmp, GLIBC_2_3_4)
+versioned_symbol (libc, __vmx__sigsetjmp, __sigsetjmp, GLIBC_2_3_4)
# define setjmp __vmxsetjmp
# define _setjmp __vmx_setjmp
# define __sigsetjmp __vmx__sigsetjmp
@@ -44,9 +44,9 @@ strong_alias (__vmx__sigsetjmp, __setjmp)
# undef __sigjmp_save
# undef JB_SIZE
# define __NO_VMX__
-symbol_version (__novmxsetjmp, setjmp, GLIBC_2.3)
-symbol_version (__novmx_setjmp,_setjmp,GLIBC_2.3);
-symbol_version (__novmx__sigsetjmp,__sigsetjmp,GLIBC_2.3)
+compat_symbol (libc, __novmxsetjmp, setjmp, GLIBC_2_3)
+compat_symbol (libc, __novmx_setjmp,_setjmp, GLIBC_2_3);
+compat_symbol (libc, __novmx__sigsetjmp,__sigsetjmp, GLIBC_2_3)
# define setjmp __novmxsetjmp
# define _setjmp __novmx_setjmp
# define __sigsetjmp __novmx__sigsetjmp
diff --git a/libc/sysdeps/powerpc/powerpc64/stackguard-macros.h b/libc/sysdeps/powerpc/powerpc64/stackguard-macros.h
index 9da879c61..e80a683e6 100644
--- a/libc/sysdeps/powerpc/powerpc64/stackguard-macros.h
+++ b/libc/sysdeps/powerpc/powerpc64/stackguard-macros.h
@@ -2,3 +2,13 @@
#define STACK_CHK_GUARD \
({ uintptr_t x; asm ("ld %0,-28688(13)" : "=r" (x)); x; })
+
+#define POINTER_CHK_GUARD \
+ ({ \
+ uintptr_t x; \
+ asm ("ld %0,%1(13)" \
+ : "=r" (x) \
+ : "i" (offsetof (tcbhead_t, pointer_guard) - TLS_TCB_OFFSET - sizeof (tcbhead_t)) \
+ ); \
+ x; \
+ })
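
POINTER_CHK_GUARD reads the per-thread pointer guard out of the TCB via r13; the testsuite uses it to check that the guard is actually set, and pointer mangling XORs sensitive saved pointers with it before they are stored. A simplified, self-contained model follows; the fallback symbol, the demo value, and the plain XOR round-trip are all illustrative, not glibc's PTR_MANGLE or its real guard setup.

#include <stdint.h>
#include <stdio.h>

/* Stand-in so the sketch builds outside glibc; the real macro reads the
   guard from the thread control block as shown in the header above.  */
#ifndef POINTER_CHK_GUARD
static uintptr_t demo_pointer_guard = 0x5bd1e995cafef00dULL;  /* made up */
# define POINTER_CHK_GUARD demo_pointer_guard
#endif

int
main (void)
{
  int object;
  uintptr_t guard = POINTER_CHK_GUARD;
  uintptr_t mangled = (uintptr_t) &object ^ guard;   /* what gets stored */
  void *back = (void *) (mangled ^ guard);           /* demangled before use */
  printf ("guard nonzero: %d, round trip ok: %d\n",
          guard != 0, back == (void *) &object);
  return 0;
}
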
diff --git a/libc/sysdeps/powerpc/powerpc64/stpcpy.S b/libc/sysdeps/powerpc/powerpc64/stpcpy.S
index 070cd4662..c0b39729e 100644
--- a/libc/sysdeps/powerpc/powerpc64/stpcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/stpcpy.S
@@ -62,7 +62,22 @@ L(g2): add rTMP, rFEFE, rWORD
mr rALT, rWORD
/* We've hit the end of the string. Do the rest byte-by-byte. */
-L(g1): rlwinm. rTMP, rALT, 8, 24, 31
+L(g1):
+#ifdef __LITTLE_ENDIAN__
+ rlwinm. rTMP, rALT, 0, 24, 31
+ stbu rALT, 4(rDEST)
+ beqlr-
+ rlwinm. rTMP, rALT, 24, 24, 31
+ stbu rTMP, 1(rDEST)
+ beqlr-
+ rlwinm. rTMP, rALT, 16, 24, 31
+ stbu rTMP, 1(rDEST)
+ beqlr-
+ rlwinm rTMP, rALT, 8, 24, 31
+ stbu rTMP, 1(rDEST)
+ blr
+#else
+ rlwinm. rTMP, rALT, 8, 24, 31
stbu rTMP, 4(rDEST)
beqlr-
rlwinm. rTMP, rALT, 16, 24, 31
@@ -73,6 +88,7 @@ L(g1): rlwinm. rTMP, rALT, 8, 24, 31
beqlr-
stbu rALT, 1(rDEST)
blr
+#endif
/* Oh well. In this case, we just do a byte-by-byte copy. */
.align 4
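
The added branch stores the bytes of the final word lowest address first, which on little-endian means starting from the least significant byte; the unchanged branch below it keeps the original most-significant-first order for big-endian. A C rendering of the little-endian tail, illustrative only and assuming a little-endian host in the demo:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Store the bytes of WORD lowest address first, stopping at (and
   including) the NUL, and return the address of the stored NUL as
   stpcpy does.  Assumes byte i of WORD sits at bits 8*i.  */
static char *
store_tail_le (char *dst, uint32_t word)
{
  for (int i = 0; i < 4; i++)
    {
      unsigned char b = (word >> (8 * i)) & 0xff;
      dst[i] = (char) b;
      if (b == 0)
        return dst + i;
    }
  return dst + 3;   /* not reached: the tail only runs when WORD has a NUL */
}

int
main (void)
{
  uint32_t word;
  memcpy (&word, "hi\0x", 4);   /* 'h','i',NUL,'x' (little-endian host) */
  char buf[16];
  memset (buf, '?', sizeof buf);
  char *end = store_tail_le (buf, word);
  printf ("copied \"%s\", NUL stored at offset %td\n", buf, end - buf);
  return 0;
}
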
diff --git a/libc/sysdeps/powerpc/powerpc64/strchr.S b/libc/sysdeps/powerpc/powerpc64/strchr.S
index d2d8cd361..da707ae58 100644
--- a/libc/sysdeps/powerpc/powerpc64/strchr.S
+++ b/libc/sysdeps/powerpc/powerpc64/strchr.S
@@ -37,11 +37,13 @@ ENTRY (strchr)
#define rIGN r10 /* number of bits we should ignore in the first word */
#define rMASK r11 /* mask with the bits to ignore set to 0 */
#define rTMP3 r12
+#define rTMP4 rIGN
+#define rTMP5 rMASK
dcbt 0,rRTN
- rlwimi rCHR, rCHR, 8, 16, 23
+ insrdi rCHR, rCHR, 8, 48
li rMASK, -1
- rlwimi rCHR, rCHR, 16, 0, 15
+ insrdi rCHR, rCHR, 16, 32
rlwinm rIGN, rRTN, 3, 26, 28
insrdi rCHR, rCHR, 32, 0
lis rFEFE, -0x101
@@ -54,64 +56,93 @@ ENTRY (strchr)
add rFEFE, rFEFE, rTMP1
/* Test the first (partial?) word. */
ld rWORD, 0(rSTR)
+#ifdef __LITTLE_ENDIAN__
+ sld rMASK, rMASK, rIGN
+#else
srd rMASK, rMASK, rIGN
+#endif
orc rWORD, rWORD, rMASK
add rTMP1, rFEFE, rWORD
nor rTMP2, r7F7F, rWORD
- and. rTMP1, rTMP1, rTMP2
+ and. rTMP4, rTMP1, rTMP2
xor rTMP3, rCHR, rWORD
orc rTMP3, rTMP3, rMASK
b L(loopentry)
/* The loop. */
-L(loop):ldu rWORD, 8(rSTR)
- and. rTMP1, rTMP1, rTMP2
+L(loop):
+ ldu rWORD, 8(rSTR)
+ and. rTMP5, rTMP1, rTMP2
/* Test for 0. */
- add rTMP1, rFEFE, rWORD
- nor rTMP2, r7F7F, rWORD
+ add rTMP1, rFEFE, rWORD /* x - 0x01010101. */
+ nor rTMP2, r7F7F, rWORD /* ~(x | 0x7f7f7f7f) == ~x & 0x80808080. */
bne L(foundit)
- and. rTMP1, rTMP1, rTMP2
+ and. rTMP4, rTMP1, rTMP2 /* (x - 0x01010101) & ~x & 0x80808080. */
/* Start test for the bytes we're looking for. */
xor rTMP3, rCHR, rWORD
L(loopentry):
add rTMP1, rFEFE, rTMP3
nor rTMP2, r7F7F, rTMP3
beq L(loop)
+
/* There is a zero byte in the word, but may also be a matching byte (either
before or after the zero byte). In fact, we may be looking for a
- zero byte, in which case we return a match. We guess that this hasn't
- happened, though. */
-L(missed):
- and. rTMP1, rTMP1, rTMP2
+ zero byte, in which case we return a match. */
+ and. rTMP5, rTMP1, rTMP2
li rRTN, 0
beqlr
-/* It did happen. Decide which one was first...
- I'm not sure if this is actually faster than a sequence of
- rotates, compares, and branches (we use it anyway because it's shorter). */
+/* At this point:
+ rTMP5 bytes are 0x80 for each match of c, 0 otherwise.
+ rTMP4 bytes are 0x80 for each match of 0, 0 otherwise.
+ But there may be false matches in the next most significant byte from
+ a true match due to carries. This means we need to recalculate the
+ matches using a longer method for big-endian. */
+#ifdef __LITTLE_ENDIAN__
+ addi rTMP1, rTMP5, -1
+ andc rTMP1, rTMP1, rTMP5
+ cntlzd rCLZB, rTMP1
+ addi rTMP2, rTMP4, -1
+ andc rTMP2, rTMP2, rTMP4
+ cmpld rTMP1, rTMP2
+ bgtlr
+ subfic rCLZB, rCLZB, 64-7
+#else
+/* I think we could reduce this by two instructions by keeping the "nor"
+ results from the loop for reuse here. See strlen.S tail. Similarly
+ one instruction could be pruned from L(foundit). */
and rFEFE, r7F7F, rWORD
- or rMASK, r7F7F, rWORD
+ or rTMP5, r7F7F, rWORD
and rTMP1, r7F7F, rTMP3
- or rIGN, r7F7F, rTMP3
+ or rTMP4, r7F7F, rTMP3
add rFEFE, rFEFE, r7F7F
add rTMP1, rTMP1, r7F7F
- nor rWORD, rMASK, rFEFE
- nor rTMP2, rIGN, rTMP1
+ nor rWORD, rTMP5, rFEFE
+ nor rTMP2, rTMP4, rTMP1
+ cntlzd rCLZB, rTMP2
cmpld rWORD, rTMP2
bgtlr
- cntlzd rCLZB, rTMP2
+#endif
srdi rCLZB, rCLZB, 3
add rRTN, rSTR, rCLZB
blr
L(foundit):
+#ifdef __LITTLE_ENDIAN__
+ addi rTMP1, rTMP5, -1
+ andc rTMP1, rTMP1, rTMP5
+ cntlzd rCLZB, rTMP1
+ subfic rCLZB, rCLZB, 64-7-64
+ sradi rCLZB, rCLZB, 3
+#else
and rTMP1, r7F7F, rTMP3
- or rIGN, r7F7F, rTMP3
+ or rTMP4, r7F7F, rTMP3
add rTMP1, rTMP1, r7F7F
- nor rTMP2, rIGN, rTMP1
+ nor rTMP2, rTMP4, rTMP1
cntlzd rCLZB, rTMP2
subi rSTR, rSTR, 8
srdi rCLZB, rCLZB, 3
+#endif
add rRTN, rSTR, rCLZB
blr
END (strchr)
diff --git a/libc/sysdeps/powerpc/powerpc64/strcmp.S b/libc/sysdeps/powerpc/powerpc64/strcmp.S
index c9d6dac12..70854689d 100644
--- a/libc/sysdeps/powerpc/powerpc64/strcmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/strcmp.S
@@ -25,7 +25,7 @@
EALIGN (strcmp, 4, 0)
CALL_MCOUNT 2
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -35,6 +35,7 @@ EALIGN (strcmp, 4, 0)
#define r7F7F r8 /* constant 0x7f7f7f7f7f7f7f7f */
#define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
#define rBITDIF r10 /* bits that differ in s1 & s2 words */
+#define rTMP r11
dcbt 0,rSTR1
or rTMP, rSTR2, rSTR1
@@ -58,19 +59,66 @@ L(g0): ldu rWORD1, 8(rSTR1)
ldu rWORD2, 8(rSTR2)
L(g1): add rTMP, rFEFE, rWORD1
nor rNEG, r7F7F, rWORD1
-
and. rTMP, rTMP, rNEG
cmpd cr1, rWORD1, rWORD2
beq+ L(g0)
-L(endstring):
+
/* OK. We've hit the end of the string. We need to be careful that
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ addi rTMP2, rTMP, -1
+ beq cr1, L(equal)
+ andc rTMP2, rTMP2, rTMP
+ rldimi rTMP2, rTMP2, 1, 0
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ cmpd cr1, rWORD1, rWORD2
+ beq cr1, L(equal)
+ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
+ neg rNEG, rBITDIF
+ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
+ cntlzd rNEG, rNEG /* bitcount of the bit. */
+ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
+ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
+ sld rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ sradi rRTN, rRTN, 63 /* must return an int. */
+ ori rRTN, rRTN, 1
+ blr
+L(equal):
+ li rRTN, 0
+ blr
+
+L(different):
+ ld rWORD1, -8(rSTR1)
+ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
+ neg rNEG, rBITDIF
+ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
+ cntlzd rNEG, rNEG /* bitcount of the bit. */
+ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
+ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
+ sld rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ sradi rRTN, rRTN, 63
+ ori rRTN, rRTN, 1
+ blr
+L(highbit):
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
+ blr
+
+#else
+L(endstring):
and rTMP, r7F7F, rWORD1
beq cr1, L(equal)
add rTMP, rTMP, r7F7F
xor. rBITDIF, rWORD1, rWORD2
-
andc rNEG, rNEG, rTMP
blt- L(highbit)
cntlzd rBITDIF, rBITDIF
@@ -79,7 +127,7 @@ L(endstring):
cmpd cr1, rNEG, rBITDIF
sub rRTN, rWORD1, rWORD2
blt- cr1, L(equal)
- sradi rRTN, rRTN, 63
+ sradi rRTN, rRTN, 63 /* must return an int. */
ori rRTN, rRTN, 1
blr
L(equal):
@@ -95,11 +143,10 @@ L(different):
ori rRTN, rRTN, 1
blr
L(highbit):
- srdi rWORD2, rWORD2, 56
- srdi rWORD1, rWORD1, 56
- sub rRTN, rWORD1, rWORD2
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
diff --git a/libc/sysdeps/powerpc/powerpc64/strcpy.S b/libc/sysdeps/powerpc/powerpc64/strcpy.S
index 4c6fd3f9d..a7fd85bad 100644
--- a/libc/sysdeps/powerpc/powerpc64/strcpy.S
+++ b/libc/sysdeps/powerpc/powerpc64/strcpy.S
@@ -68,6 +68,32 @@ L(g2): add rTMP, rFEFE, rWORD
mr rALT, rWORD
/* We've hit the end of the string. Do the rest byte-by-byte. */
L(g1):
+#ifdef __LITTLE_ENDIAN__
+ extrdi. rTMP, rALT, 8, 56
+ stb rALT, 8(rDEST)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 48
+ stb rTMP, 9(rDEST)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 40
+ stb rTMP, 10(rDEST)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 32
+ stb rTMP, 11(rDEST)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 24
+ stb rTMP, 12(rDEST)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 16
+ stb rTMP, 13(rDEST)
+ beqlr-
+ extrdi. rTMP, rALT, 8, 8
+ stb rTMP, 14(rDEST)
+ beqlr-
+ extrdi rTMP, rALT, 8, 0
+ stb rTMP, 15(rDEST)
+ blr
+#else
extrdi. rTMP, rALT, 8, 0
stb rTMP, 8(rDEST)
beqlr-
@@ -91,6 +117,7 @@ L(g1):
beqlr-
stb rALT, 15(rDEST)
blr
+#endif
/* Oh well. In this case, we just do a byte-by-byte copy. */
.align 4
diff --git a/libc/sysdeps/powerpc/powerpc64/strlen.S b/libc/sysdeps/powerpc/powerpc64/strlen.S
index 0f9b5eea9..4ed1ba3ad 100644
--- a/libc/sysdeps/powerpc/powerpc64/strlen.S
+++ b/libc/sysdeps/powerpc/powerpc64/strlen.S
@@ -29,7 +29,12 @@
1 is subtracted you get a value in the range 0x00-0x7f, none of which
have their high bit set. The expression here is
(x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when
- there were no 0x00 bytes in the word.
+ there were no 0x00 bytes in the word. You get 0x80 in bytes that
+ match, but possibly false 0x80 matches in the next more significant
+ byte to a true match due to carries. For little-endian this is
+ of no consequence since the least significant match is the one
+ we're interested in, but big-endian needs method 2 to find which
+ byte matches.
2) Given a word 'x', we can test to see _which_ byte was zero by
calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f).
@@ -62,7 +67,7 @@
Answer:
1) Added a Data Cache Block Touch early to prefetch the first 128
byte cache line. Adding dcbt instructions to the loop would not be
- effective since most strings will be shorter than the cache line.*/
+ effective since most strings will be shorter than the cache line. */
/* Some notes on register usage: Under the SVR4 ABI, we can use registers
0 and 3 through 12 (so long as we don't call any procedures) without
@@ -78,7 +83,7 @@
ENTRY (strlen)
CALL_MCOUNT 1
-#define rTMP1 r0
+#define rTMP4 r0
#define rRTN r3 /* incoming STR arg, outgoing result */
#define rSTR r4 /* current string position */
#define rPADN r5 /* number of padding bits we prepend to the
@@ -88,9 +93,9 @@ ENTRY (strlen)
#define rWORD1 r8 /* current string doubleword */
#define rWORD2 r9 /* next string doubleword */
#define rMASK r9 /* mask for first string doubleword */
-#define rTMP2 r10
-#define rTMP3 r11
-#define rTMP4 r12
+#define rTMP1 r10
+#define rTMP2 r11
+#define rTMP3 r12
dcbt 0,rRTN
clrrdi rSTR, rRTN, 3
@@ -100,30 +105,36 @@ ENTRY (strlen)
addi r7F7F, r7F7F, 0x7f7f
li rMASK, -1
insrdi r7F7F, r7F7F, 32, 0
-/* That's the setup done, now do the first pair of doublewords.
- We make an exception and use method (2) on the first two doublewords,
- to reduce overhead. */
+/* We use method (2) on the first two doublewords, because rFEFE isn't
+ required which reduces setup overhead. Also gives a faster return
+ for small strings on big-endian due to needing to recalculate with
+ method (2) anyway. */
+#ifdef __LITTLE_ENDIAN__
+ sld rMASK, rMASK, rPADN
+#else
srd rMASK, rMASK, rPADN
+#endif
and rTMP1, r7F7F, rWORD1
or rTMP2, r7F7F, rWORD1
lis rFEFE, -0x101
add rTMP1, rTMP1, r7F7F
addi rFEFE, rFEFE, -0x101
- nor rTMP1, rTMP2, rTMP1
- and. rWORD1, rTMP1, rMASK
+ nor rTMP3, rTMP2, rTMP1
+ and. rTMP3, rTMP3, rMASK
mtcrf 0x01, rRTN
bne L(done0)
- sldi rTMP1, rFEFE, 32
- add rFEFE, rFEFE, rTMP1
+ sldi rTMP1, rFEFE, 32
+ add rFEFE, rFEFE, rTMP1
/* Are we now aligned to a doubleword boundary? */
bt 28, L(loop)
/* Handle second doubleword of pair. */
+/* Perhaps use method (1) here for little-endian, saving one instruction? */
ldu rWORD1, 8(rSTR)
and rTMP1, r7F7F, rWORD1
or rTMP2, r7F7F, rWORD1
add rTMP1, rTMP1, r7F7F
- nor. rWORD1, rTMP2, rTMP1
+ nor. rTMP3, rTMP2, rTMP1
bne L(done0)
/* The loop. */
@@ -137,28 +148,52 @@ L(loop):
add rTMP3, rFEFE, rWORD2
nor rTMP4, r7F7F, rWORD2
bne L(done1)
- and. rTMP1, rTMP3, rTMP4
+ and. rTMP3, rTMP3, rTMP4
beq L(loop)
+#ifndef __LITTLE_ENDIAN__
and rTMP1, r7F7F, rWORD2
add rTMP1, rTMP1, r7F7F
- andc rWORD1, rTMP4, rTMP1
+ andc rTMP3, rTMP4, rTMP1
b L(done0)
L(done1):
and rTMP1, r7F7F, rWORD1
subi rSTR, rSTR, 8
add rTMP1, rTMP1, r7F7F
- andc rWORD1, rTMP2, rTMP1
+ andc rTMP3, rTMP2, rTMP1
/* When we get to here, rSTR points to the first doubleword in the string that
- contains a zero byte, and the most significant set bit in rWORD1 is in that
- byte. */
+ contains a zero byte, and rTMP3 has 0x80 for bytes that are zero, and 0x00
+ otherwise. */
L(done0):
- cntlzd rTMP3, rWORD1
+ cntlzd rTMP3, rTMP3
subf rTMP1, rRTN, rSTR
srdi rTMP3, rTMP3, 3
add rRTN, rTMP1, rTMP3
blr
+#else
+
+L(done0):
+ addi rTMP1, rTMP3, -1 /* Form a mask from trailing zeros. */
+ andc rTMP1, rTMP1, rTMP3
+ cntlzd rTMP1, rTMP1 /* Count bits not in the mask. */
+ subf rTMP3, rRTN, rSTR
+ subfic rTMP1, rTMP1, 64-7
+ srdi rTMP1, rTMP1, 3
+ add rRTN, rTMP1, rTMP3
+ blr
+
+L(done1):
+ addi rTMP3, rTMP1, -1
+ andc rTMP3, rTMP3, rTMP1
+ cntlzd rTMP3, rTMP3
+ subf rTMP1, rRTN, rSTR
+ subfic rTMP3, rTMP3, 64-7-64
+ sradi rTMP3, rTMP3, 3
+ add rRTN, rTMP1, rTMP3
+ blr
+#endif
+
END (strlen)
libc_hidden_builtin_def (strlen)
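
The comment added above is the heart of all the endian splits in these string patches: method (1) marks every zero byte with 0x80 but may also mark the byte just above a real zero because of the borrow, so little-endian can trust its lowest marker while big-endian recomputes with method (2). A self-check of both methods and of the carry caveat; a little-endian host is assumed for the byte numbering in the demo.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint64_t
method1 (uint64_t x)    /* the add/nor pair used in the loop */
{
  return (x + 0xfefefefefefefeffULL) & ~(x | 0x7f7f7f7f7f7f7f7fULL);
}

static uint64_t
method2 (uint64_t x)    /* exact per-byte zero detect */
{
  return ~(((x & 0x7f7f7f7f7f7f7f7fULL) + 0x7f7f7f7f7f7f7f7fULL)
           | x | 0x7f7f7f7f7f7f7f7fULL);
}

int
main (void)
{
  uint64_t w;
  memcpy (&w, "ab\0\1cdef", 8);   /* zero at byte 2, 0x01 at byte 3 */
  uint64_t m1 = method1 (w), m2 = method2 (w);
  assert (m2 == 0x80ULL << 16);                 /* only the real zero */
  assert ((m1 & (0x80ULL << 16)) != 0);         /* method 1 sees it too... */
  assert ((m1 & (0x80ULL << 24)) != 0);         /* ...plus a false hit above */
  assert (__builtin_ctzll (m1) == __builtin_ctzll (m2)); /* lowest marker agrees */
  printf ("m1=%016llx m2=%016llx\n",
          (unsigned long long) m1, (unsigned long long) m2);
  return 0;
}
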
diff --git a/libc/sysdeps/powerpc/powerpc64/strncmp.S b/libc/sysdeps/powerpc/powerpc64/strncmp.S
index 779d9f7f6..8f842c4b2 100644
--- a/libc/sysdeps/powerpc/powerpc64/strncmp.S
+++ b/libc/sysdeps/powerpc/powerpc64/strncmp.S
@@ -25,7 +25,7 @@
EALIGN (strncmp, 4, 0)
CALL_MCOUNT 3
-#define rTMP r0
+#define rTMP2 r0
#define rRTN r3
#define rSTR1 r3 /* first string arg */
#define rSTR2 r4 /* second string arg */
@@ -36,6 +36,7 @@ EALIGN (strncmp, 4, 0)
#define r7F7F r9 /* constant 0x7f7f7f7f7f7f7f7f */
#define rNEG r10 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
#define rBITDIF r11 /* bits that differ in s1 & s2 words */
+#define rTMP r12
dcbt 0,rSTR1
or rTMP, rSTR2, rSTR1
@@ -77,12 +78,59 @@ L(g1): add rTMP, rFEFE, rWORD1
we don't compare two strings as different because of gunk beyond
the end of the strings... */
+#ifdef __LITTLE_ENDIAN__
+L(endstring):
+ addi rTMP2, rTMP, -1
+ beq cr1, L(equal)
+ andc rTMP2, rTMP2, rTMP
+ rldimi rTMP2, rTMP2, 1, 0
+ and rWORD2, rWORD2, rTMP2 /* Mask off gunk. */
+ and rWORD1, rWORD1, rTMP2
+ cmpd cr1, rWORD1, rWORD2
+ beq cr1, L(equal)
+ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
+ neg rNEG, rBITDIF
+ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
+ cntlzd rNEG, rNEG /* bitcount of the bit. */
+ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
+ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
+ sld rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ sradi rRTN, rRTN, 63 /* must return an int. */
+ ori rRTN, rRTN, 1
+ blr
+L(equal):
+ li rRTN, 0
+ blr
+
+L(different):
+ ld rWORD1, -8(rSTR1)
+ xor rBITDIF, rWORD1, rWORD2 /* rBITDIF has bits that differ. */
+ neg rNEG, rBITDIF
+ and rNEG, rNEG, rBITDIF /* rNEG has LS bit that differs. */
+ cntlzd rNEG, rNEG /* bitcount of the bit. */
+ andi. rNEG, rNEG, 56 /* bitcount to LS byte that differs. */
+ sld rWORD1, rWORD1, rNEG /* shift left to clear MS bytes. */
+ sld rWORD2, rWORD2, rNEG
+ xor. rBITDIF, rWORD1, rWORD2
+ sub rRTN, rWORD1, rWORD2
+ blt- L(highbit)
+ sradi rRTN, rRTN, 63
+ ori rRTN, rRTN, 1
+ blr
+L(highbit):
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
+ blr
+
+#else
L(endstring):
and rTMP, r7F7F, rWORD1
beq cr1, L(equal)
add rTMP, rTMP, r7F7F
xor. rBITDIF, rWORD1, rWORD2
-
andc rNEG, rNEG, rTMP
blt- L(highbit)
cntlzd rBITDIF, rBITDIF
@@ -91,7 +139,7 @@ L(endstring):
cmpd cr1, rNEG, rBITDIF
sub rRTN, rWORD1, rWORD2
blt- cr1, L(equal)
- sradi rRTN, rRTN, 63
+ sradi rRTN, rRTN, 63 /* must return an int. */
ori rRTN, rRTN, 1
blr
L(equal):
@@ -99,7 +147,7 @@ L(equal):
blr
L(different):
- ldu rWORD1, -8(rSTR1)
+ ld rWORD1, -8(rSTR1)
xor. rBITDIF, rWORD1, rWORD2
sub rRTN, rWORD1, rWORD2
blt- L(highbit)
@@ -107,11 +155,10 @@ L(different):
ori rRTN, rRTN, 1
blr
L(highbit):
- srdi rWORD2, rWORD2, 56
- srdi rWORD1, rWORD1, 56
- sub rRTN, rWORD1, rWORD2
+ sradi rRTN, rWORD2, 63
+ ori rRTN, rRTN, 1
blr
-
+#endif
/* Oh well. In this case, we just do a byte-by-byte comparison. */
.align 4
diff --git a/libc/sysdeps/powerpc/preconfigure b/libc/sysdeps/powerpc/preconfigure
new file mode 100644
index 000000000..1741c251f
--- /dev/null
+++ b/libc/sysdeps/powerpc/preconfigure
@@ -0,0 +1,11 @@
+# Check for e500.
+
+case "$machine" in
+powerpc)
+ $CC $CFLAGS $CPPFLAGS -E -dM -xc /dev/null > conftest.i
+ if grep -q __NO_FPRS__ conftest.i && ! grep -q _SOFT_FLOAT conftest.i; then
+ base_machine=powerpc machine=powerpc/powerpc32/e500
+ fi
+ rm -f conftest.i
+ ;;
+esac
diff --git a/libc/sysdeps/powerpc/soft-fp/sfp-machine.h b/libc/sysdeps/powerpc/soft-fp/sfp-machine.h
new file mode 100644
index 000000000..041187807
--- /dev/null
+++ b/libc/sysdeps/powerpc/soft-fp/sfp-machine.h
@@ -0,0 +1,115 @@
+#define _FP_W_TYPE_SIZE 32
+#define _FP_W_TYPE unsigned long
+#define _FP_WS_TYPE signed long
+#define _FP_I_TYPE long
+
+#define _FP_MUL_MEAT_S(R,X,Y) \
+ _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_D(R,X,Y) \
+ _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_Q(R,X,Y) \
+ _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
+
+#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_loop(S,R,X,Y)
+#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y)
+#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y)
+
+#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1)
+#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1
+#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1
+#define _FP_NANSIGN_S 0
+#define _FP_NANSIGN_D 0
+#define _FP_NANSIGN_Q 0
+
+#define _FP_KEEPNANFRACP 1
+#define _FP_QNANNEGATEDP 0
+
+/* Someone please check this. */
+#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \
+ do { \
+ if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \
+ && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \
+ { \
+ R##_s = Y##_s; \
+ _FP_FRAC_COPY_##wc(R,Y); \
+ } \
+ else \
+ { \
+ R##_s = X##_s; \
+ _FP_FRAC_COPY_##wc(R,X); \
+ } \
+ R##_c = FP_CLS_NAN; \
+ } while (0)
+
+#if defined __NO_FPRS__ && !defined _SOFT_FLOAT
+
+/* Exception flags. We use the bit positions of the appropriate bits
+ in the FPEFSCR. */
+
+# include <fenv_libc.h>
+# include <sysdep.h>
+# include <sys/prctl.h>
+
+int __feraiseexcept_soft (int);
+libc_hidden_proto (__feraiseexcept_soft)
+
+# define FP_EX_INEXACT SPEFSCR_FINXS
+# define FP_EX_INVALID SPEFSCR_FINVS
+# define FP_EX_DIVZERO SPEFSCR_FDBZS
+# define FP_EX_UNDERFLOW SPEFSCR_FUNFS
+# define FP_EX_OVERFLOW SPEFSCR_FOVFS
+
+# define _FP_DECL_EX \
+ int _spefscr __attribute__ ((unused)), _ftrapex __attribute__ ((unused)) = 0
+# define FP_INIT_ROUNDMODE \
+ do \
+ { \
+ int _r; \
+ INTERNAL_SYSCALL_DECL (_err); \
+ \
+ _spefscr = fegetenv_register (); \
+ _r = INTERNAL_SYSCALL (prctl, _err, 2, PR_GET_FPEXC, &_ftrapex); \
+ if (INTERNAL_SYSCALL_ERROR_P (_r, _err)) \
+ _ftrapex = 0; \
+ } \
+ while (0)
+# define FP_INIT_EXCEPTIONS /* Empty. */
+
+# define FP_HANDLE_EXCEPTIONS __feraiseexcept_soft (_fex)
+# define FP_ROUNDMODE (_spefscr & 0x3)
+
+/* Not correct in general, but sufficient for the uses in soft-fp. */
+# define FP_TRAPPING_EXCEPTIONS (_ftrapex & PR_FP_EXC_UND \
+ ? FP_EX_UNDERFLOW \
+ : 0)
+
+#else
+
+/* Exception flags. We use the bit positions of the appropriate bits
+ in the FPSCR, which also correspond to the FE_* bits. This makes
+ everything easier ;-). */
+# define FP_EX_INVALID (1 << (31 - 2))
+# define FP_EX_OVERFLOW (1 << (31 - 3))
+# define FP_EX_UNDERFLOW (1 << (31 - 4))
+# define FP_EX_DIVZERO (1 << (31 - 5))
+# define FP_EX_INEXACT (1 << (31 - 6))
+
+# define FP_HANDLE_EXCEPTIONS __simulate_exceptions (_fex)
+# define FP_ROUNDMODE __sim_round_mode
+# define FP_TRAPPING_EXCEPTIONS (~__sim_disabled_exceptions & 0x3e000000)
+
+#endif
+
+/* FIXME: these variables should be thread specific (see bugzilla bug
+ 15483) and ideally preserved across signal handlers, like hardware
+ FP status words, but the latter is quite difficult to accomplish in
+ userland. */
+
+extern int __sim_exceptions;
+libc_hidden_proto (__sim_exceptions);
+extern int __sim_disabled_exceptions;
+libc_hidden_proto (__sim_disabled_exceptions);
+extern int __sim_round_mode;
+libc_hidden_proto (__sim_round_mode);
+
+extern void __simulate_exceptions (int x) attribute_hidden;
diff --git a/libc/sysdeps/powerpc/sysdep.h b/libc/sysdeps/powerpc/sysdep.h
index 1b5334ad3..bc2cb6681 100644
--- a/libc/sysdeps/powerpc/sysdep.h
+++ b/libc/sysdeps/powerpc/sysdep.h
@@ -144,6 +144,21 @@
#define VRSAVE 256
+/* The 32-bit words of a 64-bit dword are at these offsets in memory. */
+#if defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN
+# define LOWORD 0
+# define HIWORD 4
+#else
+# define LOWORD 4
+# define HIWORD 0
+#endif
+
+/* The high 16-bit word of a 64-bit dword is at this offset in memory. */
+#if defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN
+# define HISHORT 6
+#else
+# define HISHORT 0
+#endif
/* This seems to always be the case on PPC. */
#define ALIGNARG(log2) log2
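
LOWORD/HIWORD (and HISHORT) give the memory offsets of the 32-bit halves, and of the high 16 bits, of a stored 64-bit value, so the same assembly source can address them on either endianness. A quick C model of the same mapping; the __BYTE_ORDER__ fallback is only there so the sketch also builds on hosts without the PowerPC predefines.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  uint64_t v = 0x1122334455667788ULL;
  unsigned char bytes[8];
  uint32_t lo, hi;
  memcpy (bytes, &v, 8);

#if defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN \
    || (defined __BYTE_ORDER__ && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
  enum { LOWORD = 0, HIWORD = 4 };
#else
  enum { LOWORD = 4, HIWORD = 0 };
#endif

  memcpy (&lo, bytes + LOWORD, 4);
  memcpy (&hi, bytes + HIWORD, 4);
  assert (lo == (uint32_t) v);            /* 0x55667788 */
  assert (hi == (uint32_t) (v >> 32));    /* 0x11223344 */
  printf ("LOWORD=%d HIWORD=%d lo=%08x hi=%08x\n",
          LOWORD, HIWORD, (unsigned) lo, (unsigned) hi);
  return 0;
}
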
diff --git a/libc/sysdeps/s390/ffs.c b/libc/sysdeps/s390/ffs.c
index 807441da6..2dbb7430e 100644
--- a/libc/sysdeps/s390/ffs.c
+++ b/libc/sysdeps/s390/ffs.c
@@ -63,6 +63,7 @@ __ffs (x)
}
weak_alias (__ffs, ffs)
+libc_hidden_def (__ffs)
libc_hidden_builtin_def (ffs)
#if ULONG_MAX == UINT_MAX
#undef ffsl
diff --git a/libc/sysdeps/s390/s390-32/stackguard-macros.h b/libc/sysdeps/s390/s390-32/stackguard-macros.h
index b74c5799b..449e8d488 100644
--- a/libc/sysdeps/s390/s390-32/stackguard-macros.h
+++ b/libc/sysdeps/s390/s390-32/stackguard-macros.h
@@ -2,3 +2,14 @@
#define STACK_CHK_GUARD \
({ uintptr_t x; asm ("ear %0,%%a0; l %0,0x14(%0)" : "=a" (x)); x; })
+
+/* On s390/s390x there is no unique pointer guard, instead we use the
+ same value as the stack guard. */
+#define POINTER_CHK_GUARD \
+ ({ \
+ uintptr_t x; \
+ asm ("ear %0,%%a0; l %0,%1(%0)" \
+ : "=a" (x) \
+ : "i" (offsetof (tcbhead_t, stack_guard))); \
+ x; \
+ })
diff --git a/libc/sysdeps/s390/s390-64/stackguard-macros.h b/libc/sysdeps/s390/s390-64/stackguard-macros.h
index 0cebb5f02..c8270fbe7 100644
--- a/libc/sysdeps/s390/s390-64/stackguard-macros.h
+++ b/libc/sysdeps/s390/s390-64/stackguard-macros.h
@@ -2,3 +2,17 @@
#define STACK_CHK_GUARD \
({ uintptr_t x; asm ("ear %0,%%a0; sllg %0,%0,32; ear %0,%%a1; lg %0,0x28(%0)" : "=a" (x)); x; })
+
+/* On s390/s390x there is no unique pointer guard, instead we use the
+ same value as the stack guard. */
+#define POINTER_CHK_GUARD \
+ ({ \
+ uintptr_t x; \
+ asm ("ear %0,%%a0;" \
+ "sllg %0,%0,32;" \
+ "ear %0,%%a1;" \
+ "lg %0,%1(%0)" \
+ : "=a" (x) \
+ : "i" (offsetof (tcbhead_t, stack_guard))); \
+ x; \
+ })
diff --git a/libc/sysdeps/sh/stackguard-macros.h b/libc/sysdeps/sh/stackguard-macros.h
new file mode 100644
index 000000000..55a5771b6
--- /dev/null
+++ b/libc/sysdeps/sh/stackguard-macros.h
@@ -0,0 +1,6 @@
+#include <stdint.h>
+
+extern uintptr_t __stack_chk_guard;
+#define STACK_CHK_GUARD __stack_chk_guard
+
+#define POINTER_CHK_GUARD THREAD_GET_POINTER_GUARD()
diff --git a/libc/sysdeps/sparc/fpu/libm-test-ulps b/libc/sysdeps/sparc/fpu/libm-test-ulps
index cfc72a661..ecb871d6a 100644
--- a/libc/sysdeps/sparc/fpu/libm-test-ulps
+++ b/libc/sysdeps/sparc/fpu/libm-test-ulps
@@ -7505,6 +7505,34 @@ idouble: 1
Test "gamma (-0.5)":
ildouble: 1
ldouble: 1
+Test "gamma (-0x1p-10)":
+double: 1
+idouble: 1
+Test "gamma (-0x1p-15)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "gamma (-0x1p-20)":
+double: 1
+idouble: 1
+Test "gamma (-0x1p-25)":
+ildouble: 1
+ldouble: 1
+Test "gamma (-0x1p-30)":
+ildouble: 1
+ldouble: 1
+Test "gamma (-0x1p-40)":
+ildouble: 1
+ldouble: 1
+Test "gamma (-0x1p-5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "gamma (-0x1p-64)":
+ildouble: 1
+ldouble: 1
Test "gamma (0.7)":
double: 1
float: 1
@@ -7512,6 +7540,25 @@ idouble: 1
ifloat: 1
ildouble: 1
ldouble: 1
+Test "gamma (0x1p-10)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "gamma (0x1p-30)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "gamma (0x1p-5)":
+ildouble: 1
+ldouble: 1
+Test "gamma (0x1p-60)":
+ildouble: 1
+ldouble: 1
+Test "gamma (0x1p-70)":
+ildouble: 1
+ldouble: 1
Test "gamma (1.2)":
double: 1
float: 2
@@ -7725,6 +7772,17 @@ double: 2
float: 2
idouble: 2
ifloat: 2
+Test "jn (2, 0x1p1023)":
+ildouble: 1
+ldouble: 1
+Test "jn (2, 0x1p127)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "jn (2, 0x1p16383)":
+ildouble: 2
+ldouble: 2
Test "jn (2, 2.4048255576957729)":
double: 2
float: 1
@@ -7802,6 +7860,34 @@ ldouble: 3
Test "lgamma (-0.5)":
ildouble: 1
ldouble: 1
+Test "lgamma (-0x1p-10)":
+double: 1
+idouble: 1
+Test "lgamma (-0x1p-15)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+Test "lgamma (-0x1p-20)":
+double: 1
+idouble: 1
+Test "lgamma (-0x1p-25)":
+ildouble: 1
+ldouble: 1
+Test "lgamma (-0x1p-30)":
+ildouble: 1
+ldouble: 1
+Test "lgamma (-0x1p-40)":
+ildouble: 1
+ldouble: 1
+Test "lgamma (-0x1p-5)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "lgamma (-0x1p-64)":
+ildouble: 1
+ldouble: 1
Test "lgamma (0.7)":
double: 1
float: 1
@@ -7809,6 +7895,25 @@ idouble: 1
ifloat: 1
ildouble: 1
ldouble: 1
+Test "lgamma (0x1p-10)":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "lgamma (0x1p-30)":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+Test "lgamma (0x1p-5)":
+ildouble: 1
+ldouble: 1
+Test "lgamma (0x1p-60)":
+ildouble: 1
+ldouble: 1
+Test "lgamma (0x1p-70)":
+ildouble: 1
+ldouble: 1
Test "lgamma (1.2)":
double: 1
float: 2
@@ -9239,7 +9344,9 @@ ildouble: 5
ldouble: 5
Test "yn (10, 1.0)":
double: 1
+float: 2
idouble: 1
+ifloat: 2
ildouble: 1
ldouble: 1
Test "yn (10, 10.0)":
@@ -9251,7 +9358,25 @@ ildouble: 2
ldouble: 2
Test "yn (10, 2.0)":
double: 2
+float: 1
idouble: 2
+ifloat: 1
+ildouble: 2
+ldouble: 2
+Test "yn (2, 0x1.ffff62p+99)":
+double: 1
+idouble: 1
+Test "yn (2, 0x1p1023)":
+ildouble: 1
+ldouble: 1
+Test "yn (2, 0x1p127)":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+Test "yn (2, 0x1p16383)":
ildouble: 2
ldouble: 2
Test "yn (3, 0.125)":
diff --git a/libc/sysdeps/sparc/sparc32/soft-fp/q_neg.c b/libc/sysdeps/sparc/sparc32/soft-fp/q_neg.c
index 551c40887..5e2449c91 100644
--- a/libc/sysdeps/sparc/sparc32/soft-fp/q_neg.c
+++ b/libc/sysdeps/sparc/sparc32/soft-fp/q_neg.c
@@ -24,7 +24,6 @@
long double _Q_neg(const long double a)
{
- FP_DECL_EX;
long double c = a;
#if (__BYTE_ORDER == __BIG_ENDIAN)
@@ -36,11 +35,9 @@ long double _Q_neg(const long double a)
#else
FP_DECL_Q(A); FP_DECL_Q(C);
- FP_UNPACK_Q(A, a);
+ FP_UNPACK_RAW_Q(A, a);
FP_NEG_Q(C, A);
- FP_PACK_Q(c, C);
+ FP_PACK_RAW_Q(c, C);
#endif
- FP_CLEAR_EXCEPTIONS;
- FP_HANDLE_EXCEPTIONS;
return c;
}
diff --git a/libc/sysdeps/sparc/sparc32/stackguard-macros.h b/libc/sysdeps/sparc/sparc32/stackguard-macros.h
index c0b02b0bb..1eef0f19f 100644
--- a/libc/sysdeps/sparc/sparc32/stackguard-macros.h
+++ b/libc/sysdeps/sparc/sparc32/stackguard-macros.h
@@ -2,3 +2,6 @@
#define STACK_CHK_GUARD \
({ uintptr_t x; asm ("ld [%%g7+0x14], %0" : "=r" (x)); x; })
+
+#define POINTER_CHK_GUARD \
+ ({ uintptr_t x; asm ("ld [%%g7+0x18], %0" : "=r" (x)); x; })
diff --git a/libc/sysdeps/sparc/sparc64/stackguard-macros.h b/libc/sysdeps/sparc/sparc64/stackguard-macros.h
index 80f063558..cc0c12c04 100644
--- a/libc/sysdeps/sparc/sparc64/stackguard-macros.h
+++ b/libc/sysdeps/sparc/sparc64/stackguard-macros.h
@@ -2,3 +2,6 @@
#define STACK_CHK_GUARD \
({ uintptr_t x; asm ("ldx [%%g7+0x28], %0" : "=r" (x)); x; })
+
+#define POINTER_CHK_GUARD \
+ ({ uintptr_t x; asm ("ldx [%%g7+0x30], %0" : "=r" (x)); x; })
diff --git a/libc/sysdeps/unix/Makefile b/libc/sysdeps/unix/Makefile
index 375561f0d..2b607a007 100644
--- a/libc/sysdeps/unix/Makefile
+++ b/libc/sysdeps/unix/Makefile
@@ -51,12 +51,14 @@ $(objpfx)stub-syscalls.c: $(common-objpfx)sysd-syscalls \
for call in $(unix-stub-syscalls); do \
case $$call in \
*@@*) \
- ver=$${call##*@}; call=$${call%%@*}; ver=$${ver//./_}; \
+ ver=$${call##*@}; call=$${call%%@*}; \
+ ver=`echo "$ver" | sed 's/\./_/g'`; \
echo "strong_alias (_no_syscall, __$${call}_$${ver})"; \
echo "versioned_symbol (libc, __$${call}_$${ver}, $$call, $$ver);"\
;; \
*@*) \
- ver=$${call##*@}; call=$${call%%@*}; ver=$${ver//./_}; \
+ ver=$${call##*@}; call=$${call%%@*}; \
+ ver=`echo "$ver" | sed 's/\./_/g'`; \
echo "strong_alias (_no_syscall, __$${call}_$${ver})"; \
echo "compat_symbol (libc, __$${call}_$${ver}, $$call, $$ver);" \
;; \
diff --git a/libc/sysdeps/unix/make-syscalls.sh b/libc/sysdeps/unix/make-syscalls.sh
index f04f2abb3..6eba62c94 100644
--- a/libc/sysdeps/unix/make-syscalls.sh
+++ b/libc/sysdeps/unix/make-syscalls.sh
@@ -275,7 +275,7 @@ while read file srcfile caller syscall args strong weak; do
# name in the vDSO and KERNEL_X.Y is its symbol version.
vdso_symbol="${vdso_syscall%@*}"
vdso_symver="${vdso_syscall#*@}"
- vdso_symver="${vdso_symver//./_}"
+ vdso_symver=`echo "$vdso_symver" | sed 's/\./_/g'`
echo "\
\$(foreach p,\$(sysd-rules-targets),\$(objpfx)\$(patsubst %,\$p,$file).os): \\
\$(..)sysdeps/unix/make-syscalls.sh\
diff --git a/libc/sysdeps/unix/sysv/linux/bits/fcntl-linux.h b/libc/sysdeps/unix/sysv/linux/bits/fcntl-linux.h
index b5929bd29..9b0421ee0 100644
--- a/libc/sysdeps/unix/sysv/linux/bits/fcntl-linux.h
+++ b/libc/sysdeps/unix/sysv/linux/bits/fcntl-linux.h
@@ -96,6 +96,9 @@
#ifndef __O_DSYNC
# define __O_DSYNC 010000
#endif
+#ifndef __O_TMPFILE
+# define __O_TMPFILE 020200000
+#endif
#ifndef F_GETLK
# ifndef __USE_FILE_OFFSET64
@@ -128,6 +131,7 @@
# define O_DIRECT __O_DIRECT /* Direct disk access. */
# define O_NOATIME __O_NOATIME /* Do not set atime. */
# define O_PATH __O_PATH /* Resolve pathname but do not open file. */
+# define O_TMPFILE __O_TMPFILE /* Atomically create nameless file. */
#endif
/* For now, Linux has no separate synchronicity options for read
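
The new __O_TMPFILE/O_TMPFILE definitions above expose the Linux 3.11 flag for creating an unnamed regular file inside a directory; the descriptor can later be given a name with linkat if wanted. A usage sketch, assuming a kernel and headers that provide O_TMPFILE (the directory path and mode are placeholders):

/* Sketch of O_TMPFILE usage: the file has no name and vanishes when
   the descriptor is closed, unless it is linked into the filesystem
   first via /proc/self/fd.  */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main (void)
{
  int fd = open ("/tmp", O_TMPFILE | O_RDWR, 0600);
  if (fd < 0)
    {
      perror ("open");
      return 1;
    }
  if (write (fd, "scratch\n", 8) != 8)
    perror ("write");
  /* Optionally give it a name (requires /proc):
       char p[64];
       snprintf (p, sizeof p, "/proc/self/fd/%d", fd);
       linkat (AT_FDCWD, p, AT_FDCWD, "/tmp/now-visible",
               AT_SYMLINK_FOLLOW);  */
  close (fd);
  return 0;
}
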
diff --git a/libc/sysdeps/unix/sysv/linux/configure b/libc/sysdeps/unix/sysv/linux/configure
index 76cfea1dc..88fab5662 100644
--- a/libc/sysdeps/unix/sysv/linux/configure
+++ b/libc/sysdeps/unix/sysv/linux/configure
@@ -187,7 +187,7 @@ case "$machine" in
libc_cv_gcc_unwind_find_fde=yes
arch_minimum_kernel=2.6.16
;;
- powerpc/powerpc32)
+ powerpc/powerpc32*)
libc_cv_gcc_unwind_find_fde=yes
arch_minimum_kernel=2.6.16
;;
diff --git a/libc/sysdeps/unix/sysv/linux/configure.in b/libc/sysdeps/unix/sysv/linux/configure.in
index e55d9fdb0..5e5902d8e 100644
--- a/libc/sysdeps/unix/sysv/linux/configure.in
+++ b/libc/sysdeps/unix/sysv/linux/configure.in
@@ -43,7 +43,7 @@ case "$machine" in
libc_cv_gcc_unwind_find_fde=yes
arch_minimum_kernel=2.6.16
;;
- powerpc/powerpc32)
+ powerpc/powerpc32*)
libc_cv_gcc_unwind_find_fde=yes
arch_minimum_kernel=2.6.16
;;
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/bits/sigstack.h b/libc/sysdeps/unix/sysv/linux/powerpc/bits/sigstack.h
new file mode 100644
index 000000000..33be9e8db
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/bits/sigstack.h
@@ -0,0 +1,54 @@
+/* sigstack, sigaltstack definitions.
+ Copyright (C) 1998-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _SIGNAL_H
+# error "Never include this file directly. Use <signal.h> instead"
+#endif
+
+
+/* Structure describing a signal stack (obsolete). */
+struct sigstack
+ {
+ void *ss_sp; /* Signal stack pointer. */
+ int ss_onstack; /* Nonzero if executing on this stack. */
+ };
+
+
+/* Possible values for `ss_flags'. */
+enum
+{
+ SS_ONSTACK = 1,
+#define SS_ONSTACK SS_ONSTACK
+ SS_DISABLE
+#define SS_DISABLE SS_DISABLE
+};
+
+/* Minimum stack size for a signal handler. */
+#define MINSIGSTKSZ 4096
+
+/* System default stack size. */
+#define SIGSTKSZ 16384
+
+
+/* Alternate, preferred interface. */
+typedef struct sigaltstack
+ {
+ void *ss_sp;
+ int ss_flags;
+ size_t ss_size;
+ } stack_t;
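
The stack_t, SIGSTKSZ and SS_* definitions in the new powerpc sigstack.h are consumed through sigaltstack together with SA_ONSTACK. A short usage sketch with deliberately minimal error handling (the message text is arbitrary):

/* Install an alternate signal stack and run one handler on it.  */
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

static void
handler (int sig)
{
  /* Runs on the alternate stack thanks to SA_ONSTACK.  */
  write (STDOUT_FILENO, "on altstack\n", 12);
}

int
main (void)
{
  stack_t ss;
  ss.ss_sp = malloc (SIGSTKSZ);
  ss.ss_size = SIGSTKSZ;
  ss.ss_flags = 0;
  if (ss.ss_sp == NULL || sigaltstack (&ss, NULL) != 0)
    return 1;

  struct sigaction sa;
  memset (&sa, 0, sizeof sa);
  sa.sa_handler = handler;
  sa.sa_flags = SA_ONSTACK;
  sigaction (SIGUSR1, &sa, NULL);

  raise (SIGUSR1);
  return 0;
}
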
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/405/Implies b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/405/Implies
new file mode 100644
index 000000000..70c0d2eda
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/405/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/405/fpu
+powerpc/powerpc32/405
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/440/Implies b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/440/Implies
new file mode 100644
index 000000000..c3e52c550
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/440/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/440/fpu
+powerpc/powerpc32/440
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/464/Implies b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/464/Implies
new file mode 100644
index 000000000..2829f9cca
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/464/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/464/fpu
+powerpc/powerpc32/464
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/476/Implies b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/476/Implies
new file mode 100644
index 000000000..80f917079
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/476/Implies
@@ -0,0 +1,2 @@
+powerpc/powerpc32/476/fpu
+powerpc/powerpc32/476
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500/nofpu/Implies b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500/nofpu/Implies
new file mode 100644
index 000000000..00365c1cf
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/e500/nofpu/Implies
@@ -0,0 +1,3 @@
+powerpc/powerpc32/e500/nofpu
+powerpc/nofpu
+powerpc/soft-fp
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S
index 926f34207..082d302e8 100644
--- a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext-common.S
@@ -151,15 +151,15 @@ ENTRY(__CONTEXT_FUNC_NAME)
# ifdef SHARED
lwz r7,_rtld_global_ro@got(r7)
mtlr r8
- lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r7)
+ lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r7)
# else
lwz r7,_dl_hwcap@got(r7)
mtlr r8
- lwz r7,4(r7)
+ lwz r7,LOWORD(r7)
# endif
# else
- lis r7,(_dl_hwcap+4)@ha
- lwz r7,(_dl_hwcap+4)@l(r7)
+ lis r7,(_dl_hwcap+LOWORD)@ha
+ lwz r7,(_dl_hwcap+LOWORD)@l(r7)
# endif
andis. r7,r7,(PPC_FEATURE_HAS_ALTIVEC >> 16)
@@ -262,8 +262,8 @@ ENTRY(__CONTEXT_FUNC_NAME)
# endif
#endif
-#ifdef __GETCONTEXT_EXTRA
- __GETCONTEXT_EXTRA
+#ifdef __CONTEXT_ENABLE_E500
+ getcontext_e500
#endif
/* We need to set up parms and call sigprocmask which will clobber
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/makecontext.S b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/makecontext.S
index 95902b13f..70e3c9762 100644
--- a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/makecontext.S
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/makecontext.S
@@ -47,7 +47,9 @@ ENTRY(__makecontext)
#ifdef PIC
mflr r0
cfi_register(lr,r0)
- bl 1f
+ /* Use this conditional form of branch and link to avoid destroying
+ the cpu link stack used to predict blr return addresses. */
+ bcl 20,31,1f
1: mflr r6
addi r6,r6,L(exitcode)-1b
mtlr r0
@@ -136,7 +138,9 @@ ENTRY(__novec_makecontext)
#ifdef PIC
mflr r0
cfi_register(lr,r0)
- bl 1f
+ /* Use this conditional form of branch and link to avoid destroying
+ the cpu link stack used to predict blr return addresses. */
+ bcl 20,31,1f
1: mflr r6
addi r6,r6,L(novec_exitcode)-1b
mtlr r0
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/Implies b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/Implies
new file mode 100644
index 000000000..40836b6fb
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/Implies
@@ -0,0 +1,2 @@
+powerpc/nofpu
+powerpc/soft-fp
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/context-e500.h b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/context-e500.h
new file mode 100644
index 000000000..9eb1a9561
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/context-e500.h
@@ -0,0 +1,144 @@
+/* getcontext/setcontext/makecontext support for e500 high parts of registers.
+ Copyright (C) 2006-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _CONTEXT_E500_H
+#define _CONTEXT_E500_H 1
+
+#if defined __SPE__ || (defined __NO_FPRS__ && !defined _SOFT_FLOAT)
+
+# define __CONTEXT_ENABLE_E500 1
+
+/* We follow the kernel's layout, which saves the high parts of the
+ SPE registers in the vregs area, immediately followed by the ACC
+ value (call-clobbered, not handled here) and the SPEFSCR value. */
+
+.macro getcontext_e500
+ la r10,(_UC_VREGS)(r3)
+ evstwwe r0,(0*4)(r10)
+ evstwwe r1,(1*4)(r10)
+ evstwwe r2,(2*4)(r10)
+ evstwwe r3,(3*4)(r10)
+ evstwwe r4,(4*4)(r10)
+ evstwwe r5,(5*4)(r10)
+ evstwwe r6,(6*4)(r10)
+ evstwwe r7,(7*4)(r10)
+ evstwwe r8,(8*4)(r10)
+ evstwwe r9,(9*4)(r10)
+ evstwwe r10,(10*4)(r10)
+ evstwwe r11,(11*4)(r10)
+ evstwwe r12,(12*4)(r10)
+ evstwwe r13,(13*4)(r10)
+ evstwwe r14,(14*4)(r10)
+ evstwwe r15,(15*4)(r10)
+ evstwwe r16,(16*4)(r10)
+ evstwwe r17,(17*4)(r10)
+ evstwwe r18,(18*4)(r10)
+ evstwwe r19,(19*4)(r10)
+ evstwwe r20,(20*4)(r10)
+ evstwwe r21,(21*4)(r10)
+ evstwwe r22,(22*4)(r10)
+ evstwwe r23,(23*4)(r10)
+ evstwwe r24,(24*4)(r10)
+ evstwwe r25,(25*4)(r10)
+ evstwwe r26,(26*4)(r10)
+ evstwwe r27,(27*4)(r10)
+ evstwwe r28,(28*4)(r10)
+ evstwwe r29,(29*4)(r10)
+ evstwwe r30,(30*4)(r10)
+ evstwwe r31,(31*4)(r10)
+ mfspefscr r9
+ stw r9,(34*4)(r10)
+.endm
+
+.macro setcontext_e500
+ lwz r3,_UC_VREGS+(0*4)(r31)
+ evmergelo r0,r3,r0
+ lwz r3,_UC_VREGS+(1*4)(r31)
+ evmergelo r1,r3,r1
+ lwz r3,_UC_VREGS+(2*4)(r31)
+ evmergelo r2,r3,r2
+ lwz r3,_UC_VREGS+(1*4)(r31)
+ evmergelo r1,r3,r1
+ lwz r3,_UC_VREGS+(2*4)(r31)
+ evmergelo r2,r3,r2
+ lwz r3,_UC_VREGS+(3*4)(r31)
+ evmergelo r3,r3,r3
+ lwz r3,_UC_VREGS+(4*4)(r31)
+ evmergelo r4,r3,r4
+ lwz r3,_UC_VREGS+(5*4)(r31)
+ evmergelo r5,r3,r5
+ lwz r3,_UC_VREGS+(6*4)(r31)
+ evmergelo r6,r3,r6
+ lwz r3,_UC_VREGS+(7*4)(r31)
+ evmergelo r7,r3,r7
+ lwz r3,_UC_VREGS+(8*4)(r31)
+ evmergelo r8,r3,r8
+ lwz r3,_UC_VREGS+(9*4)(r31)
+ evmergelo r9,r3,r9
+ lwz r3,_UC_VREGS+(10*4)(r31)
+ evmergelo r10,r3,r10
+ lwz r3,_UC_VREGS+(11*4)(r31)
+ evmergelo r11,r3,r11
+ lwz r3,_UC_VREGS+(12*4)(r31)
+ evmergelo r12,r3,r12
+ lwz r3,_UC_VREGS+(13*4)(r31)
+ evmergelo r13,r3,r13
+ lwz r3,_UC_VREGS+(14*4)(r31)
+ evmergelo r14,r3,r14
+ lwz r3,_UC_VREGS+(15*4)(r31)
+ evmergelo r15,r3,r15
+ lwz r3,_UC_VREGS+(16*4)(r31)
+ evmergelo r16,r3,r16
+ lwz r3,_UC_VREGS+(17*4)(r31)
+ evmergelo r17,r3,r17
+ lwz r3,_UC_VREGS+(18*4)(r31)
+ evmergelo r18,r3,r18
+ lwz r3,_UC_VREGS+(19*4)(r31)
+ evmergelo r19,r3,r19
+ lwz r3,_UC_VREGS+(20*4)(r31)
+ evmergelo r20,r3,r20
+ lwz r3,_UC_VREGS+(21*4)(r31)
+ evmergelo r21,r3,r21
+ lwz r3,_UC_VREGS+(22*4)(r31)
+ evmergelo r22,r3,r22
+ lwz r3,_UC_VREGS+(23*4)(r31)
+ evmergelo r23,r3,r23
+ lwz r3,_UC_VREGS+(24*4)(r31)
+ evmergelo r24,r3,r24
+ lwz r3,_UC_VREGS+(25*4)(r31)
+ evmergelo r25,r3,r25
+ lwz r3,_UC_VREGS+(26*4)(r31)
+ evmergelo r26,r3,r26
+ lwz r3,_UC_VREGS+(27*4)(r31)
+ evmergelo r27,r3,r27
+ lwz r3,_UC_VREGS+(28*4)(r31)
+ evmergelo r28,r3,r28
+ lwz r3,_UC_VREGS+(29*4)(r31)
+ evmergelo r29,r3,r29
+ lwz r3,_UC_VREGS+(30*4)(r31)
+ evmergelo r30,r3,r30
+ lwz r3,_UC_VREGS+(31*4)(r31)
+ evmergelo r31,r3,r31
+ lwz r3,_UC_VREGS+(34*4)(r31)
+ mtspefscr r3
+.endm
+#else
+# undef __CONTEXT_ENABLE_E500
+#endif
+
+#endif /* context-e500.h */
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/getcontext.S b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/getcontext.S
new file mode 100644
index 000000000..8bc3c7a43
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/getcontext.S
@@ -0,0 +1,60 @@
+/* Save current context.
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <rtld-global-offsets.h>
+#include <shlib-compat.h>
+
+#define __ASSEMBLY__
+#include <asm/ptrace.h>
+#include "ucontext_i.h"
+
+#include <context-e500.h>
+
+#define __CONTEXT_FUNC_NAME __getcontext
+#undef __CONTEXT_ENABLE_FPRS
+#undef __CONTEXT_ENABLE_VRS
+
+#include "getcontext-common.S"
+
+versioned_symbol (libc, __getcontext, getcontext, GLIBC_2_3_4)
+
+#if SHLIB_COMPAT (libc, GLIBC_2_3_3, GLIBC_2_3_4)
+
+/* For the nofpu case the old/new versions are the same function. */
+strong_alias (__getcontext, __novec_getcontext)
+
+compat_symbol (libc, __novec_getcontext, getcontext, GLIBC_2_3_3)
+
+#endif
+
+#if SHLIB_COMPAT (libc, GLIBC_2_1, GLIBC_2_3_3)
+
+#define _ERRNO_H 1
+#include <bits/errno.h>
+
+ compat_text_section
+ENTRY (__getcontext_stub)
+ li r3,ENOSYS
+ b __syscall_error@local
+END (__getcontext_stub)
+ .previous
+
+compat_symbol (libc, __getcontext_stub, getcontext, GLIBC_2_1)
+
+#endif
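
The nofpu getcontext.S above builds __getcontext from getcontext-common.S with FPR and VR handling disabled and wires up the versioned and compat symbols. What it ultimately has to support is the ordinary <ucontext.h> API; a small sketch of the getcontext/makecontext/swapcontext flow (the 64 KiB stack size is an arbitrary choice for the example):

/* Run a trivial coroutine and return to main via uc_link.  */
#include <stdio.h>
#include <stdlib.h>
#include <ucontext.h>

static ucontext_t main_ctx, coro_ctx;

static void
coro (void)
{
  puts ("in coroutine");
  /* Returning here continues in uc_link, i.e. back in main_ctx.  */
}

int
main (void)
{
  if (getcontext (&coro_ctx) != 0)
    return 1;
  coro_ctx.uc_stack.ss_sp = malloc (64 * 1024);
  coro_ctx.uc_stack.ss_size = 64 * 1024;
  coro_ctx.uc_link = &main_ctx;
  makecontext (&coro_ctx, coro, 0);

  swapcontext (&main_ctx, &coro_ctx);
  puts ("back in main");
  return 0;
}
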
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/c++-types.data b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/c++-types.data
new file mode 100644
index 000000000..fde53bf33
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/c++-types.data
@@ -0,0 +1,67 @@
+blkcnt64_t:x
+blkcnt_t:l
+blksize_t:l
+caddr_t:Pc
+clockid_t:i
+clock_t:l
+daddr_t:i
+dev_t:y
+fd_mask:l
+fsblkcnt64_t:y
+fsblkcnt_t:m
+fsfilcnt64_t:y
+fsfilcnt_t:m
+fsid_t:8__fsid_t
+gid_t:j
+id_t:j
+ino64_t:y
+ino_t:m
+int16_t:s
+int32_t:i
+int64_t:x
+int8_t:a
+intptr_t:i
+key_t:i
+loff_t:x
+mode_t:j
+nlink_t:j
+off64_t:x
+off_t:l
+pid_t:i
+pthread_attr_t:14pthread_attr_t
+pthread_barrier_t:17pthread_barrier_t
+pthread_barrierattr_t:21pthread_barrierattr_t
+pthread_cond_t:14pthread_cond_t
+pthread_condattr_t:18pthread_condattr_t
+pthread_key_t:j
+pthread_mutex_t:15pthread_mutex_t
+pthread_mutexattr_t:19pthread_mutexattr_t
+pthread_once_t:i
+pthread_rwlock_t:16pthread_rwlock_t
+pthread_rwlockattr_t:20pthread_rwlockattr_t
+pthread_spinlock_t:i
+pthread_t:m
+quad_t:x
+register_t:i
+rlim64_t:y
+rlim_t:m
+sigset_t:10__sigset_t
+size_t:j
+socklen_t:j
+ssize_t:i
+suseconds_t:l
+time_t:l
+u_char:h
+uid_t:j
+uint:j
+u_int:j
+u_int16_t:t
+u_int32_t:j
+u_int64_t:y
+u_int8_t:h
+ulong:m
+u_long:m
+u_quad_t:y
+useconds_t:j
+ushort:t
+u_short:t
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/ld.abilist b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/ld.abilist
new file mode 100644
index 000000000..d71611f02
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/ld.abilist
@@ -0,0 +1,17 @@
+GLIBC_2.0
+ GLIBC_2.0 A
+ __libc_memalign F
+ _r_debug D 0x14
+ calloc F
+ free F
+ malloc F
+ realloc F
+GLIBC_2.1
+ GLIBC_2.1 A
+ __libc_stack_end D 0x4
+ _dl_mcount F
+GLIBC_2.3
+ GLIBC_2.3 A
+ __tls_get_addr F
+GLIBC_2.4
+ GLIBC_2.4 A
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libBrokenLocale.abilist b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libBrokenLocale.abilist
new file mode 100644
index 000000000..f4ca37f44
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libBrokenLocale.abilist
@@ -0,0 +1,3 @@
+GLIBC_2.0
+ GLIBC_2.0 A
+ __ctype_get_mb_cur_max F
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libanl.abilist b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libanl.abilist
new file mode 100644
index 000000000..c9755d8a3
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libanl.abilist
@@ -0,0 +1,6 @@
+GLIBC_2.2.3
+ GLIBC_2.2.3 A
+ gai_cancel F
+ gai_error F
+ gai_suspend F
+ getaddrinfo_a F
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libc.abilist b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libc.abilist
new file mode 100644
index 000000000..9b6d66374
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libc.abilist
@@ -0,0 +1,2523 @@
+GCC_3.0
+ GCC_3.0 A
+ _Unwind_Find_FDE F
+ __deregister_frame_info_bases F
+ __register_frame_info_bases F
+ __register_frame_info_table_bases F
+GLIBC_2.0
+ GLIBC_2.0 A
+ _IO_adjust_column F
+ _IO_default_doallocate F
+ _IO_default_finish F
+ _IO_default_pbackfail F
+ _IO_default_uflow F
+ _IO_default_xsgetn F
+ _IO_default_xsputn F
+ _IO_do_write F
+ _IO_doallocbuf F
+ _IO_fclose F
+ _IO_fdopen F
+ _IO_feof F
+ _IO_ferror F
+ _IO_fflush F
+ _IO_fgetpos F
+ _IO_fgets F
+ _IO_file_attach F
+ _IO_file_close F
+ _IO_file_close_it F
+ _IO_file_doallocate F
+ _IO_file_fopen F
+ _IO_file_init F
+ _IO_file_jumps D 0x54
+ _IO_file_open F
+ _IO_file_overflow F
+ _IO_file_read F
+ _IO_file_seek F
+ _IO_file_seekoff F
+ _IO_file_setbuf F
+ _IO_file_stat F
+ _IO_file_sync F
+ _IO_file_underflow F
+ _IO_file_write F
+ _IO_file_xsputn F
+ _IO_flockfile F
+ _IO_flush_all F
+ _IO_flush_all_linebuffered F
+ _IO_fopen F
+ _IO_fprintf F
+ _IO_fputs F
+ _IO_fread F
+ _IO_free_backup_area F
+ _IO_fsetpos F
+ _IO_ftell F
+ _IO_ftrylockfile F
+ _IO_funlockfile F
+ _IO_fwrite F
+ _IO_getc F
+ _IO_getline F
+ _IO_gets F
+ _IO_init F
+ _IO_init_marker F
+ _IO_link_in F
+ _IO_list_all D 0x4
+ _IO_marker_delta F
+ _IO_marker_difference F
+ _IO_padn F
+ _IO_peekc_locked F
+ _IO_popen F
+ _IO_printf F
+ _IO_proc_close F
+ _IO_proc_open F
+ _IO_putc F
+ _IO_puts F
+ _IO_remove_marker F
+ _IO_seekmark F
+ _IO_seekoff F
+ _IO_seekpos F
+ _IO_setb F
+ _IO_setbuffer F
+ _IO_setvbuf F
+ _IO_sgetn F
+ _IO_sprintf F
+ _IO_sputbackc F
+ _IO_sscanf F
+ _IO_stderr_ D 0x50
+ _IO_stdin_ D 0x50
+ _IO_stdout_ D 0x50
+ _IO_str_init_readonly F
+ _IO_str_init_static F
+ _IO_str_overflow F
+ _IO_str_pbackfail F
+ _IO_str_seekoff F
+ _IO_str_underflow F
+ _IO_sungetc F
+ _IO_switch_to_get_mode F
+ _IO_un_link F
+ _IO_ungetc F
+ _IO_unsave_markers F
+ _IO_vfprintf F
+ _IO_vfscanf F
+ _IO_vsprintf F
+ __adjtimex F
+ __after_morecore_hook D 0x4
+ __argz_count F
+ __argz_next F
+ __argz_stringify F
+ __ashldi3 F
+ __ashrdi3 F
+ __assert_fail F
+ __assert_perror_fail F
+ __bsd_getpgrp F
+ __bzero F
+ __check_rhosts_file D 0x4
+ __clone F
+ __close F
+ __cmpdi2 F
+ __cmsg_nxthdr F
+ __connect F
+ __ctype32_b D 0x4
+ __ctype_b D 0x4
+ __ctype_get_mb_cur_max F
+ __ctype_tolower D 0x4
+ __ctype_toupper D 0x4
+ __curbrk D 0x4
+ __daylight D 0x4
+ __dcgettext F
+ __default_morecore F
+ __deregister_frame F
+ __deregister_frame_info F
+ __dgettext F
+ __divdi3 F
+ __dup2 F
+ __environ D 0x4
+ __errno_location F
+ __fcntl F
+ __ffs F
+ __finite F
+ __finitef F
+ __finitel F
+ __fixdfdi F
+ __fixsfdi F
+ __fixunsdfdi F
+ __fixunssfdi F
+ __floatdidf F
+ __floatdisf F
+ __fork F
+ __fpu_control D 0x4
+ __frame_state_for F
+ __free_hook D 0x4
+ __fxstat F
+ __getdelim F
+ __getpagesize F
+ __getpgid F
+ __getpid F
+ __gettimeofday F
+ __gmtime_r F
+ __h_errno_location F
+ __isinf F
+ __isinff F
+ __isinfl F
+ __isnan F
+ __isnanf F
+ __isnanl F
+ __iswctype F
+ __ivaliduser F
+ __libc_calloc F
+ __libc_free F
+ __libc_init_first F
+ __libc_mallinfo F
+ __libc_malloc F
+ __libc_mallopt F
+ __libc_memalign F
+ __libc_pvalloc F
+ __libc_realloc F
+ __libc_start_main F
+ __libc_valloc F
+ __lseek F
+ __lshrdi3 F
+ __lxstat F
+ __malloc_hook D 0x4
+ __malloc_initialize_hook D 0x4
+ __mbrlen F
+ __mbrtowc F
+ __memalign_hook D 0x4
+ __mempcpy F
+ __moddi3 F
+ __monstartup F
+ __morecore D 0x4
+ __nss_configure_lookup F
+ __nss_database_lookup F
+ __nss_group_lookup F
+ __nss_hosts_lookup F
+ __nss_next F
+ __nss_passwd_lookup F
+ __open F
+ __overflow F
+ __pipe F
+ __printf_fp F
+ __profile_frequency F
+ __progname D 0x4
+ __progname_full D 0x4
+ __rcmd_errstr D 0x4
+ __read F
+ __realloc_hook D 0x4
+ __register_frame F
+ __register_frame_info F
+ __register_frame_info_table F
+ __register_frame_table F
+ __res_randomid F
+ __sbrk F
+ __sched_get_priority_max F
+ __sched_get_priority_min F
+ __sched_getparam F
+ __sched_getscheduler F
+ __sched_setscheduler F
+ __sched_yield F
+ __secure_getenv F
+ __select F
+ __send F
+ __setpgid F
+ __sigaction F
+ __sigaddset F
+ __sigdelset F
+ __sigismember F
+ __sigpause F
+ __sigsetjmp F
+ __stpcpy F
+ __stpncpy F
+ __strcasecmp F
+ __strdup F
+ __strerror_r F
+ __strtod_internal F
+ __strtof_internal F
+ __strtok_r F
+ __strtol_internal F
+ __strtold_internal F
+ __strtoll_internal F
+ __strtoq_internal F
+ __strtoul_internal F
+ __strtoull_internal F
+ __strtouq_internal F
+ __sysv_signal F
+ __timezone D 0x4
+ __tzname D 0x8
+ __ucmpdi2 F
+ __udivdi3 F
+ __uflow F
+ __umoddi3 F
+ __underflow F
+ __vfscanf F
+ __vsnprintf F
+ __vsscanf F
+ __wait F
+ __waitpid F
+ __wcstod_internal F
+ __wcstof_internal F
+ __wcstol_internal F
+ __wcstold_internal F
+ __wcstoll_internal F
+ __wcstoul_internal F
+ __wcstoull_internal F
+ __write F
+ __xmknod F
+ __xpg_basename F
+ __xstat F
+ _environ D 0x4
+ _exit F
+ _libc_intl_domainname D 0x5
+ _longjmp F
+ _mcleanup F
+ _mcount F
+ _nl_default_dirname D 0x12
+ _nl_domain_bindings D 0x4
+ _nl_msg_cat_cntr D 0x4
+ _null_auth D 0xc
+ _obstack D 0x4
+ _obstack_allocated_p F
+ _obstack_begin F
+ _obstack_begin_1 F
+ _obstack_free F
+ _obstack_memory_used F
+ _obstack_newchunk F
+ _res D 0x200
+ _rpc_dtablesize F
+ _seterr_reply F
+ _setjmp F
+ _sys_errlist D 0x1ec
+ _sys_nerr D 0x4
+ _sys_siglist D 0x80
+ _tolower F
+ _toupper F
+ a64l F
+ abort F
+ abs F
+ accept F
+ access F
+ acct F
+ addmntent F
+ adjtime F
+ adjtimex F
+ advance F
+ alarm F
+ alphasort F
+ argz_add F
+ argz_add_sep F
+ argz_append F
+ argz_count F
+ argz_create F
+ argz_create_sep F
+ argz_delete F
+ argz_extract F
+ argz_insert F
+ argz_next F
+ argz_replace F
+ argz_stringify F
+ asctime F
+ asctime_r F
+ asprintf F
+ atexit F
+ atof F
+ atoi F
+ atol F
+ atoll F
+ authnone_create F
+ authunix_create F
+ authunix_create_default F
+ basename F
+ bcmp F
+ bcopy F
+ bdflush F
+ bind F
+ bindresvport F
+ bindtextdomain F
+ brk F
+ bsd_signal F
+ bsearch F
+ btowc F
+ bzero F
+ calloc F
+ callrpc F
+ canonicalize_file_name F
+ catclose F
+ catgets F
+ catopen F
+ cfgetispeed F
+ cfgetospeed F
+ cfmakeraw F
+ cfree F
+ cfsetispeed F
+ cfsetospeed F
+ cfsetspeed F
+ chdir F
+ chflags F
+ chmod F
+ chown F
+ chroot F
+ clearenv F
+ clearerr F
+ clearerr_unlocked F
+ clnt_broadcast F
+ clnt_create F
+ clnt_pcreateerror F
+ clnt_perrno F
+ clnt_perror F
+ clnt_spcreateerror F
+ clnt_sperrno F
+ clnt_sperror F
+ clntraw_create F
+ clnttcp_create F
+ clntudp_bufcreate F
+ clntudp_create F
+ clock F
+ clone F
+ close F
+ closedir F
+ closelog F
+ confstr F
+ connect F
+ copysign F
+ copysignf F
+ copysignl F
+ creat F
+ create_module F
+ ctermid F
+ ctime F
+ ctime_r F
+ cuserid F
+ daemon F
+ daylight D 0x4
+ dcgettext F
+ delete_module F
+ dgettext F
+ difftime F
+ dirfd F
+ dirname F
+ div F
+ dprintf F
+ drand48 F
+ drand48_r F
+ dup F
+ dup2 F
+ dysize F
+ ecvt F
+ ecvt_r F
+ endaliasent F
+ endfsent F
+ endgrent F
+ endhostent F
+ endmntent F
+ endnetent F
+ endnetgrent F
+ endprotoent F
+ endpwent F
+ endrpcent F
+ endservent F
+ endspent F
+ endttyent F
+ endusershell F
+ endutent F
+ environ D 0x4
+ envz_add F
+ envz_entry F
+ envz_get F
+ envz_merge F
+ envz_remove F
+ envz_strip F
+ erand48 F
+ erand48_r F
+ err F
+ error F
+ error_at_line F
+ error_message_count D 0x4
+ error_one_per_line D 0x4
+ error_print_progname D 0x4
+ errx F
+ ether_aton F
+ ether_aton_r F
+ ether_hostton F
+ ether_line F
+ ether_ntoa F
+ ether_ntoa_r F
+ ether_ntohost F
+ euidaccess F
+ execl F
+ execle F
+ execlp F
+ execv F
+ execve F
+ execvp F
+ exit F
+ fchdir F
+ fchflags F
+ fchmod F
+ fchown F
+ fclose F
+ fcloseall F
+ fcntl F
+ fcvt F
+ fcvt_r F
+ fdatasync F
+ fdopen F
+ feof F
+ feof_unlocked F
+ ferror F
+ ferror_unlocked F
+ fexecve F
+ fflush F
+ fflush_unlocked F
+ ffs F
+ fgetc F
+ fgetgrent F
+ fgetgrent_r F
+ fgetpos F
+ fgetpwent F
+ fgetpwent_r F
+ fgets F
+ fgetspent F
+ fgetspent_r F
+ fileno F
+ fileno_unlocked F
+ finite F
+ finitef F
+ finitel F
+ flock F
+ flockfile F
+ fnmatch F
+ fopen F
+ fopencookie F
+ fork F
+ fpathconf F
+ fprintf F
+ fputc F
+ fputc_unlocked F
+ fputs F
+ fread F
+ free F
+ freeaddrinfo F
+ freopen F
+ frexp F
+ frexpf F
+ frexpl F
+ fscanf F
+ fseek F
+ fsetpos F
+ fstatfs F
+ fsync F
+ ftell F
+ ftime F
+ ftok F
+ ftruncate F
+ ftrylockfile F
+ fts_children F
+ fts_close F
+ fts_open F
+ fts_read F
+ fts_set F
+ ftw F
+ funlockfile F
+ fwrite F
+ gcvt F
+ get_avphys_pages F
+ get_current_dir_name F
+ get_kernel_syms F
+ get_myaddress F
+ get_nprocs F
+ get_nprocs_conf F
+ get_phys_pages F
+ getaddrinfo F
+ getaliasbyname F
+ getaliasbyname_r F
+ getaliasent F
+ getaliasent_r F
+ getc F
+ getc_unlocked F
+ getchar F
+ getchar_unlocked F
+ getcwd F
+ getdelim F
+ getdirentries F
+ getdomainname F
+ getdtablesize F
+ getegid F
+ getenv F
+ geteuid F
+ getfsent F
+ getfsfile F
+ getfsspec F
+ getgid F
+ getgrent F
+ getgrent_r F
+ getgrgid F
+ getgrgid_r F
+ getgrnam F
+ getgrnam_r F
+ getgroups F
+ gethostbyaddr F
+ gethostbyaddr_r F
+ gethostbyname F
+ gethostbyname2 F
+ gethostbyname2_r F
+ gethostbyname_r F
+ gethostent F
+ gethostent_r F
+ gethostid F
+ gethostname F
+ getitimer F
+ getline F
+ getlogin F
+ getlogin_r F
+ getmntent F
+ getmntent_r F
+ getnetbyaddr F
+ getnetbyaddr_r F
+ getnetbyname F
+ getnetbyname_r F
+ getnetent F
+ getnetent_r F
+ getnetgrent F
+ getnetgrent_r F
+ getopt F
+ getopt_long F
+ getopt_long_only F
+ getpagesize F
+ getpass F
+ getpeername F
+ getpgid F
+ getpgrp F
+ getpid F
+ getppid F
+ getpriority F
+ getprotobyname F
+ getprotobyname_r F
+ getprotobynumber F
+ getprotobynumber_r F
+ getprotoent F
+ getprotoent_r F
+ getpublickey F
+ getpw F
+ getpwent F
+ getpwent_r F
+ getpwnam F
+ getpwnam_r F
+ getpwuid F
+ getpwuid_r F
+ getresgid F
+ getresuid F
+ getrlimit F
+ getrpcbyname F
+ getrpcbyname_r F
+ getrpcbynumber F
+ getrpcbynumber_r F
+ getrpcent F
+ getrpcent_r F
+ getrpcport F
+ getrusage F
+ gets F
+ getsecretkey F
+ getservbyname F
+ getservbyname_r F
+ getservbyport F
+ getservbyport_r F
+ getservent F
+ getservent_r F
+ getsid F
+ getsockname F
+ getsockopt F
+ getspent F
+ getspent_r F
+ getspnam F
+ getspnam_r F
+ getsubopt F
+ gettext F
+ gettimeofday F
+ getttyent F
+ getttynam F
+ getuid F
+ getusershell F
+ getutent F
+ getutent_r F
+ getutid F
+ getutid_r F
+ getutline F
+ getutline_r F
+ getw F
+ getwd F
+ glob F
+ glob_pattern_p F
+ globfree F
+ gmtime F
+ gmtime_r F
+ group_member F
+ gsignal F
+ gtty F
+ h_errlist D 0x14
+ h_nerr D 0x4
+ hasmntopt F
+ hcreate F
+ hcreate_r F
+ hdestroy F
+ hdestroy_r F
+ herror F
+ hsearch F
+ hsearch_r F
+ hstrerror F
+ htonl F
+ htons F
+ index F
+ inet_addr F
+ inet_aton F
+ inet_lnaof F
+ inet_makeaddr F
+ inet_netof F
+ inet_network F
+ inet_nsap_addr F
+ inet_nsap_ntoa F
+ inet_ntoa F
+ inet_ntop F
+ inet_pton F
+ init_module F
+ initgroups F
+ initstate F
+ initstate_r F
+ innetgr F
+ insque F
+ ioctl F
+ iruserok F
+ isalnum F
+ isalpha F
+ isascii F
+ isatty F
+ isblank F
+ iscntrl F
+ isdigit F
+ isfdtype F
+ isgraph F
+ isinf F
+ isinff F
+ isinfl F
+ islower F
+ isnan F
+ isnanf F
+ isnanl F
+ isprint F
+ ispunct F
+ isspace F
+ isupper F
+ iswalnum F
+ iswalpha F
+ iswcntrl F
+ iswctype F
+ iswdigit F
+ iswgraph F
+ iswlower F
+ iswprint F
+ iswpunct F
+ iswspace F
+ iswupper F
+ iswxdigit F
+ isxdigit F
+ jrand48 F
+ jrand48_r F
+ kill F
+ killpg F
+ klogctl F
+ l64a F
+ labs F
+ lchown F
+ lckpwdf F
+ lcong48 F
+ lcong48_r F
+ ldexp F
+ ldexpf F
+ ldexpl F
+ ldiv F
+ lfind F
+ link F
+ listen F
+ llabs F
+ lldiv F
+ llseek F
+ loc1 D 0x4
+ loc2 D 0x4
+ localeconv F
+ localtime F
+ localtime_r F
+ lockf F
+ locs D 0x4
+ longjmp F
+ lrand48 F
+ lrand48_r F
+ lsearch F
+ lseek F
+ madvise F
+ mallinfo F
+ malloc F
+ malloc_get_state F
+ malloc_set_state F
+ malloc_stats F
+ malloc_trim F
+ malloc_usable_size F
+ mallopt F
+ mallwatch D 0x4
+ mblen F
+ mbrlen F
+ mbrtowc F
+ mbsinit F
+ mbsnrtowcs F
+ mbsrtowcs F
+ mbstowcs F
+ mbtowc F
+ mcheck F
+ memalign F
+ memccpy F
+ memchr F
+ memcmp F
+ memcpy F
+ memfrob F
+ memmem F
+ memmove F
+ memset F
+ mkdir F
+ mkfifo F
+ mkstemp F
+ mktemp F
+ mktime F
+ mlock F
+ mlockall F
+ mmap F
+ modf F
+ modff F
+ modfl F
+ monstartup F
+ mount F
+ mprobe F
+ mprotect F
+ mrand48 F
+ mrand48_r F
+ mremap F
+ msgctl F
+ msgget F
+ msgrcv F
+ msgsnd F
+ msync F
+ mtrace F
+ munlock F
+ munlockall F
+ munmap F
+ muntrace F
+ nanosleep F
+ nfsservctl F
+ nice F
+ nl_langinfo F
+ nrand48 F
+ nrand48_r F
+ ntohl F
+ ntohs F
+ obstack_alloc_failed_handler D 0x4
+ obstack_exit_failure D 0x4
+ obstack_free F
+ obstack_printf F
+ obstack_vprintf F
+ on_exit F
+ open F
+ open_memstream F
+ opendir F
+ openlog F
+ optarg D 0x4
+ opterr D 0x4
+ optind D 0x4
+ optopt D 0x4
+ parse_printf_format F
+ pathconf F
+ pause F
+ pclose F
+ perror F
+ personality F
+ pipe F
+ pmap_getmaps F
+ pmap_getport F
+ pmap_rmtcall F
+ pmap_set F
+ pmap_unset F
+ poll F
+ popen F
+ prctl F
+ printf F
+ profil F
+ program_invocation_name D 0x4
+ program_invocation_short_name D 0x4
+ pselect F
+ psignal F
+ pthread_attr_destroy F
+ pthread_attr_getdetachstate F
+ pthread_attr_getinheritsched F
+ pthread_attr_getschedparam F
+ pthread_attr_getschedpolicy F
+ pthread_attr_getscope F
+ pthread_attr_init F
+ pthread_attr_setdetachstate F
+ pthread_attr_setinheritsched F
+ pthread_attr_setschedparam F
+ pthread_attr_setschedpolicy F
+ pthread_attr_setscope F
+ pthread_cond_broadcast F
+ pthread_cond_destroy F
+ pthread_cond_init F
+ pthread_cond_signal F
+ pthread_cond_timedwait F
+ pthread_cond_wait F
+ pthread_condattr_destroy F
+ pthread_condattr_init F
+ pthread_equal F
+ pthread_exit F
+ pthread_getschedparam F
+ pthread_mutex_destroy F
+ pthread_mutex_init F
+ pthread_mutex_lock F
+ pthread_mutex_unlock F
+ pthread_self F
+ pthread_setcancelstate F
+ pthread_setcanceltype F
+ pthread_setschedparam F
+ ptrace F
+ putc F
+ putc_unlocked F
+ putchar F
+ putchar_unlocked F
+ putenv F
+ putpwent F
+ puts F
+ putspent F
+ pututline F
+ putw F
+ pvalloc F
+ qecvt F
+ qecvt_r F
+ qfcvt F
+ qfcvt_r F
+ qgcvt F
+ qsort F
+ query_module F
+ quotactl F
+ raise F
+ rand F
+ rand_r F
+ random F
+ random_r F
+ rcmd F
+ re_comp F
+ re_compile_fastmap F
+ re_compile_pattern F
+ re_exec F
+ re_match F
+ re_match_2 F
+ re_max_failures D 0x4
+ re_search F
+ re_search_2 F
+ re_set_registers F
+ re_set_syntax F
+ re_syntax_options D 0x4
+ read F
+ readdir F
+ readdir_r F
+ readlink F
+ readv F
+ realloc F
+ realpath F
+ reboot F
+ recv F
+ recvfrom F
+ recvmsg F
+ regcomp F
+ regerror F
+ regexec F
+ regfree F
+ register_printf_function F
+ registerrpc F
+ remove F
+ remque F
+ rename F
+ res_init F
+ revoke F
+ rewind F
+ rewinddir F
+ rexec F
+ rexecoptions D 0x4
+ rindex F
+ rmdir F
+ rpc_createerr D 0x10
+ rpmatch F
+ rresvport F
+ ruserok F
+ ruserpass F
+ sbrk F
+ scalbn F
+ scalbnf F
+ scalbnl F
+ scandir F
+ scanf F
+ sched_get_priority_max F
+ sched_get_priority_min F
+ sched_getparam F
+ sched_getscheduler F
+ sched_rr_get_interval F
+ sched_setparam F
+ sched_setscheduler F
+ sched_yield F
+ seed48 F
+ seed48_r F
+ seekdir F
+ select F
+ semctl F
+ semget F
+ semop F
+ send F
+ sendmsg F
+ sendto F
+ setaliasent F
+ setbuf F
+ setbuffer F
+ setcontext F
+ setdomainname F
+ setegid F
+ setenv F
+ seteuid F
+ setfsent F
+ setfsgid F
+ setfsuid F
+ setgid F
+ setgrent F
+ setgroups F
+ sethostent F
+ sethostid F
+ sethostname F
+ setitimer F
+ setjmp F
+ setlinebuf F
+ setlocale F
+ setlogin F
+ setlogmask F
+ setmntent F
+ setnetent F
+ setnetgrent F
+ setpgid F
+ setpgrp F
+ setpriority F
+ setprotoent F
+ setpwent F
+ setregid F
+ setresgid F
+ setresuid F
+ setreuid F
+ setrlimit F
+ setrpcent F
+ setservent F
+ setsid F
+ setsockopt F
+ setspent F
+ setstate F
+ setstate_r F
+ settimeofday F
+ setttyent F
+ setuid F
+ setusershell F
+ setutent F
+ setvbuf F
+ sgetspent F
+ sgetspent_r F
+ shmat F
+ shmctl F
+ shmdt F
+ shmget F
+ shutdown F
+ sigaction F
+ sigaddset F
+ sigaltstack F
+ sigandset F
+ sigblock F
+ sigdelset F
+ sigemptyset F
+ sigfillset F
+ siggetmask F
+ siginterrupt F
+ sigisemptyset F
+ sigismember F
+ siglongjmp F
+ signal F
+ sigorset F
+ sigpause F
+ sigpending F
+ sigprocmask F
+ sigreturn F
+ sigsetmask F
+ sigstack F
+ sigsuspend F
+ sigvec F
+ sigwait F
+ sleep F
+ snprintf F
+ socket F
+ socketpair F
+ sprintf F
+ srand F
+ srand48 F
+ srand48_r F
+ srandom F
+ srandom_r F
+ sscanf F
+ ssignal F
+ sstk F
+ statfs F
+ stderr D 0x4
+ stdin D 0x4
+ stdout D 0x4
+ step F
+ stime F
+ stpcpy F
+ stpncpy F
+ strcasecmp F
+ strcat F
+ strchr F
+ strcmp F
+ strcoll F
+ strcpy F
+ strcspn F
+ strdup F
+ strerror F
+ strerror_r F
+ strfmon F
+ strfry F
+ strftime F
+ strlen F
+ strncasecmp F
+ strncat F
+ strncmp F
+ strncpy F
+ strndup F
+ strnlen F
+ strpbrk F
+ strptime F
+ strrchr F
+ strsep F
+ strsignal F
+ strspn F
+ strstr F
+ strtod F
+ strtof F
+ strtok F
+ strtok_r F
+ strtol F
+ strtold F
+ strtoll F
+ strtoq F
+ strtoul F
+ strtoull F
+ strtouq F
+ strxfrm F
+ stty F
+ svc_exit F
+ svc_fdset D 0x80
+ svc_getreq F
+ svc_getreqset F
+ svc_register F
+ svc_run F
+ svc_sendreply F
+ svc_unregister F
+ svcauthdes_stats D 0xc
+ svcerr_auth F
+ svcerr_decode F
+ svcerr_noproc F
+ svcerr_noprog F
+ svcerr_progvers F
+ svcerr_systemerr F
+ svcerr_weakauth F
+ svcfd_create F
+ svcraw_create F
+ svctcp_create F
+ svcudp_bufcreate F
+ svcudp_create F
+ svcudp_enablecache F
+ swab F
+ swapoff F
+ swapon F
+ symlink F
+ sync F
+ sys_errlist D 0x1ec
+ sys_nerr D 0x4
+ sys_sigabbrev D 0x80
+ sys_siglist D 0x80
+ syscall F
+ sysconf F
+ sysctl F
+ sysinfo F
+ syslog F
+ system F
+ tcdrain F
+ tcflow F
+ tcflush F
+ tcgetattr F
+ tcgetpgrp F
+ tcsendbreak F
+ tcsetattr F
+ tcsetpgrp F
+ tdelete F
+ telldir F
+ tempnam F
+ textdomain F
+ tfind F
+ time F
+ timegm F
+ timelocal F
+ times F
+ timezone D 0x4
+ tmpfile F
+ tmpnam F
+ tmpnam_r F
+ toascii F
+ tolower F
+ toupper F
+ towctrans F
+ towlower F
+ towupper F
+ tr_break F
+ truncate F
+ tsearch F
+ ttyname F
+ ttyname_r F
+ ttyslot F
+ twalk F
+ tzname D 0x8
+ tzset F
+ ualarm F
+ ulckpwdf F
+ ulimit F
+ umask F
+ umount F
+ uname F
+ ungetc F
+ unlink F
+ unsetenv F
+ updwtmp F
+ uselib F
+ usleep F
+ ustat F
+ utime F
+ utimes F
+ utmpname F
+ valloc F
+ vasprintf F
+ vdprintf F
+ verr F
+ verrx F
+ vfork F
+ vfprintf F
+ vfscanf F
+ vhangup F
+ vlimit F
+ vprintf F
+ vscanf F
+ vsnprintf F
+ vsprintf F
+ vsscanf F
+ vsyslog F
+ vtimes F
+ vwarn F
+ vwarnx F
+ wait F
+ wait3 F
+ wait4 F
+ waitpid F
+ warn F
+ warnx F
+ wcpcpy F
+ wcpncpy F
+ wcrtomb F
+ wcscat F
+ wcschr F
+ wcscmp F
+ wcscoll F
+ wcscpy F
+ wcscspn F
+ wcsdup F
+ wcslen F
+ wcsncat F
+ wcsncmp F
+ wcsncpy F
+ wcsnrtombs F
+ wcspbrk F
+ wcsrchr F
+ wcsrtombs F
+ wcsspn F
+ wcsstr F
+ wcstod F
+ wcstof F
+ wcstok F
+ wcstol F
+ wcstold F
+ wcstombs F
+ wcstoq F
+ wcstoul F
+ wcstouq F
+ wcswidth F
+ wcsxfrm F
+ wctob F
+ wctomb F
+ wctrans F
+ wctype F
+ wcwidth F
+ wmemchr F
+ wmemcmp F
+ wmemcpy F
+ wmemmove F
+ wmemset F
+ write F
+ writev F
+ xdr_accepted_reply F
+ xdr_array F
+ xdr_authunix_parms F
+ xdr_bool F
+ xdr_bytes F
+ xdr_callhdr F
+ xdr_callmsg F
+ xdr_char F
+ xdr_cryptkeyarg F
+ xdr_cryptkeyarg2 F
+ xdr_cryptkeyres F
+ xdr_des_block F
+ xdr_double F
+ xdr_enum F
+ xdr_float F
+ xdr_free F
+ xdr_int F
+ xdr_key_netstarg F
+ xdr_key_netstres F
+ xdr_keybuf F
+ xdr_keystatus F
+ xdr_long F
+ xdr_netobj F
+ xdr_opaque F
+ xdr_opaque_auth F
+ xdr_pmap F
+ xdr_pmaplist F
+ xdr_pointer F
+ xdr_reference F
+ xdr_rejected_reply F
+ xdr_replymsg F
+ xdr_rmtcall_args F
+ xdr_rmtcallres F
+ xdr_short F
+ xdr_string F
+ xdr_u_char F
+ xdr_u_int F
+ xdr_u_long F
+ xdr_u_short F
+ xdr_union F
+ xdr_vector F
+ xdr_void F
+ xdr_wrapstring F
+ xdrmem_create F
+ xdrrec_create F
+ xdrrec_endofrecord F
+ xdrrec_eof F
+ xdrrec_skiprecord F
+ xdrstdio_create F
+ xencrypt F
+ xprt_register F
+ xprt_unregister F
+GLIBC_2.1
+ GLIBC_2.1 A
+ _IO_2_1_stderr_ D 0xa0
+ _IO_2_1_stdin_ D 0xa0
+ _IO_2_1_stdout_ D 0xa0
+ _IO_do_write F
+ _IO_fclose F
+ _IO_fdopen F
+ _IO_fgetpos64 F
+ _IO_file_attach F
+ _IO_file_close_it F
+ _IO_file_finish F
+ _IO_file_fopen F
+ _IO_file_init F
+ _IO_file_overflow F
+ _IO_file_seekoff F
+ _IO_file_setbuf F
+ _IO_file_sync F
+ _IO_file_underflow F
+ _IO_file_write F
+ _IO_file_xsputn F
+ _IO_fopen F
+ _IO_fsetpos64 F
+ _IO_getline_info F
+ _IO_popen F
+ _IO_proc_close F
+ _IO_proc_open F
+ __asprintf F
+ __backtrace F
+ __backtrace_symbols F
+ __backtrace_symbols_fd F
+ __duplocale F
+ __freelocale F
+ __fxstat64 F
+ __isalnum_l F
+ __isalpha_l F
+ __isascii_l F
+ __isblank_l F
+ __iscntrl_l F
+ __isdigit_l F
+ __isgraph_l F
+ __islower_l F
+ __isprint_l F
+ __ispunct_l F
+ __isspace_l F
+ __isupper_l F
+ __iswalnum_l F
+ __iswalpha_l F
+ __iswblank_l F
+ __iswcntrl_l F
+ __iswctype_l F
+ __iswdigit_l F
+ __iswgraph_l F
+ __iswlower_l F
+ __iswprint_l F
+ __iswpunct_l F
+ __iswspace_l F
+ __iswupper_l F
+ __iswxdigit_l F
+ __isxdigit_l F
+ __key_decryptsession_pk_LOCAL D 0x4
+ __key_encryptsession_pk_LOCAL D 0x4
+ __key_gendes_LOCAL D 0x4
+ __libc_allocate_rtsig F
+ __libc_current_sigrtmax F
+ __libc_current_sigrtmin F
+ __libc_freeres F
+ __libc_sa_len F
+ __lxstat64 F
+ __newlocale F
+ __poll F
+ __pread64 F
+ __pwrite64 F
+ __rawmemchr F
+ __signbit F
+ __signbitf F
+ __strcasecmp_l F
+ __strcasestr F
+ __strcoll_l F
+ __strfmon_l F
+ __strncasecmp_l F
+ __strtod_l F
+ __strtof_l F
+ __strtol_l F
+ __strtold_l F
+ __strtoll_l F
+ __strtoul_l F
+ __strtoull_l F
+ __strxfrm_l F
+ __toascii_l F
+ __tolower_l F
+ __toupper_l F
+ __towctrans F
+ __towctrans_l F
+ __towlower_l F
+ __towupper_l F
+ __wcscasecmp_l F
+ __wcscoll_l F
+ __wcsncasecmp_l F
+ __wcstod_l F
+ __wcstof_l F
+ __wcstol_l F
+ __wcstold_l F
+ __wcstoll_l F
+ __wcstoul_l F
+ __wcstoull_l F
+ __wcsxfrm_l F
+ __wctype_l F
+ __xstat64 F
+ _authenticate F
+ _dl_mcount_wrapper F
+ _dl_mcount_wrapper_check F
+ _sys_errlist D 0x1f4
+ _sys_nerr D 0x4
+ _sys_siglist D 0x100
+ addseverity F
+ alphasort64 F
+ argp_err_exit_status D 0x4
+ argp_error F
+ argp_failure F
+ argp_help F
+ argp_parse F
+ argp_program_bug_address D 0x4
+ argp_program_version D 0x4
+ argp_program_version_hook D 0x4
+ argp_state_help F
+ argp_usage F
+ authdes_create F
+ authdes_getucred F
+ authdes_pk_create F
+ backtrace F
+ backtrace_symbols F
+ backtrace_symbols_fd F
+ capget F
+ capset F
+ cbc_crypt F
+ chown F
+ clntunix_create F
+ creat64 F
+ des_setparity F
+ ecb_crypt F
+ endutxent F
+ fattach F
+ fclose F
+ fdetach F
+ fdopen F
+ ffsl F
+ ffsll F
+ fgetc_unlocked F
+ fgetpos64 F
+ fgets_unlocked F
+ fmtmsg F
+ fopen F
+ fopen64 F
+ fputs_unlocked F
+ fread_unlocked F
+ freopen64 F
+ fseeko F
+ fseeko64 F
+ fsetpos64 F
+ fstatfs64 F
+ fstatvfs F
+ fstatvfs64 F
+ ftello F
+ ftello64 F
+ ftruncate64 F
+ ftw64 F
+ fwrite_unlocked F
+ gai_strerror F
+ getcontext F
+ getdate F
+ getdate_err D 0x4
+ getdate_r F
+ getmsg F
+ getnameinfo F
+ getnetname F
+ getpmsg F
+ getpt F
+ getrlimit64 F
+ getutxent F
+ getutxid F
+ getutxline F
+ glob64 F
+ globfree64 F
+ gnu_get_libc_release F
+ gnu_get_libc_version F
+ grantpt F
+ host2netname F
+ iconv F
+ iconv_close F
+ iconv_open F
+ if_freenameindex F
+ if_indextoname F
+ if_nameindex F
+ if_nametoindex F
+ in6addr_any D 0x10
+ in6addr_loopback D 0x10
+ isastream F
+ iswblank F
+ key_decryptsession F
+ key_decryptsession_pk F
+ key_encryptsession F
+ key_encryptsession_pk F
+ key_gendes F
+ key_get_conv F
+ key_secretkey_is_set F
+ key_setnet F
+ key_setsecret F
+ lockf64 F
+ lseek64 F
+ makecontext F
+ mempcpy F
+ mmap64 F
+ netname2host F
+ netname2user F
+ nftw F
+ nftw64 F
+ ntp_adjtime F
+ ntp_gettime F
+ open64 F
+ passwd2des F
+ pclose F
+ popen F
+ pread F
+ pread64 F
+ printf_size F
+ printf_size_info F
+ pthread_attr_init F
+ ptsname F
+ ptsname_r F
+ putgrent F
+ putmsg F
+ putpmsg F
+ pututxline F
+ pwrite F
+ pwrite64 F
+ rawmemchr F
+ readdir64 F
+ readdir64_r F
+ rtime F
+ scandir64 F
+ sendfile F
+ setrlimit64 F
+ setutxent F
+ sighold F
+ sigignore F
+ sigqueue F
+ sigrelse F
+ sigset F
+ sigtimedwait F
+ sigwaitinfo F
+ statfs64 F
+ statvfs F
+ statvfs64 F
+ strcasestr F
+ strtoimax F
+ strtoumax F
+ strverscmp F
+ svcunix_create F
+ svcunixfd_create F
+ swapcontext F
+ sys_errlist D 0x1f4
+ sys_nerr D 0x4
+ sys_sigabbrev D 0x100
+ sys_siglist D 0x100
+ sysv_signal F
+ tcgetsid F
+ tdestroy F
+ tmpfile F
+ tmpfile64 F
+ truncate64 F
+ umount2 F
+ unlockpt F
+ updwtmpx F
+ user2netname F
+ utmpxname F
+ versionsort F
+ versionsort64 F
+ waitid F
+ wcscasecmp F
+ wcsncasecmp F
+ wcsnlen F
+ wcstoimax F
+ wcstoll F
+ wcstoull F
+ wcstoumax F
+ wcswcs F
+ wordexp F
+ wordfree F
+ xdecrypt F
+ xdr_authdes_cred F
+ xdr_authdes_verf F
+ xdr_getcredres F
+ xdr_int16_t F
+ xdr_int32_t F
+ xdr_int8_t F
+ xdr_netnamestr F
+ xdr_sizeof F
+ xdr_uint16_t F
+ xdr_uint32_t F
+ xdr_uint8_t F
+ xdr_unixcred F
+GLIBC_2.1.1
+ GLIBC_2.1.1 A
+ _Exit F
+ __mempcpy_small F
+ __stpcpy_small F
+ __strcpy_small F
+ __strcspn_c1 F
+ __strcspn_c2 F
+ __strcspn_c3 F
+ __strpbrk_c2 F
+ __strpbrk_c3 F
+ __strsep_1c F
+ __strsep_2c F
+ __strsep_3c F
+ __strsep_g F
+ __strspn_c1 F
+ __strspn_c2 F
+ __strspn_c3 F
+ __strtok_r_1c F
+ __strverscmp F
+ getutmp F
+ getutmpx F
+ imaxabs F
+ imaxdiv F
+ strchrnul F
+ xdr_hyper F
+ xdr_int64_t F
+ xdr_longlong_t F
+ xdr_u_hyper F
+ xdr_u_longlong_t F
+ xdr_uint64_t F
+GLIBC_2.1.2
+ GLIBC_2.1.2 A
+ __vfork F
+ getaliasbyname_r F
+ getaliasent_r F
+ getgrent_r F
+ getgrgid_r F
+ getgrnam_r F
+ gethostbyaddr_r F
+ gethostbyname2_r F
+ gethostbyname_r F
+ gethostent_r F
+ getnetbyaddr_r F
+ getnetbyname_r F
+ getnetent_r F
+ getprotobyname_r F
+ getprotobynumber_r F
+ getprotoent_r F
+ getpwent_r F
+ getpwnam_r F
+ getpwuid_r F
+ getrpcbyname_r F
+ getrpcbynumber_r F
+ getrpcent_r F
+ getservbyname_r F
+ getservbyport_r F
+ getservent_r F
+ getspent_r F
+ getspnam_r F
+GLIBC_2.1.3
+ GLIBC_2.1.3 A
+ __cxa_atexit F
+ __cxa_finalize F
+ __sigsuspend F
+GLIBC_2.10
+ GLIBC_2.10 A
+ __cxa_at_quick_exit F
+ __posix_getopt F
+ accept4 F
+ endsgent F
+ fallocate F
+ fgetsgent F
+ fgetsgent_r F
+ getsgent F
+ getsgent_r F
+ getsgnam F
+ getsgnam_r F
+ malloc_info F
+ preadv F
+ preadv64 F
+ psiginfo F
+ putsgent F
+ pwritev F
+ pwritev64 F
+ quick_exit F
+ register_printf_modifier F
+ register_printf_specifier F
+ register_printf_type F
+ setsgent F
+ sgetsgent F
+ sgetsgent_r F
+GLIBC_2.11
+ GLIBC_2.11 A
+ __longjmp_chk F
+ execvpe F
+ fallocate64 F
+ mkostemps F
+ mkostemps64 F
+ mkstemps F
+ mkstemps64 F
+GLIBC_2.12
+ GLIBC_2.12 A
+ _sys_errlist D 0x21c
+ _sys_nerr D 0x4
+ ntp_gettimex F
+ recvmmsg F
+ sys_errlist D 0x21c
+ sys_nerr D 0x4
+GLIBC_2.13
+ GLIBC_2.13 A
+ fanotify_init F
+ fanotify_mark F
+ prlimit F
+ prlimit64 F
+GLIBC_2.14
+ GLIBC_2.14 A
+ clock_adjtime F
+ name_to_handle_at F
+ open_by_handle_at F
+ sendmmsg F
+ setns F
+ syncfs F
+GLIBC_2.15
+ GLIBC_2.15 A
+ __fdelt_chk F
+ __fdelt_warn F
+ posix_spawn F
+ posix_spawnp F
+ process_vm_readv F
+ process_vm_writev F
+ scandirat F
+ scandirat64 F
+GLIBC_2.16
+ GLIBC_2.16 A
+ __getauxval F
+ __mcount_internal F
+ __poll_chk F
+ __ppoll_chk F
+ aligned_alloc F
+ c16rtomb F
+ c32rtomb F
+ getauxval F
+ mbrtoc16 F
+ mbrtoc32 F
+ timespec_get F
+GLIBC_2.17
+ GLIBC_2.17 A
+ __ppc_get_timebase_freq F
+ clock_getcpuclockid F
+ clock_getres F
+ clock_gettime F
+ clock_nanosleep F
+ clock_settime F
+ secure_getenv F
+GLIBC_2.18
+ GLIBC_2.18 A
+ __cxa_thread_atexit_impl F
+GLIBC_2.2
+ GLIBC_2.2 A
+ _IO_adjust_wcolumn F
+ _IO_fgetpos F
+ _IO_fgetpos64 F
+ _IO_free_wbackup_area F
+ _IO_fsetpos F
+ _IO_fsetpos64 F
+ _IO_init_wmarker F
+ _IO_iter_begin F
+ _IO_iter_end F
+ _IO_iter_file F
+ _IO_iter_next F
+ _IO_least_wmarker F
+ _IO_list_lock F
+ _IO_list_resetlock F
+ _IO_list_unlock F
+ _IO_seekwmark F
+ _IO_sputbackwc F
+ _IO_sungetwc F
+ _IO_switch_to_main_wget_area F
+ _IO_switch_to_wbackup_area F
+ _IO_switch_to_wget_mode F
+ _IO_unsave_wmarkers F
+ _IO_wdefault_doallocate F
+ _IO_wdefault_finish F
+ _IO_wdefault_pbackfail F
+ _IO_wdefault_uflow F
+ _IO_wdefault_xsgetn F
+ _IO_wdefault_xsputn F
+ _IO_wdo_write F
+ _IO_wdoallocbuf F
+ _IO_wfile_jumps D 0x54
+ _IO_wfile_overflow F
+ _IO_wfile_seekoff F
+ _IO_wfile_sync F
+ _IO_wfile_underflow F
+ _IO_wfile_xsputn F
+ _IO_wmarker_delta F
+ _IO_wsetb F
+ __assert F
+ __ctype32_tolower D 0x4
+ __ctype32_toupper D 0x4
+ __cyg_profile_func_enter F
+ __cyg_profile_func_exit F
+ __endmntent F
+ __fbufsize F
+ __flbf F
+ __fpending F
+ __fpurge F
+ __freadable F
+ __freading F
+ __fsetlocking F
+ __fwritable F
+ __fwriting F
+ __fxstat64 F
+ __getmntent_r F
+ __lxstat64 F
+ __nl_langinfo_l F
+ __open64 F
+ __res_init F
+ __res_nclose F
+ __res_ninit F
+ __res_state F
+ __setmntent F
+ __statfs F
+ __strndup F
+ __sysconf F
+ __sysctl F
+ __wctrans_l F
+ __woverflow F
+ __wuflow F
+ __wunderflow F
+ __xpg_sigpause F
+ __xstat64 F
+ _flushlbf F
+ _res_hconf D 0x30
+ bind_textdomain_codeset F
+ dcngettext F
+ dngettext F
+ fgetpos F
+ fgetpos64 F
+ fgetwc F
+ fgetwc_unlocked F
+ fgetws F
+ fgetws_unlocked F
+ fmemopen F
+ fopencookie F
+ fputwc F
+ fputwc_unlocked F
+ fputws F
+ fputws_unlocked F
+ fsetpos F
+ fsetpos64 F
+ fwide F
+ fwprintf F
+ fwscanf F
+ getdirentries64 F
+ getloadavg F
+ getrlimit F
+ getrlimit64 F
+ getwc F
+ getwc_unlocked F
+ getwchar F
+ getwchar_unlocked F
+ glob64 F
+ iruserok_af F
+ localeconv F
+ mcheck_check_all F
+ mcheck_pedantic F
+ memrchr F
+ mincore F
+ mkdtemp F
+ mkstemp64 F
+ moncontrol F
+ msgctl F
+ ngettext F
+ posix_fadvise F
+ posix_fadvise64 F
+ posix_fallocate F
+ posix_fallocate64 F
+ posix_madvise F
+ posix_memalign F
+ posix_spawn F
+ posix_spawn_file_actions_addclose F
+ posix_spawn_file_actions_adddup2 F
+ posix_spawn_file_actions_addopen F
+ posix_spawn_file_actions_destroy F
+ posix_spawn_file_actions_init F
+ posix_spawnattr_destroy F
+ posix_spawnattr_getflags F
+ posix_spawnattr_getpgroup F
+ posix_spawnattr_getschedparam F
+ posix_spawnattr_getschedpolicy F
+ posix_spawnattr_getsigdefault F
+ posix_spawnattr_getsigmask F
+ posix_spawnattr_init F
+ posix_spawnattr_setflags F
+ posix_spawnattr_setpgroup F
+ posix_spawnattr_setschedparam F
+ posix_spawnattr_setschedpolicy F
+ posix_spawnattr_setsigdefault F
+ posix_spawnattr_setsigmask F
+ posix_spawnp F
+ putwc F
+ putwc_unlocked F
+ putwchar F
+ putwchar_unlocked F
+ rcmd_af F
+ readdir64 F
+ readdir64_r F
+ rexec_af F
+ rresvport_af F
+ ruserok_af F
+ scandir64 F
+ semctl F
+ setrlimit F
+ shmctl F
+ svc_getreq_common F
+ svc_getreq_poll F
+ svc_max_pollfd D 0x4
+ svc_pollfd D 0x4
+ swprintf F
+ swscanf F
+ ungetwc F
+ vfwprintf F
+ vfwscanf F
+ vswprintf F
+ vswscanf F
+ vwprintf F
+ vwscanf F
+ wcschrnul F
+ wcsftime F
+ wmempcpy F
+ wprintf F
+ wscanf F
+GLIBC_2.2.1
+ GLIBC_2.2.1 A
+ pivot_root F
+ posix_openpt F
+GLIBC_2.2.2
+ GLIBC_2.2.2 A
+ __nss_hostname_digits_dots F
+GLIBC_2.2.3
+ GLIBC_2.2.3 A
+ __rpc_thread_createerr F
+ __rpc_thread_svc_fdset F
+ __rpc_thread_svc_max_pollfd F
+ __rpc_thread_svc_pollfd F
+ fnmatch F
+ sprofil F
+GLIBC_2.2.4
+ GLIBC_2.2.4 A
+ dl_iterate_phdr F
+ getgrouplist F
+ sockatmark F
+GLIBC_2.2.6
+ GLIBC_2.2.6 A
+ __nanosleep F
+GLIBC_2.3
+ GLIBC_2.3 A
+ __ctype_b_loc F
+ __ctype_tolower_loc F
+ __ctype_toupper_loc F
+ __isctype F
+ __strftime_l F
+ __uselocale F
+ __wcsftime_l F
+ _sys_errlist D 0x1f8
+ _sys_nerr D 0x4
+ duplocale F
+ fgetxattr F
+ flistxattr F
+ freeifaddrs F
+ freelocale F
+ fremovexattr F
+ fsetxattr F
+ futimes F
+ getifaddrs F
+ getxattr F
+ isalnum_l F
+ isalpha_l F
+ isblank_l F
+ iscntrl_l F
+ isctype F
+ isdigit_l F
+ isgraph_l F
+ islower_l F
+ isprint_l F
+ ispunct_l F
+ isspace_l F
+ isupper_l F
+ iswalnum_l F
+ iswalpha_l F
+ iswblank_l F
+ iswcntrl_l F
+ iswctype_l F
+ iswdigit_l F
+ iswgraph_l F
+ iswlower_l F
+ iswprint_l F
+ iswpunct_l F
+ iswspace_l F
+ iswupper_l F
+ iswxdigit_l F
+ isxdigit_l F
+ lgetxattr F
+ listxattr F
+ llistxattr F
+ lremovexattr F
+ lsetxattr F
+ lutimes F
+ newlocale F
+ nl_langinfo_l F
+ readahead F
+ realpath F
+ removexattr F
+ sendfile64 F
+ setxattr F
+ strcasecmp_l F
+ strcoll_l F
+ strfmon_l F
+ strftime_l F
+ strncasecmp_l F
+ strtod_l F
+ strtof_l F
+ strtol_l F
+ strtold_l F
+ strtoll_l F
+ strtoul_l F
+ strtoull_l F
+ strxfrm_l F
+ sys_errlist D 0x1f8
+ sys_nerr D 0x4
+ tolower_l F
+ toupper_l F
+ towctrans_l F
+ towlower_l F
+ towupper_l F
+ uselocale F
+ wcscasecmp_l F
+ wcscoll_l F
+ wcsftime_l F
+ wcsncasecmp_l F
+ wcstod_l F
+ wcstof_l F
+ wcstol_l F
+ wcstold_l F
+ wcstoll_l F
+ wcstoul_l F
+ wcstoull_l F
+ wcsxfrm_l F
+ wctrans_l F
+ wctype_l F
+GLIBC_2.3.2
+ GLIBC_2.3.2 A
+ __adddf3 F
+ __addsf3 F
+ __divdf3 F
+ __divsf3 F
+ __eqdf2 F
+ __eqsf2 F
+ __extendsfdf2 F
+ __fixdfsi F
+ __fixsfsi F
+ __fixunsdfsi F
+ __fixunssfsi F
+ __floatsidf F
+ __floatsisf F
+ __gedf2 F
+ __gesf2 F
+ __ledf2 F
+ __lesf2 F
+ __muldf3 F
+ __mulsf3 F
+ __negdf2 F
+ __negsf2 F
+ __register_atfork F
+ __sim_disabled_exceptions D 0x4
+ __sim_exceptions D 0x4
+ __sim_round_mode D 0x4
+ __sqrtdf2 F
+ __sqrtsf2 F
+ __subdf3 F
+ __subsf3 F
+ __truncdfsf2 F
+ epoll_create F
+ epoll_ctl F
+ epoll_wait F
+ lchmod F
+ pthread_cond_broadcast F
+ pthread_cond_destroy F
+ pthread_cond_init F
+ pthread_cond_signal F
+ pthread_cond_timedwait F
+ pthread_cond_wait F
+ strptime_l F
+GLIBC_2.3.3
+ GLIBC_2.3.3 A
+ _sys_siglist D 0x104
+ getcontext F
+ gnu_dev_major F
+ gnu_dev_makedev F
+ gnu_dev_minor F
+ inet6_option_alloc F
+ inet6_option_append F
+ inet6_option_find F
+ inet6_option_init F
+ inet6_option_next F
+ inet6_option_space F
+ makecontext F
+ nftw F
+ nftw64 F
+ posix_fadvise64 F
+ posix_fallocate64 F
+ remap_file_pages F
+ sched_getaffinity F
+ sched_setaffinity F
+ semtimedop F
+ setcontext F
+ swapcontext F
+ sys_sigabbrev D 0x104
+ sys_siglist D 0x104
+GLIBC_2.3.4
+ GLIBC_2.3.4 A
+ __chk_fail F
+ __fprintf_chk F
+ __gets_chk F
+ __memcpy_chk F
+ __memmove_chk F
+ __mempcpy_chk F
+ __memset_chk F
+ __printf_chk F
+ __sigsetjmp F
+ __snprintf_chk F
+ __sprintf_chk F
+ __stpcpy_chk F
+ __strcat_chk F
+ __strcpy_chk F
+ __strncat_chk F
+ __strncpy_chk F
+ __vfprintf_chk F
+ __vprintf_chk F
+ __vsnprintf_chk F
+ __vsprintf_chk F
+ __xpg_strerror_r F
+ _longjmp F
+ _setjmp F
+ getcontext F
+ getipv4sourcefilter F
+ getsourcefilter F
+ longjmp F
+ makecontext F
+ regexec F
+ sched_getaffinity F
+ sched_setaffinity F
+ setcontext F
+ setipv4sourcefilter F
+ setjmp F
+ setsourcefilter F
+ siglongjmp F
+ swapcontext F
+ xdr_quad_t F
+ xdr_u_quad_t F
+GLIBC_2.4
+ GLIBC_2.4 A
+ _IO_fprintf F
+ _IO_printf F
+ _IO_sprintf F
+ _IO_sscanf F
+ _IO_vfprintf F
+ _IO_vfscanf F
+ _IO_vsprintf F
+ __asprintf F
+ __confstr_chk F
+ __fgets_chk F
+ __fgets_unlocked_chk F
+ __fgetws_chk F
+ __fgetws_unlocked_chk F
+ __finitel F
+ __floatundidf F
+ __floatundisf F
+ __floatunsidf F
+ __floatunsisf F
+ __fprintf_chk F
+ __fwprintf_chk F
+ __fxstatat F
+ __fxstatat64 F
+ __getcwd_chk F
+ __getdomainname_chk F
+ __getgroups_chk F
+ __gethostname_chk F
+ __getlogin_r_chk F
+ __getwd_chk F
+ __gtdf2 F
+ __gtsf2 F
+ __isinfl F
+ __isnanl F
+ __ltdf2 F
+ __ltsf2 F
+ __mbsnrtowcs_chk F
+ __mbsrtowcs_chk F
+ __mbstowcs_chk F
+ __nedf2 F
+ __nesf2 F
+ __nldbl__IO_fprintf F
+ __nldbl__IO_printf F
+ __nldbl__IO_sprintf F
+ __nldbl__IO_sscanf F
+ __nldbl__IO_vfprintf F
+ __nldbl__IO_vfscanf F
+ __nldbl__IO_vsprintf F
+ __nldbl___asprintf F
+ __nldbl___fprintf_chk F
+ __nldbl___fwprintf_chk F
+ __nldbl___printf_chk F
+ __nldbl___printf_fp F
+ __nldbl___snprintf_chk F
+ __nldbl___sprintf_chk F
+ __nldbl___strfmon_l F
+ __nldbl___swprintf_chk F
+ __nldbl___syslog_chk F
+ __nldbl___vfprintf_chk F
+ __nldbl___vfscanf F
+ __nldbl___vfwprintf_chk F
+ __nldbl___vprintf_chk F
+ __nldbl___vsnprintf F
+ __nldbl___vsnprintf_chk F
+ __nldbl___vsprintf_chk F
+ __nldbl___vsscanf F
+ __nldbl___vstrfmon F
+ __nldbl___vstrfmon_l F
+ __nldbl___vswprintf_chk F
+ __nldbl___vsyslog_chk F
+ __nldbl___vwprintf_chk F
+ __nldbl___wprintf_chk F
+ __nldbl_asprintf F
+ __nldbl_dprintf F
+ __nldbl_fprintf F
+ __nldbl_fscanf F
+ __nldbl_fwprintf F
+ __nldbl_fwscanf F
+ __nldbl_obstack_printf F
+ __nldbl_obstack_vprintf F
+ __nldbl_printf F
+ __nldbl_printf_size F
+ __nldbl_scanf F
+ __nldbl_snprintf F
+ __nldbl_sprintf F
+ __nldbl_sscanf F
+ __nldbl_strfmon F
+ __nldbl_strfmon_l F
+ __nldbl_swprintf F
+ __nldbl_swscanf F
+ __nldbl_syslog F
+ __nldbl_vasprintf F
+ __nldbl_vdprintf F
+ __nldbl_vfprintf F
+ __nldbl_vfscanf F
+ __nldbl_vfwprintf F
+ __nldbl_vfwscanf F
+ __nldbl_vprintf F
+ __nldbl_vscanf F
+ __nldbl_vsnprintf F
+ __nldbl_vsprintf F
+ __nldbl_vsscanf F
+ __nldbl_vswprintf F
+ __nldbl_vswscanf F
+ __nldbl_vsyslog F
+ __nldbl_vwprintf F
+ __nldbl_vwscanf F
+ __nldbl_wprintf F
+ __nldbl_wscanf F
+ __pread64_chk F
+ __pread_chk F
+ __printf_chk F
+ __printf_fp F
+ __ptsname_r_chk F
+ __read_chk F
+ __readlink_chk F
+ __realpath_chk F
+ __recv_chk F
+ __recvfrom_chk F
+ __signbitl F
+ __snprintf_chk F
+ __sprintf_chk F
+ __stack_chk_fail F
+ __stpncpy_chk F
+ __strfmon_l F
+ __strtold_internal F
+ __strtold_l F
+ __swprintf_chk F
+ __syslog_chk F
+ __ttyname_r_chk F
+ __unorddf2 F
+ __unordsf2 F
+ __vfprintf_chk F
+ __vfscanf F
+ __vfwprintf_chk F
+ __vprintf_chk F
+ __vsnprintf F
+ __vsnprintf_chk F
+ __vsprintf_chk F
+ __vsscanf F
+ __vswprintf_chk F
+ __vsyslog_chk F
+ __vwprintf_chk F
+ __wcpcpy_chk F
+ __wcpncpy_chk F
+ __wcrtomb_chk F
+ __wcscat_chk F
+ __wcscpy_chk F
+ __wcsncat_chk F
+ __wcsncpy_chk F
+ __wcsnrtombs_chk F
+ __wcsrtombs_chk F
+ __wcstold_internal F
+ __wcstold_l F
+ __wcstombs_chk F
+ __wctomb_chk F
+ __wmemcpy_chk F
+ __wmemmove_chk F
+ __wmempcpy_chk F
+ __wmemset_chk F
+ __wprintf_chk F
+ __xmknodat F
+ _sys_errlist D 0x210
+ _sys_nerr D 0x4
+ asprintf F
+ copysignl F
+ dprintf F
+ eaccess F
+ faccessat F
+ fchmodat F
+ fchownat F
+ fdopendir F
+ finitel F
+ fprintf F
+ frexpl F
+ fscanf F
+ futimesat F
+ fwprintf F
+ fwscanf F
+ inotify_add_watch F
+ inotify_init F
+ inotify_rm_watch F
+ isinfl F
+ isnanl F
+ ldexpl F
+ linkat F
+ mkdirat F
+ mkfifoat F
+ modfl F
+ obstack_printf F
+ obstack_vprintf F
+ open_wmemstream F
+ openat F
+ openat64 F
+ ppoll F
+ printf F
+ printf_size F
+ qecvt F
+ qecvt_r F
+ qfcvt F
+ qfcvt_r F
+ qgcvt F
+ readlinkat F
+ renameat F
+ scalbnl F
+ scanf F
+ snprintf F
+ sprintf F
+ sscanf F
+ strfmon F
+ strfmon_l F
+ strtold F
+ strtold_l F
+ swprintf F
+ swscanf F
+ symlinkat F
+ sys_errlist D 0x210
+ sys_nerr D 0x4
+ syslog F
+ unlinkat F
+ unshare F
+ vasprintf F
+ vdprintf F
+ vfprintf F
+ vfscanf F
+ vfwprintf F
+ vfwscanf F
+ vprintf F
+ vscanf F
+ vsnprintf F
+ vsprintf F
+ vsscanf F
+ vswprintf F
+ vswscanf F
+ vsyslog F
+ vwprintf F
+ vwscanf F
+ wcstold F
+ wcstold_l F
+ wprintf F
+ wscanf F
+GLIBC_2.5
+ GLIBC_2.5 A
+ __readlinkat_chk F
+ inet6_opt_append F
+ inet6_opt_find F
+ inet6_opt_finish F
+ inet6_opt_get_val F
+ inet6_opt_init F
+ inet6_opt_next F
+ inet6_opt_set_val F
+ inet6_rth_add F
+ inet6_rth_getaddr F
+ inet6_rth_init F
+ inet6_rth_reverse F
+ inet6_rth_segments F
+ inet6_rth_space F
+ splice F
+ tee F
+ vmsplice F
+GLIBC_2.6
+ GLIBC_2.6 A
+ __sched_cpucount F
+ epoll_pwait F
+ futimens F
+ sched_getcpu F
+ strerror_l F
+ sync_file_range F
+ utimensat F
+GLIBC_2.7
+ GLIBC_2.7 A
+ __fread_chk F
+ __fread_unlocked_chk F
+ __isoc99_fscanf F
+ __isoc99_fwscanf F
+ __isoc99_scanf F
+ __isoc99_sscanf F
+ __isoc99_swscanf F
+ __isoc99_vfscanf F
+ __isoc99_vfwscanf F
+ __isoc99_vscanf F
+ __isoc99_vsscanf F
+ __isoc99_vswscanf F
+ __isoc99_vwscanf F
+ __isoc99_wscanf F
+ __nldbl___isoc99_fscanf F
+ __nldbl___isoc99_fwscanf F
+ __nldbl___isoc99_scanf F
+ __nldbl___isoc99_sscanf F
+ __nldbl___isoc99_swscanf F
+ __nldbl___isoc99_vfscanf F
+ __nldbl___isoc99_vfwscanf F
+ __nldbl___isoc99_vscanf F
+ __nldbl___isoc99_vsscanf F
+ __nldbl___isoc99_vswscanf F
+ __nldbl___isoc99_vwscanf F
+ __nldbl___isoc99_wscanf F
+ __open64_2 F
+ __open_2 F
+ __openat64_2 F
+ __openat_2 F
+ __sched_cpualloc F
+ __sched_cpufree F
+ eventfd F
+ eventfd_read F
+ eventfd_write F
+ mkostemp F
+ mkostemp64 F
+ signalfd F
+GLIBC_2.8
+ GLIBC_2.8 A
+ __asprintf_chk F
+ __dprintf_chk F
+ __nldbl___asprintf_chk F
+ __nldbl___dprintf_chk F
+ __nldbl___obstack_printf_chk F
+ __nldbl___obstack_vprintf_chk F
+ __nldbl___vasprintf_chk F
+ __nldbl___vdprintf_chk F
+ __obstack_printf_chk F
+ __obstack_vprintf_chk F
+ __vasprintf_chk F
+ __vdprintf_chk F
+ qsort_r F
+ timerfd_create F
+ timerfd_gettime F
+ timerfd_settime F
+GLIBC_2.9
+ GLIBC_2.9 A
+ dup3 F
+ epoll_create1 F
+ inotify_init1 F
+ pipe2 F
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libcrypt.abilist b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libcrypt.abilist
new file mode 100644
index 000000000..1df145f26
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libcrypt.abilist
@@ -0,0 +1,9 @@
+GLIBC_2.0
+ GLIBC_2.0 A
+ crypt F
+ crypt_r F
+ encrypt F
+ encrypt_r F
+ fcrypt F
+ setkey F
+ setkey_r F
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libdl.abilist b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libdl.abilist
new file mode 100644
index 000000000..62e6b41ed
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libdl.abilist
@@ -0,0 +1,18 @@
+GLIBC_2.0
+ GLIBC_2.0 A
+ dladdr F
+ dlclose F
+ dlerror F
+ dlopen F
+ dlsym F
+GLIBC_2.1
+ GLIBC_2.1 A
+ dlopen F
+ dlvsym F
+GLIBC_2.3.3
+ GLIBC_2.3.3 A
+ dladdr1 F
+ dlinfo F
+GLIBC_2.3.4
+ GLIBC_2.3.4 A
+ dlmopen F
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libm.abilist b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libm.abilist
new file mode 100644
index 000000000..9bd593c0e
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libm.abilist
@@ -0,0 +1,519 @@
+GLIBC_2.0
+ GLIBC_2.0 A
+ _LIB_VERSION D 0x4
+ acos F
+ acosf F
+ acosh F
+ acoshf F
+ acoshl F
+ acosl F
+ asin F
+ asinf F
+ asinh F
+ asinhf F
+ asinhl F
+ asinl F
+ atan F
+ atan2 F
+ atan2f F
+ atan2l F
+ atanf F
+ atanh F
+ atanhf F
+ atanhl F
+ atanl F
+ cbrt F
+ cbrtf F
+ cbrtl F
+ ceil F
+ ceilf F
+ ceill F
+ copysign F
+ copysignf F
+ copysignl F
+ cos F
+ cosf F
+ cosh F
+ coshf F
+ coshl F
+ cosl F
+ drem F
+ dremf F
+ dreml F
+ erf F
+ erfc F
+ erfcf F
+ erfcl F
+ erff F
+ erfl F
+ exp F
+ expf F
+ expl F
+ expm1 F
+ expm1f F
+ expm1l F
+ fabs F
+ fabsf F
+ fabsl F
+ finite F
+ finitef F
+ finitel F
+ floor F
+ floorf F
+ floorl F
+ fmod F
+ fmodf F
+ fmodl F
+ frexp F
+ frexpf F
+ frexpl F
+ gamma F
+ gammaf F
+ gammal F
+ hypot F
+ hypotf F
+ hypotl F
+ ilogb F
+ ilogbf F
+ ilogbl F
+ j0 F
+ j0f F
+ j0l F
+ j1 F
+ j1f F
+ j1l F
+ jn F
+ jnf F
+ jnl F
+ ldexp F
+ ldexpf F
+ ldexpl F
+ lgamma F
+ lgamma_r F
+ lgammaf F
+ lgammaf_r F
+ lgammal F
+ lgammal_r F
+ log F
+ log10 F
+ log10f F
+ log10l F
+ log1p F
+ log1pf F
+ log1pl F
+ logb F
+ logbf F
+ logbl F
+ logf F
+ logl F
+ matherr F
+ modf F
+ modff F
+ modfl F
+ nextafter F
+ nextafterf F
+ nextafterl F
+ pow F
+ powf F
+ powl F
+ remainder F
+ remainderf F
+ remainderl F
+ rint F
+ rintf F
+ rintl F
+ scalb F
+ scalbf F
+ scalbl F
+ scalbn F
+ scalbnf F
+ scalbnl F
+ signgam D 0x4
+ significand F
+ significandf F
+ significandl F
+ sin F
+ sinf F
+ sinh F
+ sinhf F
+ sinhl F
+ sinl F
+ sqrt F
+ sqrtf F
+ sqrtl F
+ tan F
+ tanf F
+ tanh F
+ tanhf F
+ tanhl F
+ tanl F
+ y0 F
+ y0f F
+ y0l F
+ y1 F
+ y1f F
+ y1l F
+ yn F
+ ynf F
+ ynl F
+GLIBC_2.1
+ GLIBC_2.1 A
+ __clog10 F
+ __clog10f F
+ __clog10l F
+ __fe_dfl_env D 0x8
+ __fe_enabled_env D 0x8
+ __fe_nonieee_env D 0x8
+ __finite F
+ __finitef F
+ __finitel F
+ __fpclassify F
+ __fpclassifyf F
+ __signbit F
+ __signbitf F
+ cabs F
+ cabsf F
+ cabsl F
+ cacos F
+ cacosf F
+ cacosh F
+ cacoshf F
+ cacoshl F
+ cacosl F
+ carg F
+ cargf F
+ cargl F
+ casin F
+ casinf F
+ casinh F
+ casinhf F
+ casinhl F
+ casinl F
+ catan F
+ catanf F
+ catanh F
+ catanhf F
+ catanhl F
+ catanl F
+ ccos F
+ ccosf F
+ ccosh F
+ ccoshf F
+ ccoshl F
+ ccosl F
+ cexp F
+ cexpf F
+ cexpl F
+ cimag F
+ cimagf F
+ cimagl F
+ clog F
+ clog10 F
+ clog10f F
+ clog10l F
+ clogf F
+ clogl F
+ conj F
+ conjf F
+ conjl F
+ cpow F
+ cpowf F
+ cpowl F
+ cproj F
+ cprojf F
+ cprojl F
+ creal F
+ crealf F
+ creall F
+ csin F
+ csinf F
+ csinh F
+ csinhf F
+ csinhl F
+ csinl F
+ csqrt F
+ csqrtf F
+ csqrtl F
+ ctan F
+ ctanf F
+ ctanh F
+ ctanhf F
+ ctanhl F
+ ctanl F
+ exp10 F
+ exp10f F
+ exp10l F
+ exp2 F
+ exp2f F
+ fdim F
+ fdimf F
+ fdiml F
+ feclearexcept F
+ fegetenv F
+ fegetexceptflag F
+ fegetround F
+ feholdexcept F
+ feraiseexcept F
+ fesetenv F
+ fesetexceptflag F
+ fesetround F
+ fetestexcept F
+ feupdateenv F
+ fma F
+ fmaf F
+ fmal F
+ fmax F
+ fmaxf F
+ fmaxl F
+ fmin F
+ fminf F
+ fminl F
+ llrint F
+ llrintf F
+ llrintl F
+ llround F
+ llroundf F
+ llroundl F
+ log2 F
+ log2f F
+ log2l F
+ lrint F
+ lrintf F
+ lrintl F
+ lround F
+ lroundf F
+ lroundl F
+ nan F
+ nanf F
+ nanl F
+ nearbyint F
+ nearbyintf F
+ nearbyintl F
+ nexttoward F
+ nexttowardf F
+ nexttowardl F
+ pow10 F
+ pow10f F
+ pow10l F
+ remquo F
+ remquof F
+ remquol F
+ round F
+ roundf F
+ roundl F
+ scalbln F
+ scalblnf F
+ scalblnl F
+ sincos F
+ sincosf F
+ sincosl F
+ tgamma F
+ tgammaf F
+ tgammal F
+ trunc F
+ truncf F
+ truncl F
+GLIBC_2.15
+ GLIBC_2.15 A
+ __acos_finite F
+ __acosf_finite F
+ __acosh_finite F
+ __acoshf_finite F
+ __acoshl_finite F
+ __acosl_finite F
+ __asin_finite F
+ __asinf_finite F
+ __asinl_finite F
+ __atan2_finite F
+ __atan2f_finite F
+ __atan2l_finite F
+ __atanh_finite F
+ __atanhf_finite F
+ __atanhl_finite F
+ __cosh_finite F
+ __coshf_finite F
+ __coshl_finite F
+ __exp10_finite F
+ __exp10f_finite F
+ __exp10l_finite F
+ __exp2_finite F
+ __exp2f_finite F
+ __exp2l_finite F
+ __exp_finite F
+ __expf_finite F
+ __expl_finite F
+ __fmod_finite F
+ __fmodf_finite F
+ __fmodl_finite F
+ __gamma_r_finite F
+ __gammaf_r_finite F
+ __gammal_r_finite F
+ __hypot_finite F
+ __hypotf_finite F
+ __hypotl_finite F
+ __j0_finite F
+ __j0f_finite F
+ __j0l_finite F
+ __j1_finite F
+ __j1f_finite F
+ __j1l_finite F
+ __jn_finite F
+ __jnf_finite F
+ __jnl_finite F
+ __lgamma_r_finite F
+ __lgammaf_r_finite F
+ __lgammal_r_finite F
+ __log10_finite F
+ __log10f_finite F
+ __log10l_finite F
+ __log2_finite F
+ __log2f_finite F
+ __log2l_finite F
+ __log_finite F
+ __logf_finite F
+ __logl_finite F
+ __pow_finite F
+ __powf_finite F
+ __powl_finite F
+ __remainder_finite F
+ __remainderf_finite F
+ __remainderl_finite F
+ __scalb_finite F
+ __scalbf_finite F
+ __scalbl_finite F
+ __sinh_finite F
+ __sinhf_finite F
+ __sinhl_finite F
+ __sqrt_finite F
+ __sqrtf_finite F
+ __sqrtl_finite F
+ __y0_finite F
+ __y0f_finite F
+ __y0l_finite F
+ __y1_finite F
+ __y1f_finite F
+ __y1l_finite F
+ __yn_finite F
+ __ynf_finite F
+ __ynl_finite F
+GLIBC_2.18
+ GLIBC_2.18 A
+ __issignaling F
+ __issignalingf F
+ __issignalingl F
+GLIBC_2.2
+ GLIBC_2.2 A
+ feclearexcept F
+ fedisableexcept F
+ feenableexcept F
+ fegetenv F
+ fegetexcept F
+ fegetexceptflag F
+ feraiseexcept F
+ fesetenv F
+ fesetexceptflag F
+ feupdateenv F
+GLIBC_2.4
+ GLIBC_2.4 A
+ __clog10l F
+ __finitel F
+ __fpclassifyl F
+ __nldbl_nexttowardf F
+ __signbitl F
+ acoshl F
+ acosl F
+ asinhl F
+ asinl F
+ atan2l F
+ atanhl F
+ atanl F
+ cabsl F
+ cacoshl F
+ cacosl F
+ cargl F
+ casinhl F
+ casinl F
+ catanhl F
+ catanl F
+ cbrtl F
+ ccoshl F
+ ccosl F
+ ceill F
+ cexpl F
+ cimagl F
+ clog10l F
+ clogl F
+ conjl F
+ copysignl F
+ coshl F
+ cosl F
+ cpowl F
+ cprojl F
+ creall F
+ csinhl F
+ csinl F
+ csqrtl F
+ ctanhl F
+ ctanl F
+ dreml F
+ erfcl F
+ erfl F
+ exp10l F
+ exp2l F
+ expl F
+ expm1l F
+ fabsl F
+ fdiml F
+ finitel F
+ floorl F
+ fmal F
+ fmaxl F
+ fminl F
+ fmodl F
+ frexpl F
+ gammal F
+ hypotl F
+ ilogbl F
+ j0l F
+ j1l F
+ jnl F
+ ldexpl F
+ lgammal F
+ lgammal_r F
+ llrintl F
+ llroundl F
+ log10l F
+ log1pl F
+ log2l F
+ logbl F
+ logl F
+ lrintl F
+ lroundl F
+ modfl F
+ nanl F
+ nearbyintl F
+ nextafterl F
+ nexttoward F
+ nexttowardf F
+ nexttowardl F
+ pow10l F
+ powl F
+ remainderl F
+ remquol F
+ rintl F
+ roundl F
+ scalbl F
+ scalblnl F
+ scalbnl F
+ significandl F
+ sincosl F
+ sinhl F
+ sinl F
+ sqrtl F
+ tanhl F
+ tanl F
+ tgammal F
+ truncl F
+ y0l F
+ y1l F
+ ynl F
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libnsl.abilist b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libnsl.abilist
new file mode 100644
index 000000000..4241e2d88
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libnsl.abilist
@@ -0,0 +1,127 @@
+GLIBC_2.0
+ GLIBC_2.0 A
+ __yp_check F
+ xdr_domainname F
+ xdr_keydat F
+ xdr_mapname F
+ xdr_peername F
+ xdr_valdat F
+ xdr_yp_buf F
+ xdr_ypbind_binding F
+ xdr_ypbind_resp F
+ xdr_ypbind_resptype F
+ xdr_ypbind_setdom F
+ xdr_ypdelete_args F
+ xdr_ypmap_parms F
+ xdr_ypmaplist F
+ xdr_yppush_status F
+ xdr_yppushresp_xfr F
+ xdr_ypreq_key F
+ xdr_ypreq_nokey F
+ xdr_ypreq_xfr F
+ xdr_ypresp_all F
+ xdr_ypresp_key_val F
+ xdr_ypresp_maplist F
+ xdr_ypresp_master F
+ xdr_ypresp_order F
+ xdr_ypresp_val F
+ xdr_ypresp_xfr F
+ xdr_ypstat F
+ xdr_ypupdate_args F
+ xdr_ypxfrstat F
+ yp_all F
+ yp_bind F
+ yp_first F
+ yp_get_default_domain F
+ yp_maplist F
+ yp_master F
+ yp_match F
+ yp_next F
+ yp_order F
+ yp_unbind F
+ yp_update F
+ ypbinderr_string F
+ yperr_string F
+ ypprot_err F
+GLIBC_2.1
+ GLIBC_2.1 A
+ __free_fdresult F
+ __nis_default_access F
+ __nis_default_group F
+ __nis_default_owner F
+ __nis_default_ttl F
+ __nis_finddirectory F
+ __nis_hash F
+ __nisbind_connect F
+ __nisbind_create F
+ __nisbind_destroy F
+ __nisbind_next F
+ nis_add F
+ nis_add_entry F
+ nis_addmember F
+ nis_checkpoint F
+ nis_clone_directory F
+ nis_clone_object F
+ nis_clone_result F
+ nis_creategroup F
+ nis_destroy_object F
+ nis_destroygroup F
+ nis_dir_cmp F
+ nis_domain_of F
+ nis_domain_of_r F
+ nis_first_entry F
+ nis_free_directory F
+ nis_free_object F
+ nis_free_request F
+ nis_freenames F
+ nis_freeresult F
+ nis_freeservlist F
+ nis_freetags F
+ nis_getnames F
+ nis_getservlist F
+ nis_ismember F
+ nis_leaf_of F
+ nis_leaf_of_r F
+ nis_lerror F
+ nis_list F
+ nis_local_directory F
+ nis_local_group F
+ nis_local_host F
+ nis_local_principal F
+ nis_lookup F
+ nis_mkdir F
+ nis_modify F
+ nis_modify_entry F
+ nis_name_of F
+ nis_name_of_r F
+ nis_next_entry F
+ nis_perror F
+ nis_ping F
+ nis_print_directory F
+ nis_print_entry F
+ nis_print_group F
+ nis_print_group_entry F
+ nis_print_link F
+ nis_print_object F
+ nis_print_result F
+ nis_print_rights F
+ nis_print_table F
+ nis_read_obj F
+ nis_remove F
+ nis_remove_entry F
+ nis_removemember F
+ nis_rmdir F
+ nis_servstate F
+ nis_sperrno F
+ nis_sperror F
+ nis_sperror_r F
+ nis_stats F
+ nis_verifygroup F
+ nis_write_obj F
+ readColdStartFile F
+ writeColdStartFile F
+ xdr_cback_data F
+ xdr_obj_p F
+GLIBC_2.2
+ GLIBC_2.2 A
+ xdr_ypall F
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libpthread.abilist b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libpthread.abilist
new file mode 100644
index 000000000..c8a2a0471
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libpthread.abilist
@@ -0,0 +1,277 @@
+GLIBC_2.0
+ GLIBC_2.0 A
+ _IO_flockfile F
+ _IO_ftrylockfile F
+ _IO_funlockfile F
+ __close F
+ __connect F
+ __errno_location F
+ __fcntl F
+ __fork F
+ __h_errno_location F
+ __lseek F
+ __open F
+ __pthread_getspecific F
+ __pthread_key_create F
+ __pthread_mutex_destroy F
+ __pthread_mutex_init F
+ __pthread_mutex_lock F
+ __pthread_mutex_trylock F
+ __pthread_mutex_unlock F
+ __pthread_mutexattr_destroy F
+ __pthread_mutexattr_init F
+ __pthread_mutexattr_settype F
+ __pthread_once F
+ __pthread_setspecific F
+ __read F
+ __send F
+ __sigaction F
+ __wait F
+ __write F
+ _pthread_cleanup_pop F
+ _pthread_cleanup_pop_restore F
+ _pthread_cleanup_push F
+ _pthread_cleanup_push_defer F
+ accept F
+ close F
+ connect F
+ fcntl F
+ flockfile F
+ fork F
+ fsync F
+ ftrylockfile F
+ funlockfile F
+ longjmp F
+ lseek F
+ msync F
+ nanosleep F
+ open F
+ pause F
+ pthread_atfork F
+ pthread_attr_destroy F
+ pthread_attr_getdetachstate F
+ pthread_attr_getinheritsched F
+ pthread_attr_getschedparam F
+ pthread_attr_getschedpolicy F
+ pthread_attr_getscope F
+ pthread_attr_init F
+ pthread_attr_setdetachstate F
+ pthread_attr_setinheritsched F
+ pthread_attr_setschedparam F
+ pthread_attr_setschedpolicy F
+ pthread_attr_setscope F
+ pthread_cancel F
+ pthread_cond_broadcast F
+ pthread_cond_destroy F
+ pthread_cond_init F
+ pthread_cond_signal F
+ pthread_cond_timedwait F
+ pthread_cond_wait F
+ pthread_condattr_destroy F
+ pthread_condattr_init F
+ pthread_create F
+ pthread_detach F
+ pthread_equal F
+ pthread_exit F
+ pthread_getschedparam F
+ pthread_getspecific F
+ pthread_join F
+ pthread_key_create F
+ pthread_key_delete F
+ pthread_kill F
+ pthread_kill_other_threads_np F
+ pthread_mutex_destroy F
+ pthread_mutex_init F
+ pthread_mutex_lock F
+ pthread_mutex_trylock F
+ pthread_mutex_unlock F
+ pthread_mutexattr_destroy F
+ pthread_mutexattr_getkind_np F
+ pthread_mutexattr_init F
+ pthread_mutexattr_setkind_np F
+ pthread_once F
+ pthread_self F
+ pthread_setcancelstate F
+ pthread_setcanceltype F
+ pthread_setschedparam F
+ pthread_setspecific F
+ pthread_sigmask F
+ pthread_testcancel F
+ raise F
+ read F
+ recv F
+ recvfrom F
+ recvmsg F
+ sem_destroy F
+ sem_getvalue F
+ sem_init F
+ sem_post F
+ sem_trywait F
+ sem_wait F
+ send F
+ sendmsg F
+ sendto F
+ sigaction F
+ siglongjmp F
+ sigwait F
+ system F
+ tcdrain F
+ vfork F
+ wait F
+ waitpid F
+ write F
+GLIBC_2.1
+ GLIBC_2.1 A
+ __libc_allocate_rtsig F
+ __libc_current_sigrtmax F
+ __libc_current_sigrtmin F
+ pthread_attr_getguardsize F
+ pthread_attr_getstackaddr F
+ pthread_attr_getstacksize F
+ pthread_attr_init F
+ pthread_attr_setguardsize F
+ pthread_attr_setstackaddr F
+ pthread_attr_setstacksize F
+ pthread_create F
+ pthread_getconcurrency F
+ pthread_mutexattr_gettype F
+ pthread_mutexattr_settype F
+ pthread_rwlock_destroy F
+ pthread_rwlock_init F
+ pthread_rwlock_rdlock F
+ pthread_rwlock_tryrdlock F
+ pthread_rwlock_trywrlock F
+ pthread_rwlock_unlock F
+ pthread_rwlock_wrlock F
+ pthread_rwlockattr_destroy F
+ pthread_rwlockattr_getkind_np F
+ pthread_rwlockattr_getpshared F
+ pthread_rwlockattr_init F
+ pthread_rwlockattr_setkind_np F
+ pthread_rwlockattr_setpshared F
+ pthread_setconcurrency F
+ sem_destroy F
+ sem_getvalue F
+ sem_init F
+ sem_post F
+ sem_trywait F
+ sem_wait F
+GLIBC_2.1.1
+ GLIBC_2.1.1 A
+ sem_close F
+ sem_open F
+ sem_unlink F
+GLIBC_2.1.2
+ GLIBC_2.1.2 A
+ __vfork F
+GLIBC_2.11
+ GLIBC_2.11 A
+ pthread_sigqueue F
+GLIBC_2.12
+ GLIBC_2.12 A
+ pthread_getname_np F
+ pthread_mutex_consistent F
+ pthread_mutexattr_getrobust F
+ pthread_mutexattr_setrobust F
+ pthread_setname_np F
+GLIBC_2.18
+ GLIBC_2.18 A
+ pthread_getattr_default_np F
+ pthread_setattr_default_np F
+GLIBC_2.2
+ GLIBC_2.2 A
+ __open64 F
+ __pread64 F
+ __pthread_rwlock_destroy F
+ __pthread_rwlock_init F
+ __pthread_rwlock_rdlock F
+ __pthread_rwlock_tryrdlock F
+ __pthread_rwlock_trywrlock F
+ __pthread_rwlock_unlock F
+ __pthread_rwlock_wrlock F
+ __pwrite64 F
+ __res_state F
+ lseek64 F
+ open64 F
+ pread F
+ pread64 F
+ pthread_attr_getstack F
+ pthread_attr_setstack F
+ pthread_barrier_destroy F
+ pthread_barrier_init F
+ pthread_barrier_wait F
+ pthread_barrierattr_destroy F
+ pthread_barrierattr_init F
+ pthread_barrierattr_setpshared F
+ pthread_condattr_getpshared F
+ pthread_condattr_setpshared F
+ pthread_getcpuclockid F
+ pthread_mutex_timedlock F
+ pthread_mutexattr_getpshared F
+ pthread_mutexattr_setpshared F
+ pthread_rwlock_timedrdlock F
+ pthread_rwlock_timedwrlock F
+ pthread_spin_destroy F
+ pthread_spin_init F
+ pthread_spin_lock F
+ pthread_spin_trylock F
+ pthread_spin_unlock F
+ pthread_yield F
+ pwrite F
+ pwrite64 F
+ sem_timedwait F
+GLIBC_2.2.3
+ GLIBC_2.2.3 A
+ pthread_getattr_np F
+GLIBC_2.2.6
+ GLIBC_2.2.6 A
+ __nanosleep F
+GLIBC_2.3.2
+ GLIBC_2.3.2 A
+ pthread_cond_broadcast F
+ pthread_cond_destroy F
+ pthread_cond_init F
+ pthread_cond_signal F
+ pthread_cond_timedwait F
+ pthread_cond_wait F
+GLIBC_2.3.3
+ GLIBC_2.3.3 A
+ __pthread_cleanup_routine F
+ __pthread_register_cancel F
+ __pthread_register_cancel_defer F
+ __pthread_unregister_cancel F
+ __pthread_unregister_cancel_restore F
+ __pthread_unwind_next F
+ pthread_attr_getaffinity_np F
+ pthread_attr_setaffinity_np F
+ pthread_barrierattr_getpshared F
+ pthread_condattr_getclock F
+ pthread_condattr_setclock F
+ pthread_getaffinity_np F
+ pthread_setaffinity_np F
+ pthread_timedjoin_np F
+ pthread_tryjoin_np F
+GLIBC_2.3.4
+ GLIBC_2.3.4 A
+ longjmp F
+ pthread_attr_getaffinity_np F
+ pthread_attr_setaffinity_np F
+ pthread_getaffinity_np F
+ pthread_setaffinity_np F
+ pthread_setschedprio F
+ siglongjmp F
+GLIBC_2.4
+ GLIBC_2.4 A
+ pthread_mutex_consistent_np F
+ pthread_mutex_getprioceiling F
+ pthread_mutex_setprioceiling F
+ pthread_mutexattr_getprioceiling F
+ pthread_mutexattr_getprotocol F
+ pthread_mutexattr_getrobust_np F
+ pthread_mutexattr_setprioceiling F
+ pthread_mutexattr_setprotocol F
+ pthread_mutexattr_setrobust_np F
+GLIBC_2.6
+ GLIBC_2.6 A
+ pthread_attr_setstack F
+ pthread_attr_setstacksize F
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libresolv.abilist b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libresolv.abilist
new file mode 100644
index 000000000..f68333d4a
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libresolv.abilist
@@ -0,0 +1,104 @@
+GLIBC_2.0
+ GLIBC_2.0 A
+ __b64_ntop F
+ __b64_pton F
+ __dn_comp F
+ __dn_count_labels F
+ __dn_skipname F
+ __fp_nquery F
+ __fp_query F
+ __fp_resstat F
+ __hostalias F
+ __loc_aton F
+ __loc_ntoa F
+ __p_cdname F
+ __p_cdnname F
+ __p_class F
+ __p_class_syms D 0x54
+ __p_fqname F
+ __p_fqnname F
+ __p_option F
+ __p_query F
+ __p_secstodate F
+ __p_time F
+ __p_type F
+ __p_type_syms D 0x228
+ __putlong F
+ __putshort F
+ __res_close F
+ __res_dnok F
+ __res_hnok F
+ __res_isourserver F
+ __res_mailok F
+ __res_nameinquery F
+ __res_ownok F
+ __res_queriesmatch F
+ __res_send F
+ __sym_ntop F
+ __sym_ntos F
+ __sym_ston F
+ _gethtbyaddr F
+ _gethtbyname F
+ _gethtbyname2 F
+ _gethtent F
+ _getlong F
+ _getshort F
+ _res_opcodes D 0x40
+ _sethtent F
+ dn_expand F
+ inet_net_ntop F
+ inet_net_pton F
+ inet_neta F
+ res_gethostbyaddr F
+ res_gethostbyname F
+ res_gethostbyname2 F
+ res_mkquery F
+ res_query F
+ res_querydomain F
+ res_search F
+ res_send_setqhook F
+ res_send_setrhook F
+GLIBC_2.2
+ GLIBC_2.2 A
+ __dn_expand F
+ __res_hostalias F
+ __res_mkquery F
+ __res_nmkquery F
+ __res_nquery F
+ __res_nquerydomain F
+ __res_nsearch F
+ __res_nsend F
+ __res_query F
+ __res_querydomain F
+ __res_search F
+GLIBC_2.3.2
+ GLIBC_2.3.2 A
+ __p_rcode F
+GLIBC_2.9
+ GLIBC_2.9 A
+ ns_datetosecs F
+ ns_format_ttl F
+ ns_get16 F
+ ns_get32 F
+ ns_initparse F
+ ns_makecanon F
+ ns_msg_getflag F
+ ns_name_compress F
+ ns_name_ntol F
+ ns_name_ntop F
+ ns_name_pack F
+ ns_name_pton F
+ ns_name_rollback F
+ ns_name_skip F
+ ns_name_uncompress F
+ ns_name_unpack F
+ ns_parse_ttl F
+ ns_parserr F
+ ns_put16 F
+ ns_put32 F
+ ns_samedomain F
+ ns_samename F
+ ns_skiprr F
+ ns_sprintrr F
+ ns_sprintrrf F
+ ns_subdomain F
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/librt.abilist b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/librt.abilist
new file mode 100644
index 000000000..af7df27cb
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/librt.abilist
@@ -0,0 +1,52 @@
+GLIBC_2.1
+ GLIBC_2.1 A
+ aio_cancel F
+ aio_cancel64 F
+ aio_error F
+ aio_error64 F
+ aio_fsync F
+ aio_fsync64 F
+ aio_init F
+ aio_read F
+ aio_read64 F
+ aio_return F
+ aio_return64 F
+ aio_suspend F
+ aio_suspend64 F
+ aio_write F
+ aio_write64 F
+ lio_listio F
+ lio_listio64 F
+GLIBC_2.2
+ GLIBC_2.2 A
+ clock_getcpuclockid F
+ clock_getres F
+ clock_gettime F
+ clock_nanosleep F
+ clock_settime F
+ shm_open F
+ shm_unlink F
+ timer_create F
+ timer_delete F
+ timer_getoverrun F
+ timer_gettime F
+ timer_settime F
+GLIBC_2.3.4
+ GLIBC_2.3.4 A
+ mq_close F
+ mq_getattr F
+ mq_notify F
+ mq_open F
+ mq_receive F
+ mq_send F
+ mq_setattr F
+ mq_timedreceive F
+ mq_timedsend F
+ mq_unlink F
+GLIBC_2.4
+ GLIBC_2.4 A
+ lio_listio F
+ lio_listio64 F
+GLIBC_2.7
+ GLIBC_2.7 A
+ __mq_open_2 F
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libthread_db.abilist b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libthread_db.abilist
new file mode 100644
index 000000000..f33138067
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libthread_db.abilist
@@ -0,0 +1,48 @@
+GLIBC_2.1.3
+ GLIBC_2.1.3 A
+ td_init F
+ td_log F
+ td_ta_clear_event F
+ td_ta_delete F
+ td_ta_enable_stats F
+ td_ta_event_addr F
+ td_ta_event_getmsg F
+ td_ta_get_nthreads F
+ td_ta_get_ph F
+ td_ta_get_stats F
+ td_ta_map_id2thr F
+ td_ta_map_lwp2thr F
+ td_ta_new F
+ td_ta_reset_stats F
+ td_ta_set_event F
+ td_ta_setconcurrency F
+ td_ta_thr_iter F
+ td_ta_tsd_iter F
+ td_thr_clear_event F
+ td_thr_dbresume F
+ td_thr_dbsuspend F
+ td_thr_event_enable F
+ td_thr_event_getmsg F
+ td_thr_get_info F
+ td_thr_getfpregs F
+ td_thr_getgregs F
+ td_thr_getxregs F
+ td_thr_getxregsize F
+ td_thr_set_event F
+ td_thr_setfpregs F
+ td_thr_setgregs F
+ td_thr_setprio F
+ td_thr_setsigpending F
+ td_thr_setxregs F
+ td_thr_sigsetmask F
+ td_thr_tsd F
+ td_thr_validate F
+GLIBC_2.2.3
+ GLIBC_2.2.3 A
+ td_symbol_list F
+GLIBC_2.3
+ GLIBC_2.3 A
+ td_thr_tls_get_addr F
+GLIBC_2.3.3
+ GLIBC_2.3.3 A
+ td_thr_tlsbase F
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libutil.abilist b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libutil.abilist
new file mode 100644
index 000000000..7422687e3
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/libutil.abilist
@@ -0,0 +1,8 @@
+GLIBC_2.0
+ GLIBC_2.0 A
+ forkpty F
+ login F
+ login_tty F
+ logout F
+ logwtmp F
+ openpty F
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/localplt.data b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/localplt.data
new file mode 100644
index 000000000..b87936cb3
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/nptl/localplt.data
@@ -0,0 +1,41 @@
+libc.so: _Unwind_Find_FDE
+libc.so: __adddf3 ?
+libc.so: __addsf3 ?
+libc.so: __divdf3 ?
+libc.so: __divsf3 ?
+libc.so: __eqdf2 ?
+libc.so: __eqsf2 ?
+libc.so: __extendsfdf2 ?
+libc.so: __fixdfsi ?
+libc.so: __fixsfsi ?
+libc.so: __fixunsdfsi ?
+libc.so: __floatsidf ?
+libc.so: __floatsisf ?
+libc.so: __floatunsidf ?
+libc.so: __floatunsisf ?
+libc.so: __gedf2 ?
+libc.so: __gtdf2 ?
+libc.so: __ledf2 ?
+libc.so: __ltdf2 ?
+libc.so: __muldf3 ?
+libc.so: __mulsf3 ?
+libc.so: __nedf2 ?
+libc.so: __signbit
+libc.so: __signbitl
+libc.so: __subdf3 ?
+libc.so: __subsf3 ?
+libc.so: __truncdfsf2 ?
+libc.so: __unorddf2 ?
+libc.so: abort ?
+libc.so: calloc
+libc.so: free
+libc.so: malloc
+libc.so: memalign
+libc.so: realloc
+libm.so: __signbit
+libm.so: __signbitf
+libm.so: __signbitl
+libm.so: copysignl ?
+libm.so: fabsl
+libm.so: fegetround
+libm.so: matherr
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/setcontext.S b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/setcontext.S
new file mode 100644
index 000000000..5f8653ffb
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/setcontext.S
@@ -0,0 +1,60 @@
+/* Jump to a new context.
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <rtld-global-offsets.h>
+#include <shlib-compat.h>
+
+#define __ASSEMBLY__
+#include <asm/ptrace.h>
+#include "ucontext_i.h"
+
+#include <context-e500.h>
+
+#define __CONTEXT_FUNC_NAME __setcontext
+#undef __CONTEXT_ENABLE_FPRS
+#undef __CONTEXT_ENABLE_VRS
+
+#include "setcontext-common.S"
+
+versioned_symbol (libc, __setcontext, setcontext, GLIBC_2_3_4)
+
+#if SHLIB_COMPAT (libc, GLIBC_2_3_3, GLIBC_2_3_4)
+
+/* For the nofpu case the old/new versions are the same function. */
+strong_alias (__setcontext, __novec_setcontext)
+
+compat_symbol (libc, __novec_setcontext, setcontext, GLIBC_2_3_3)
+
+#endif
+
+#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_3)
+
+#define _ERRNO_H 1
+#include <bits/errno.h>
+
+ compat_text_section
+ENTRY (__setcontext_stub)
+ li r3,ENOSYS
+ b __syscall_error@local
+END (__setcontext_stub)
+ .previous
+
+compat_symbol (libc, __setcontext_stub, setcontext, GLIBC_2_0)
+
+#endif
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/swapcontext.S b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/swapcontext.S
new file mode 100644
index 000000000..de6d56f96
--- /dev/null
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/nofpu/swapcontext.S
@@ -0,0 +1,60 @@
+/* Save current context and jump to a new context.
+ Copyright (C) 2002-2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <rtld-global-offsets.h>
+#include <shlib-compat.h>
+
+#define __ASSEMBLY__
+#include <asm/ptrace.h>
+#include "ucontext_i.h"
+
+#include <context-e500.h>
+
+#define __CONTEXT_FUNC_NAME __swapcontext
+#undef __CONTEXT_ENABLE_FPRS
+#undef __CONTEXT_ENABLE_VRS
+
+# include "swapcontext-common.S"
+
+versioned_symbol (libc, __swapcontext, swapcontext, GLIBC_2_3_4)
+
+#if SHLIB_COMPAT (libc, GLIBC_2_3_3, GLIBC_2_3_4)
+
+/* For the nofpu case the old/new versions are the same function. */
+strong_alias (__swapcontext, __novec_swapcontext)
+
+compat_symbol (libc, __novec_swapcontext, swapcontext, GLIBC_2_3_3)
+
+#endif
+
+#if SHLIB_COMPAT (libc, GLIBC_2_1, GLIBC_2_3_3)
+
+#define _ERRNO_H 1
+#include <bits/errno.h>
+
+ compat_text_section
+ENTRY (__swapcontext_stub)
+ li r3,ENOSYS
+ b __syscall_error@local
+END (__swapcontext_stub)
+ .previous
+
+compat_symbol (libc, __swapcontext_stub, swapcontext, GLIBC_2_1)
+
+#endif
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S
index fb7744839..6525cf742 100644
--- a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext-common.S
@@ -79,15 +79,15 @@ ENTRY(__CONTEXT_FUNC_NAME)
# ifdef SHARED
lwz r7,_rtld_global_ro@got(r7)
mtlr r8
- lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r7)
+ lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r7)
# else
lwz r7,_dl_hwcap@got(r7)
mtlr r8
- lwz r7,4(r7)
+ lwz r7,LOWORD(r7)
# endif
#else
- lis r7,(_dl_hwcap+4)@ha
- lwz r7,(_dl_hwcap+4)@l(r7)
+ lis r7,(_dl_hwcap+LOWORD)@ha
+ lwz r7,(_dl_hwcap+LOWORD)@l(r7)
#endif
#ifdef __CONTEXT_ENABLE_FPRS
@@ -243,8 +243,8 @@ ENTRY(__CONTEXT_FUNC_NAME)
lfd fp31,_UC_FREGS+(31*8)(r31)
#endif /* __CONTEXT_ENABLE_FPRS */
-#ifdef __SETCONTEXT_EXTRA
- __SETCONTEXT_EXTRA
+#ifdef __CONTEXT_ENABLE_E500
+ setcontext_e500
#endif
/* Restore LR and CCR, and set CTR to the NIP value */
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S
index 7e0612595..caa5b8932 100644
--- a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext-common.S
@@ -152,15 +152,15 @@ ENTRY(__CONTEXT_FUNC_NAME)
# ifdef SHARED
lwz r7,_rtld_global_ro@got(r7)
mtlr r8
- lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r7)
+ lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r7)
# else
lwz r7,_dl_hwcap@got(r7)
mtlr r8
- lwz r7,4(r7)
+ lwz r7,LOWORD(r7)
# endif
# else
- lis r7,(_dl_hwcap+4)@ha
- lwz r7,(_dl_hwcap+4)@l(r7)
+ lis r7,(_dl_hwcap+LOWORD)@ha
+ lwz r7,(_dl_hwcap+LOWORD)@l(r7)
# endif
# ifdef __CONTEXT_ENABLE_VRS
@@ -265,8 +265,8 @@ ENTRY(__CONTEXT_FUNC_NAME)
# endif /* __CONTEXT_ENABLE_VRS */
#endif /* __CONTEXT_ENABLE_FPRS */
-#ifdef __GETCONTEXT_EXTRA
- __GETCONTEXT_EXTRA
+#ifdef __CONTEXT_ENABLE_E500
+ getcontext_e500
#endif
/* Restore ucontext (parm1) from stack. */
@@ -312,14 +312,14 @@ ENTRY(__CONTEXT_FUNC_NAME)
mtlr r8
# ifdef SHARED
lwz r7,_rtld_global_ro@got(r7)
- lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r7)
+ lwz r7,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+LOWORD(r7)
# else
lwz r7,_dl_hwcap@got(r7)
- lwz r7,4(r7)
+ lwz r7,LOWORD(r7)
# endif
# else
- lis r7,(_dl_hwcap+4)@ha
- lwz r7,(_dl_hwcap+4)@l(r7)
+ lis r7,(_dl_hwcap+LOWORD)@ha
+ lwz r7,(_dl_hwcap+LOWORD)@l(r7)
# endif
andis. r7,r7,(PPC_FEATURE_HAS_ALTIVEC >> 16)
la r10,(_UC_VREGS)(r31)
@@ -472,8 +472,8 @@ ENTRY(__CONTEXT_FUNC_NAME)
lfd fp31,_UC_FREGS+(31*8)(r31)
#endif /* __CONTEXT_ENABLE_FPRS */
-#ifdef __SETCONTEXT_EXTRA
- __SETCONTEXT_EXTRA
+#ifdef __CONTEXT_ENABLE_E500
+ setcontext_e500
#endif
/* Restore LR and CCR, and set CTR to the NIP value */
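
The setcontext-common.S and swapcontext-common.S hunks above replace a hard-coded +4 with a LOWORD offset when loading the low 32 bits of _dl_hwcap. The LOWORD definition itself is not part of this diff; the presumed point is endianness: on a 32-bit target the low word of a 64-bit object sits at byte offset 4 only when the target is big-endian. A minimal stand-alone C sketch of that layout difference (the hwcap value and variable names are illustrative only, not from the patch):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  uint64_t hwcap = 0x0000000100000002ULL;   /* high word 1, low word 2 */
  uint32_t at0, at4;

  /* Probe both 32-bit halves of the 64-bit object by byte offset.  */
  memcpy (&at0, (const char *) &hwcap, 4);
  memcpy (&at4, (const char *) &hwcap + 4, 4);

  /* A big-endian target sees the low word (2) at offset 4, a
     little-endian one sees it at offset 0.  */
  printf ("offset 0: %#x, offset 4: %#x\n",
	  (unsigned int) at0, (unsigned int) at4);
  return 0;
}
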
diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S
index 4a1666938..32fc47c3f 100644
--- a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S
+++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S
@@ -124,8 +124,10 @@ L(noparms):
/* If the target function returns we need to do some cleanup. We use a
code trick to get the address of our cleanup function into the link
- register. Do not add any code between here and L(exitcode). */
- bl L(gotexitcodeaddr);
+ register. Do not add any code between here and L(exitcode).
+ Use this conditional form of branch and link to avoid destroying
+ the cpu link stack used to predict blr return addresses. */
+ bcl 20,31,L(gotexitcodeaddr);
/* This is the helper code which gets called if a function which
is registered with 'makecontext' returns. In this case we
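
The makecontext.S hunk above swaps a plain bl for bcl 20,31,... so that the branch used only to capture an address does not push an entry onto the hardware link stack that predicts blr return addresses. A minimal C sketch of the same get-the-current-address idiom, assuming GCC extended inline asm on a 32-bit PowerPC target (the function name "here" is illustrative, not from the patch):

#include <stdint.h>
#include <stdio.h>

static uintptr_t
here (void)
{
  uintptr_t addr;
  __asm__ ("bcl 20,31,1f\n\t"	/* Branch always and link: per the patch
				   comment, this form does not disturb the
				   link stack used to predict blr targets.  */
	   "1: mflr %0"		/* LR now holds the address of label 1.  */
	   : "=r" (addr) : : "lr");
  return addr;
}

int
main (void)
{
  printf ("running near address %#lx\n", (unsigned long) here ());
  return 0;
}
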
diff --git a/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h b/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h
index 01084bb71..f384bc7fd 100644
--- a/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h
+++ b/libc/sysdeps/unix/sysv/linux/sparc/bits/fcntl.h
@@ -39,6 +39,7 @@
#define __O_DIRECT 0x100000 /* direct disk access hint */
#define __O_NOATIME 0x200000 /* Do not set atime. */
#define __O_PATH 0x1000000 /* Resolve pathname but do not open file. */
+#define __O_TMPFILE 0x2010000 /* Atomically create nameless file. */
#if __WORDSIZE == 64
# define __O_LARGEFILE 0
diff --git a/libc/sysdeps/unix/sysv/linux/tst-fanotify.c b/libc/sysdeps/unix/sysv/linux/tst-fanotify.c
index b21e160ca..ad9836b58 100644
--- a/libc/sysdeps/unix/sysv/linux/tst-fanotify.c
+++ b/libc/sysdeps/unix/sysv/linux/tst-fanotify.c
@@ -29,14 +29,15 @@ do_test (void)
fd = fanotify_init (0, 0);
if (fd < 0)
{
- switch (errno) {
- case ENOSYS:
- puts ("SKIP: missing support for fanotify (check CONFIG_FANOTIFY=y)");
- return 0;
- case EPERM:
- puts ("SKIP: missing proper permissions for runtime test");
- return 0;
- }
+ switch (errno)
+ {
+ case ENOSYS:
+ puts ("SKIP: missing support for fanotify (check CONFIG_FANOTIFY=y)");
+ return 0;
+ case EPERM:
+ puts ("SKIP: missing proper permissions for runtime test");
+ return 0;
+ }
perror ("fanotify_init (0, 0) failed");
return 1;
diff --git a/libc/sysdeps/x86_64/ffs.c b/libc/sysdeps/x86_64/ffs.c
index 27013d6ae..07ee7dd4a 100644
--- a/libc/sysdeps/x86_64/ffs.c
+++ b/libc/sysdeps/x86_64/ffs.c
@@ -35,4 +35,5 @@ __ffs (int x)
return cnt + 1;
}
weak_alias (__ffs, ffs)
+libc_hidden_def (__ffs)
libc_hidden_builtin_def (ffs)
diff --git a/libc/sysdeps/x86_64/fpu/printf_fphex.c b/libc/sysdeps/x86_64/fpu/printf_fphex.c
index c85d1f79f..be55f9cf6 100644
--- a/libc/sysdeps/x86_64/fpu/printf_fphex.c
+++ b/libc/sysdeps/x86_64/fpu/printf_fphex.c
@@ -25,10 +25,11 @@ do { \
/* The "strange" 80 bit format on ix86 and m68k has an explicit \
leading digit in the 64 bit mantissa. */ \
unsigned long long int num; \
+ union ieee854_long_double u; \
+ u.d = fpnum.ldbl; \
\
- \
- num = (((unsigned long long int) fpnum.ldbl.ieee.mantissa0) << 32 \
- | fpnum.ldbl.ieee.mantissa1); \
+ num = (((unsigned long long int) u.ieee.mantissa0) << 32 \
+ | u.ieee.mantissa1); \
\
zero_mantissa = num == 0; \
\
@@ -61,7 +62,7 @@ do { \
\
/* We have 3 bits from the mantissa in the leading nibble. \
Therefore we are here using `IEEE854_LONG_DOUBLE_BIAS + 3'. */ \
- exponent = fpnum.ldbl.ieee.exponent; \
+ exponent = u.ieee.exponent; \
\
if (exponent == 0) \
{ \
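
The printf_fphex.c hunk above stops reading bit fields through fpnum.ldbl.ieee and instead copies the value into a local union ieee854_long_double first. A stand-alone C sketch of the same extraction, assuming glibc's installed <ieee754.h> and the x86 80-bit long double format (the value 1.5L and the variable names are illustrative):

#include <ieee754.h>
#include <stdio.h>

int
main (void)
{
  union ieee854_long_double u;
  unsigned long long int num;

  u.d = 1.5L;

  /* Reassemble the 64-bit mantissa; the 80-bit format keeps an explicit
     leading integer bit, so it appears in mantissa0 as well.  */
  num = (((unsigned long long int) u.ieee.mantissa0) << 32
	 | u.ieee.mantissa1);

  /* For 1.5 this prints an unbiased exponent of 0 and a mantissa of
     0xc000000000000000: the integer bit plus one fraction bit.  */
  printf ("exponent = %d, mantissa = %#llx\n",
	  u.ieee.exponent - IEEE854_LONG_DOUBLE_BIAS, num);
  return 0;
}
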
diff --git a/libc/sysdeps/x86_64/memset.S b/libc/sysdeps/x86_64/memset.S
index 6c69f4b44..9b1de89d9 100644
--- a/libc/sysdeps/x86_64/memset.S
+++ b/libc/sysdeps/x86_64/memset.S
@@ -19,10 +19,6 @@
#include <sysdep.h>
-#ifndef ALIGN
-# define ALIGN(n) .p2align n
-#endif
-
.text
#if !defined NOT_IN_libc
ENTRY(__bzero)
@@ -71,12 +67,12 @@ L(entry_from_bzero):
L(return):
rep
ret
- ALIGN (4)
+ .p2align 4
L(between_32_64_bytes):
movdqu %xmm8, 16(%rdi)
movdqu %xmm8, -32(%rdi,%rdx)
ret
- ALIGN (4)
+ .p2align 4
L(loop_start):
leaq 64(%rdi), %rcx
movdqu %xmm8, (%rdi)
@@ -92,7 +88,7 @@ L(loop_start):
andq $-64, %rdx
cmpq %rdx, %rcx
je L(return)
- ALIGN (4)
+ .p2align 4
L(loop):
movdqa %xmm8, (%rcx)
movdqa %xmm8, 16(%rcx)
diff --git a/libc/sysdeps/x86_64/multiarch/Makefile b/libc/sysdeps/x86_64/multiarch/Makefile
index 5ab950a53..9fd0fd64c 100644
--- a/libc/sysdeps/x86_64/multiarch/Makefile
+++ b/libc/sysdeps/x86_64/multiarch/Makefile
@@ -8,7 +8,7 @@ ifeq ($(subdir),string)
sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
strcmp-sse2-unaligned strncmp-ssse3 \
- strend-sse4 memcmp-sse4 memcpy-ssse3 \
+ memcmp-sse4 memcpy-ssse3 \
memcpy-sse2-unaligned mempcpy-ssse3 \
memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \
@@ -17,7 +17,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
strcpy-sse2-unaligned strncpy-sse2-unaligned \
stpcpy-sse2-unaligned stpncpy-sse2-unaligned \
strcat-sse2-unaligned strncat-sse2-unaligned \
- strrchr-sse2-no-bsf strchr-sse2-no-bsf memcmp-ssse3
+ strchr-sse2-no-bsf memcmp-ssse3
ifeq (yes,$(config-cflags-sse4))
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c varshift
CFLAGS-varshift.c += -msse4
diff --git a/libc/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/libc/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index f8756d7af..71beab82e 100644
--- a/libc/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/libc/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -110,7 +110,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
/* Support sysdeps/x86_64/multiarch/strchr.S. */
IFUNC_IMPL (i, name, strchr,
- IFUNC_IMPL_ADD (array, i, strchr, HAS_SSE4_2, __strchr_sse42)
IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2_no_bsf)
IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2))
@@ -177,12 +176,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
__strpbrk_sse42)
IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2))
- /* Support sysdeps/x86_64/multiarch/strrchr.S. */
- IFUNC_IMPL (i, name, strrchr,
- IFUNC_IMPL_ADD (array, i, strrchr, HAS_SSE4_2,
- __strrchr_sse42)
- IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_sse2_no_bsf)
- IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_sse2))
/* Support sysdeps/x86_64/multiarch/strspn.S. */
IFUNC_IMPL (i, name, strspn,
diff --git a/libc/sysdeps/x86_64/multiarch/memcmp-sse4.S b/libc/sysdeps/x86_64/multiarch/memcmp-sse4.S
index 1ed4200f4..d7b147e5c 100644
--- a/libc/sysdeps/x86_64/multiarch/memcmp-sse4.S
+++ b/libc/sysdeps/x86_64/multiarch/memcmp-sse4.S
@@ -25,10 +25,6 @@
# define MEMCMP __memcmp_sse4_1
# endif
-# ifndef ALIGN
-# define ALIGN(n) .p2align n
-# endif
-
# define JMPTBL(I, B) (I - B)
# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
@@ -60,7 +56,7 @@ ENTRY (MEMCMP)
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
# ifndef USE_AS_WMEMCMP
- ALIGN (4)
+ .p2align 4
L(firstbyte):
movzbl (%rdi), %eax
movzbl (%rsi), %ecx
@@ -68,7 +64,7 @@ L(firstbyte):
ret
# endif
- ALIGN (4)
+ .p2align 4
L(79bytesormore):
movdqu (%rsi), %xmm1
movdqu (%rdi), %xmm2
@@ -316,7 +312,7 @@ L(less32bytesin256):
add %rdx, %rdi
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(512bytesormore):
# ifdef DATA_CACHE_SIZE_HALF
mov $DATA_CACHE_SIZE_HALF, %R8_LP
@@ -329,7 +325,7 @@ L(512bytesormore):
cmp %r8, %rdx
ja L(L2_L3_cache_unaglined)
sub $64, %rdx
- ALIGN (4)
+ .p2align 4
L(64bytesormore_loop):
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
@@ -361,7 +357,7 @@ L(64bytesormore_loop):
L(L2_L3_cache_unaglined):
sub $64, %rdx
- ALIGN (4)
+ .p2align 4
L(L2_L3_unaligned_128bytes_loop):
prefetchnta 0x1c0(%rdi)
prefetchnta 0x1c0(%rsi)
@@ -396,7 +392,7 @@ L(L2_L3_unaligned_128bytes_loop):
/*
* This case is for machines which are sensitive for unaligned instructions.
*/
- ALIGN (4)
+ .p2align 4
L(2aligned):
cmp $128, %rdx
ja L(128bytesormorein2aligned)
@@ -444,7 +440,7 @@ L(less32bytesin64in2alinged):
add %rdx, %rdi
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(128bytesormorein2aligned):
cmp $512, %rdx
ja L(512bytesormorein2aligned)
@@ -519,7 +515,7 @@ L(less32bytesin128in2aligned):
add %rdx, %rdi
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(256bytesormorein2aligned):
sub $256, %rdx
@@ -632,7 +628,7 @@ L(less32bytesin256in2alinged):
add %rdx, %rdi
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(512bytesormorein2aligned):
# ifdef DATA_CACHE_SIZE_HALF
mov $DATA_CACHE_SIZE_HALF, %R8_LP
@@ -646,7 +642,7 @@ L(512bytesormorein2aligned):
ja L(L2_L3_cache_aglined)
sub $64, %rdx
- ALIGN (4)
+ .p2align 4
L(64bytesormore_loopin2aligned):
movdqa (%rdi), %xmm2
pxor (%rsi), %xmm2
@@ -678,7 +674,7 @@ L(64bytesormore_loopin2aligned):
L(L2_L3_cache_aglined):
sub $64, %rdx
- ALIGN (4)
+ .p2align 4
L(L2_L3_aligned_128bytes_loop):
prefetchnta 0x1c0(%rdi)
prefetchnta 0x1c0(%rsi)
@@ -711,7 +707,7 @@ L(L2_L3_aligned_128bytes_loop):
BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(64bytesormore_loop_end):
add $16, %rdi
add $16, %rsi
@@ -806,7 +802,7 @@ L(8bytes):
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(12bytes):
mov -12(%rdi), %rax
mov -12(%rsi), %rcx
@@ -827,7 +823,7 @@ L(0bytes):
# ifndef USE_AS_WMEMCMP
/* unreal case for wmemcmp */
- ALIGN (4)
+ .p2align 4
L(65bytes):
movdqu -65(%rdi), %xmm1
movdqu -65(%rsi), %xmm2
@@ -864,7 +860,7 @@ L(9bytes):
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(13bytes):
mov -13(%rdi), %rax
mov -13(%rsi), %rcx
@@ -877,7 +873,7 @@ L(13bytes):
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(5bytes):
mov -5(%rdi), %eax
mov -5(%rsi), %ecx
@@ -888,7 +884,7 @@ L(5bytes):
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(66bytes):
movdqu -66(%rdi), %xmm1
movdqu -66(%rsi), %xmm2
@@ -929,7 +925,7 @@ L(10bytes):
sub %ecx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(14bytes):
mov -14(%rdi), %rax
mov -14(%rsi), %rcx
@@ -942,7 +938,7 @@ L(14bytes):
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(6bytes):
mov -6(%rdi), %eax
mov -6(%rsi), %ecx
@@ -958,7 +954,7 @@ L(2bytes):
sub %ecx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(67bytes):
movdqu -67(%rdi), %xmm2
movdqu -67(%rsi), %xmm1
@@ -997,7 +993,7 @@ L(11bytes):
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(15bytes):
mov -15(%rdi), %rax
mov -15(%rsi), %rcx
@@ -1010,7 +1006,7 @@ L(15bytes):
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(7bytes):
mov -7(%rdi), %eax
mov -7(%rsi), %ecx
@@ -1023,7 +1019,7 @@ L(7bytes):
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(3bytes):
movzwl -3(%rdi), %eax
movzwl -3(%rsi), %ecx
@@ -1036,7 +1032,7 @@ L(1bytes):
ret
# endif
- ALIGN (4)
+ .p2align 4
L(68bytes):
movdqu -68(%rdi), %xmm2
movdqu -68(%rsi), %xmm1
@@ -1079,7 +1075,7 @@ L(20bytes):
# ifndef USE_AS_WMEMCMP
/* unreal cases for wmemcmp */
- ALIGN (4)
+ .p2align 4
L(69bytes):
movdqu -69(%rsi), %xmm1
movdqu -69(%rdi), %xmm2
@@ -1115,7 +1111,7 @@ L(21bytes):
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(70bytes):
movdqu -70(%rsi), %xmm1
movdqu -70(%rdi), %xmm2
@@ -1151,7 +1147,7 @@ L(22bytes):
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(71bytes):
movdqu -71(%rsi), %xmm1
movdqu -71(%rdi), %xmm2
@@ -1188,7 +1184,7 @@ L(23bytes):
ret
# endif
- ALIGN (4)
+ .p2align 4
L(72bytes):
movdqu -72(%rsi), %xmm1
movdqu -72(%rdi), %xmm2
@@ -1227,7 +1223,7 @@ L(24bytes):
# ifndef USE_AS_WMEMCMP
/* unreal cases for wmemcmp */
- ALIGN (4)
+ .p2align 4
L(73bytes):
movdqu -73(%rsi), %xmm1
movdqu -73(%rdi), %xmm2
@@ -1265,7 +1261,7 @@ L(25bytes):
sub %ecx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(74bytes):
movdqu -74(%rsi), %xmm1
movdqu -74(%rdi), %xmm2
@@ -1302,7 +1298,7 @@ L(26bytes):
movzwl -2(%rsi), %ecx
jmp L(diffin2bytes)
- ALIGN (4)
+ .p2align 4
L(75bytes):
movdqu -75(%rsi), %xmm1
movdqu -75(%rdi), %xmm2
@@ -1342,7 +1338,7 @@ L(27bytes):
xor %eax, %eax
ret
# endif
- ALIGN (4)
+ .p2align 4
L(76bytes):
movdqu -76(%rsi), %xmm1
movdqu -76(%rdi), %xmm2
@@ -1388,7 +1384,7 @@ L(28bytes):
# ifndef USE_AS_WMEMCMP
/* unreal cases for wmemcmp */
- ALIGN (4)
+ .p2align 4
L(77bytes):
movdqu -77(%rsi), %xmm1
movdqu -77(%rdi), %xmm2
@@ -1430,7 +1426,7 @@ L(29bytes):
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(78bytes):
movdqu -78(%rsi), %xmm1
movdqu -78(%rdi), %xmm2
@@ -1470,7 +1466,7 @@ L(30bytes):
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(79bytes):
movdqu -79(%rsi), %xmm1
movdqu -79(%rdi), %xmm2
@@ -1510,7 +1506,7 @@ L(31bytes):
xor %eax, %eax
ret
# endif
- ALIGN (4)
+ .p2align 4
L(64bytes):
movdqu -64(%rdi), %xmm2
movdqu -64(%rsi), %xmm1
@@ -1548,7 +1544,7 @@ L(32bytes):
/*
* Aligned 8 bytes to avoid 2 branch "taken" in one 16 alinged code block.
*/
- ALIGN (3)
+ .p2align 3
L(less16bytes):
movsbq %dl, %rdx
mov (%rsi, %rdx), %rcx
@@ -1585,7 +1581,7 @@ L(diffin2bytes):
sub %ecx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(end):
and $0xff, %eax
and $0xff, %ecx
@@ -1599,7 +1595,7 @@ L(end):
neg %eax
ret
- ALIGN (4)
+ .p2align 4
L(nequal_bigger):
ret
@@ -1611,7 +1607,7 @@ L(unreal_case):
END (MEMCMP)
.section .rodata.sse4.1,"a",@progbits
- ALIGN (3)
+ .p2align 3
# ifndef USE_AS_WMEMCMP
L(table_64bytes):
.int JMPTBL (L(0bytes), L(table_64bytes))
diff --git a/libc/sysdeps/x86_64/multiarch/memcmp-ssse3.S b/libc/sysdeps/x86_64/multiarch/memcmp-ssse3.S
index e319df926..e04f918df 100644
--- a/libc/sysdeps/x86_64/multiarch/memcmp-ssse3.S
+++ b/libc/sysdeps/x86_64/multiarch/memcmp-ssse3.S
@@ -25,10 +25,6 @@
# define MEMCMP __memcmp_ssse3
# endif
-# ifndef ALIGN
-# define ALIGN(n) .p2align n
-# endif
-
/* Warning!
wmemcmp has to use SIGNED comparison for elements.
memcmp has to use UNSIGNED comparison for elemnts.
@@ -50,7 +46,7 @@ ENTRY (MEMCMP)
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
/* ECX >= 32. */
L(48bytesormore):
movdqu (%rdi), %xmm3
@@ -90,7 +86,7 @@ L(48bytesormore):
je L(shr_6)
jmp L(shr_7)
- ALIGN (2)
+ .p2align 2
L(next_unaligned_table):
cmp $8, %edx
je L(shr_8)
@@ -117,7 +113,7 @@ L(next_unaligned_table):
jmp L(shr_12)
# endif
- ALIGN (4)
+ .p2align 4
L(shr_0):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -137,7 +133,7 @@ L(shr_0):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_0_gobble):
movdqa (%rsi), %xmm0
xor %eax, %eax
@@ -180,7 +176,7 @@ L(next):
# ifndef USE_AS_WMEMCMP
- ALIGN (4)
+ .p2align 4
L(shr_1):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -207,7 +203,7 @@ L(shr_1):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_1_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -258,7 +254,7 @@ L(shr_1_gobble_next):
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_2):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -285,7 +281,7 @@ L(shr_2):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_2_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -335,7 +331,7 @@ L(shr_2_gobble_next):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_3):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -362,7 +358,7 @@ L(shr_3):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_3_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -414,7 +410,7 @@ L(shr_3_gobble_next):
# endif
- ALIGN (4)
+ .p2align 4
L(shr_4):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -441,7 +437,7 @@ L(shr_4):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_4_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -493,7 +489,7 @@ L(shr_4_gobble_next):
# ifndef USE_AS_WMEMCMP
- ALIGN (4)
+ .p2align 4
L(shr_5):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -520,7 +516,7 @@ L(shr_5):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_5_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -570,7 +566,7 @@ L(shr_5_gobble_next):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_6):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -597,7 +593,7 @@ L(shr_6):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_6_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -647,7 +643,7 @@ L(shr_6_gobble_next):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_7):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -674,7 +670,7 @@ L(shr_7):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_7_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -726,7 +722,7 @@ L(shr_7_gobble_next):
# endif
- ALIGN (4)
+ .p2align 4
L(shr_8):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -753,7 +749,7 @@ L(shr_8):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_8_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -805,7 +801,7 @@ L(shr_8_gobble_next):
# ifndef USE_AS_WMEMCMP
- ALIGN (4)
+ .p2align 4
L(shr_9):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -832,7 +828,7 @@ L(shr_9):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_9_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -882,7 +878,7 @@ L(shr_9_gobble_next):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_10):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -909,7 +905,7 @@ L(shr_10):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_10_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -959,7 +955,7 @@ L(shr_10_gobble_next):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_11):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -986,7 +982,7 @@ L(shr_11):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_11_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -1038,7 +1034,7 @@ L(shr_11_gobble_next):
# endif
- ALIGN (4)
+ .p2align 4
L(shr_12):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -1065,7 +1061,7 @@ L(shr_12):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_12_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -1117,7 +1113,7 @@ L(shr_12_gobble_next):
# ifndef USE_AS_WMEMCMP
- ALIGN (4)
+ .p2align 4
L(shr_13):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -1144,7 +1140,7 @@ L(shr_13):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_13_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -1194,7 +1190,7 @@ L(shr_13_gobble_next):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_14):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -1221,7 +1217,7 @@ L(shr_14):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_14_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -1271,7 +1267,7 @@ L(shr_14_gobble_next):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_15):
cmp $80, %rcx
lea -48(%rcx), %rcx
@@ -1298,7 +1294,7 @@ L(shr_15):
add %rcx, %rdi
jmp L(less48bytes)
- ALIGN (4)
+ .p2align 4
L(shr_15_gobble):
sub $32, %rcx
movdqa 16(%rsi), %xmm0
@@ -1348,7 +1344,7 @@ L(shr_15_gobble_next):
add %rcx, %rdi
jmp L(less48bytes)
# endif
- ALIGN (4)
+ .p2align 4
L(exit):
pmovmskb %xmm1, %r8d
sub $0xffff, %r8d
@@ -1389,56 +1385,56 @@ L(less16bytes):
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(Byte16):
movzbl -16(%rdi), %eax
movzbl -16(%rsi), %edx
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(Byte17):
movzbl -15(%rdi), %eax
movzbl -15(%rsi), %edx
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(Byte18):
movzbl -14(%rdi), %eax
movzbl -14(%rsi), %edx
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(Byte19):
movzbl -13(%rdi), %eax
movzbl -13(%rsi), %edx
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(Byte20):
movzbl -12(%rdi), %eax
movzbl -12(%rsi), %edx
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(Byte21):
movzbl -11(%rdi), %eax
movzbl -11(%rsi), %edx
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(Byte22):
movzbl -10(%rdi), %eax
movzbl -10(%rsi), %edx
sub %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(next_24_bytes):
lea 8(%rdi), %rdi
lea 8(%rsi), %rsi
@@ -1479,14 +1475,14 @@ L(next_24_bytes):
jne L(find_diff)
ret
- ALIGN (4)
+ .p2align 4
L(second_double_word):
mov -12(%rdi), %eax
cmp -12(%rsi), %eax
jne L(find_diff)
ret
- ALIGN (4)
+ .p2align 4
L(next_two_double_words):
and $15, %dh
jz L(fourth_double_word)
@@ -1495,7 +1491,7 @@ L(next_two_double_words):
jne L(find_diff)
ret
- ALIGN (4)
+ .p2align 4
L(fourth_double_word):
mov -4(%rdi), %eax
cmp -4(%rsi), %eax
@@ -1503,7 +1499,7 @@ L(fourth_double_word):
ret
# endif
- ALIGN (4)
+ .p2align 4
L(less48bytes):
cmp $8, %ecx
jae L(more8bytes)
@@ -1527,7 +1523,7 @@ L(less48bytes):
jmp L(4bytes)
# endif
- ALIGN (4)
+ .p2align 4
L(more8bytes):
cmp $16, %ecx
jae L(more16bytes)
@@ -1551,7 +1547,7 @@ L(more8bytes):
jmp L(12bytes)
# endif
- ALIGN (4)
+ .p2align 4
L(more16bytes):
cmp $24, %ecx
jae L(more24bytes)
@@ -1575,7 +1571,7 @@ L(more16bytes):
jmp L(20bytes)
# endif
- ALIGN (4)
+ .p2align 4
L(more24bytes):
cmp $32, %ecx
jae L(more32bytes)
@@ -1599,7 +1595,7 @@ L(more24bytes):
jmp L(28bytes)
# endif
- ALIGN (4)
+ .p2align 4
L(more32bytes):
cmp $40, %ecx
jae L(more40bytes)
@@ -1623,7 +1619,7 @@ L(more32bytes):
jmp L(36bytes)
# endif
- ALIGN (4)
+ .p2align 4
L(more40bytes):
cmp $40, %ecx
je L(40bytes)
@@ -1642,7 +1638,7 @@ L(more40bytes):
je L(46bytes)
jmp L(47bytes)
- ALIGN (4)
+ .p2align 4
L(44bytes):
movl -44(%rdi), %eax
movl -44(%rsi), %ecx
@@ -1702,7 +1698,7 @@ L(0bytes):
xor %eax, %eax
ret
# else
- ALIGN (4)
+ .p2align 4
L(44bytes):
movl -44(%rdi), %eax
cmp -44(%rsi), %eax
@@ -1753,7 +1749,7 @@ L(0bytes):
# endif
# ifndef USE_AS_WMEMCMP
- ALIGN (4)
+ .p2align 4
L(45bytes):
movl -45(%rdi), %eax
movl -45(%rsi), %ecx
@@ -1816,7 +1812,7 @@ L(1bytes):
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(46bytes):
movl -46(%rdi), %eax
movl -46(%rsi), %ecx
@@ -1882,7 +1878,7 @@ L(2bytes):
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(47bytes):
movl -47(%rdi), %eax
movl -47(%rsi), %ecx
@@ -1951,7 +1947,7 @@ L(3bytes):
xor %eax, %eax
ret
- ALIGN (4)
+ .p2align 4
L(find_diff):
cmpb %cl, %al
jne L(set)
@@ -1973,19 +1969,19 @@ L(set):
# else
/* for wmemcmp */
- ALIGN (4)
+ .p2align 4
L(find_diff):
mov $1, %eax
jg L(find_diff_bigger)
neg %eax
ret
- ALIGN (4)
+ .p2align 4
L(find_diff_bigger):
ret
# endif
- ALIGN (4)
+ .p2align 4
L(equal):
xor %eax, %eax
ret
diff --git a/libc/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S b/libc/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
index efdfea238..df6578ebc 100644
--- a/libc/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
+++ b/libc/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
@@ -20,10 +20,6 @@
#include "asm-syntax.h"
-#ifndef ALIGN
-# define ALIGN(n) .p2align n
-#endif
-
ENTRY(__memcpy_sse2_unaligned)
movq %rsi, %rax
@@ -44,7 +40,7 @@ L(return):
movq %rdi, %rax
ret
.p2align 4,,10
- ALIGN(4)
+ .p2align 4
.L31:
movdqu 16(%rsi), %xmm8
cmpq $64, %rdx
@@ -77,7 +73,7 @@ L(return):
leaq 32(%r10), %r8
leaq 48(%r10), %rax
.p2align 4,,10
- ALIGN(4)
+ .p2align 4
L(loop):
movdqu (%rcx,%r10), %xmm8
movdqa %xmm8, (%rcx)
@@ -151,7 +147,7 @@ L(less_16):
.L3:
leaq -1(%rdx), %rax
.p2align 4,,10
- ALIGN(4)
+ .p2align 4
.L11:
movzbl (%rsi,%rax), %edx
movb %dl, (%rdi,%rax)
diff --git a/libc/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S b/libc/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
index fc9fcef27..0eb7d9b75 100644
--- a/libc/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
+++ b/libc/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
@@ -31,10 +31,6 @@
# define MEMCPY_CHK __memcpy_chk_ssse3_back
#endif
-#ifndef ALIGN
-# define ALIGN(n) .p2align n
-#endif
-
#define JMPTBL(I, B) I - B
/* Branch to an entry in a jump table. TABLE is a jump table with
@@ -87,7 +83,7 @@ L(bk_write):
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
#endif
- ALIGN (4)
+ .p2align 4
L(144bytesormore):
#ifndef USE_AS_MEMMOVE
@@ -119,7 +115,7 @@ L(144bytesormore):
jmp *%r9
ud2
- ALIGN (4)
+ .p2align 4
L(copy_backward):
#ifdef DATA_CACHE_SIZE
mov $DATA_CACHE_SIZE, %RCX_LP
@@ -149,7 +145,7 @@ L(copy_backward):
jmp *%r9
ud2
- ALIGN (4)
+ .p2align 4
L(shl_0):
mov %rdx, %r9
@@ -162,7 +158,7 @@ L(shl_0):
#endif
jae L(gobble_mem_fwd)
sub $0x80, %rdx
- ALIGN (4)
+ .p2align 4
L(shl_0_loop):
movdqa (%rsi), %xmm1
movdqa %xmm1, (%rdi)
@@ -190,7 +186,7 @@ L(shl_0_loop):
add %rdx, %rdi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_0_bwd):
sub $0x80, %rdx
L(copy_backward_loop):
@@ -221,7 +217,7 @@ L(copy_backward_loop):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_1):
sub $0x80, %rdx
movaps -0x01(%rsi), %xmm1
@@ -258,7 +254,7 @@ L(shl_1):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_1_bwd):
movaps -0x01(%rsi), %xmm1
@@ -304,7 +300,7 @@ L(shl_1_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_2):
sub $0x80, %rdx
movaps -0x02(%rsi), %xmm1
@@ -341,7 +337,7 @@ L(shl_2):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_2_bwd):
movaps -0x02(%rsi), %xmm1
@@ -387,7 +383,7 @@ L(shl_2_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_3):
sub $0x80, %rdx
movaps -0x03(%rsi), %xmm1
@@ -424,7 +420,7 @@ L(shl_3):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_3_bwd):
movaps -0x03(%rsi), %xmm1
@@ -470,7 +466,7 @@ L(shl_3_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_4):
sub $0x80, %rdx
movaps -0x04(%rsi), %xmm1
@@ -507,7 +503,7 @@ L(shl_4):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_4_bwd):
movaps -0x04(%rsi), %xmm1
@@ -553,7 +549,7 @@ L(shl_4_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_5):
sub $0x80, %rdx
movaps -0x05(%rsi), %xmm1
@@ -590,7 +586,7 @@ L(shl_5):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_5_bwd):
movaps -0x05(%rsi), %xmm1
@@ -636,7 +632,7 @@ L(shl_5_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_6):
sub $0x80, %rdx
movaps -0x06(%rsi), %xmm1
@@ -673,7 +669,7 @@ L(shl_6):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_6_bwd):
movaps -0x06(%rsi), %xmm1
@@ -719,7 +715,7 @@ L(shl_6_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_7):
sub $0x80, %rdx
movaps -0x07(%rsi), %xmm1
@@ -756,7 +752,7 @@ L(shl_7):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_7_bwd):
movaps -0x07(%rsi), %xmm1
@@ -802,7 +798,7 @@ L(shl_7_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_8):
sub $0x80, %rdx
movaps -0x08(%rsi), %xmm1
@@ -839,7 +835,7 @@ L(shl_8):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_8_bwd):
movaps -0x08(%rsi), %xmm1
@@ -886,7 +882,7 @@ L(shl_8_end_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_9):
sub $0x80, %rdx
movaps -0x09(%rsi), %xmm1
@@ -923,7 +919,7 @@ L(shl_9):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_9_bwd):
movaps -0x09(%rsi), %xmm1
@@ -969,7 +965,7 @@ L(shl_9_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_10):
sub $0x80, %rdx
movaps -0x0a(%rsi), %xmm1
@@ -1006,7 +1002,7 @@ L(shl_10):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_10_bwd):
movaps -0x0a(%rsi), %xmm1
@@ -1052,7 +1048,7 @@ L(shl_10_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_11):
sub $0x80, %rdx
movaps -0x0b(%rsi), %xmm1
@@ -1089,7 +1085,7 @@ L(shl_11):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_11_bwd):
movaps -0x0b(%rsi), %xmm1
@@ -1135,7 +1131,7 @@ L(shl_11_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_12):
sub $0x80, %rdx
movdqa -0x0c(%rsi), %xmm1
@@ -1173,7 +1169,7 @@ L(shl_12):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_12_bwd):
movaps -0x0c(%rsi), %xmm1
@@ -1219,7 +1215,7 @@ L(shl_12_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_13):
sub $0x80, %rdx
movaps -0x0d(%rsi), %xmm1
@@ -1256,7 +1252,7 @@ L(shl_13):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_13_bwd):
movaps -0x0d(%rsi), %xmm1
@@ -1302,7 +1298,7 @@ L(shl_13_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_14):
sub $0x80, %rdx
movaps -0x0e(%rsi), %xmm1
@@ -1339,7 +1335,7 @@ L(shl_14):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_14_bwd):
movaps -0x0e(%rsi), %xmm1
@@ -1385,7 +1381,7 @@ L(shl_14_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_15):
sub $0x80, %rdx
movaps -0x0f(%rsi), %xmm1
@@ -1422,7 +1418,7 @@ L(shl_15):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_15_bwd):
movaps -0x0f(%rsi), %xmm1
@@ -1468,7 +1464,7 @@ L(shl_15_bwd):
sub %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(gobble_mem_fwd):
movdqu (%rsi), %xmm1
movdqu %xmm0, (%r8)
@@ -1570,7 +1566,7 @@ L(gobble_mem_fwd_end):
add %rdx, %rdi
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(gobble_mem_bwd):
add %rdx, %rsi
add %rdx, %rdi
@@ -2833,7 +2829,7 @@ L(bwd_write_1bytes):
END (MEMCPY)
.section .rodata.ssse3,"a",@progbits
- ALIGN (3)
+ .p2align 3
L(table_144_bytes_bwd):
.int JMPTBL (L(bwd_write_0bytes), L(table_144_bytes_bwd))
.int JMPTBL (L(bwd_write_1bytes), L(table_144_bytes_bwd))
@@ -2980,7 +2976,7 @@ L(table_144_bytes_bwd):
.int JMPTBL (L(bwd_write_142bytes), L(table_144_bytes_bwd))
.int JMPTBL (L(bwd_write_143bytes), L(table_144_bytes_bwd))
- ALIGN (3)
+ .p2align 3
L(table_144_bytes_fwd):
.int JMPTBL (L(fwd_write_0bytes), L(table_144_bytes_fwd))
.int JMPTBL (L(fwd_write_1bytes), L(table_144_bytes_fwd))
@@ -3127,7 +3123,7 @@ L(table_144_bytes_fwd):
.int JMPTBL (L(fwd_write_142bytes), L(table_144_bytes_fwd))
.int JMPTBL (L(fwd_write_143bytes), L(table_144_bytes_fwd))
- ALIGN (3)
+ .p2align 3
L(shl_table_fwd):
.int JMPTBL (L(shl_0), L(shl_table_fwd))
.int JMPTBL (L(shl_1), L(shl_table_fwd))
@@ -3146,7 +3142,7 @@ L(shl_table_fwd):
.int JMPTBL (L(shl_14), L(shl_table_fwd))
.int JMPTBL (L(shl_15), L(shl_table_fwd))
- ALIGN (3)
+ .p2align 3
L(shl_table_bwd):
.int JMPTBL (L(shl_0_bwd), L(shl_table_bwd))
.int JMPTBL (L(shl_1_bwd), L(shl_table_bwd))
diff --git a/libc/sysdeps/x86_64/multiarch/memcpy-ssse3.S b/libc/sysdeps/x86_64/multiarch/memcpy-ssse3.S
index 9642ceecd..0cedab244 100644
--- a/libc/sysdeps/x86_64/multiarch/memcpy-ssse3.S
+++ b/libc/sysdeps/x86_64/multiarch/memcpy-ssse3.S
@@ -31,10 +31,6 @@
# define MEMCPY_CHK __memcpy_chk_ssse3
#endif
-#ifndef ALIGN
-# define ALIGN(n) .p2align n
-#endif
-
#define JMPTBL(I, B) I - B
/* Branch to an entry in a jump table. TABLE is a jump table with
@@ -80,7 +76,7 @@ L(copy_forward):
jmp *%r9
ud2
- ALIGN (4)
+ .p2align 4
L(80bytesormore):
#ifndef USE_AS_MEMMOVE
cmp %dil, %sil
@@ -113,7 +109,7 @@ L(80bytesormore):
#endif
BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %r9, 4)
- ALIGN (4)
+ .p2align 4
L(copy_backward):
movdqu -16(%rsi, %rdx), %xmm0
add %rdx, %rsi
@@ -144,7 +140,7 @@ L(copy_backward):
#endif
BRANCH_TO_JMPTBL_ENTRY (L(shl_table_bwd), %r9, 4)
- ALIGN (4)
+ .p2align 4
L(shl_0):
sub $16, %rdx
movdqa (%rsi), %xmm1
@@ -172,7 +168,7 @@ L(shl_0_less_64bytes):
add %rdx, %rdi
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_0_gobble):
#ifdef DATA_CACHE_SIZE_HALF
cmp $DATA_CACHE_SIZE_HALF, %RDX_LP
@@ -228,7 +224,7 @@ L(shl_0_cache_less_64bytes):
add %rdx, %rdi
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_0_gobble_mem_loop):
prefetcht0 0x1c0(%rsi)
prefetcht0 0x280(%rsi)
@@ -287,7 +283,7 @@ L(shl_0_mem_less_32bytes):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_0_bwd):
sub $16, %rdx
movdqa -0x10(%rsi), %xmm1
@@ -313,7 +309,7 @@ L(shl_0_bwd):
L(shl_0_less_64bytes_bwd):
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_0_gobble_bwd):
#ifdef DATA_CACHE_SIZE_HALF
cmp $DATA_CACHE_SIZE_HALF, %RDX_LP
@@ -367,7 +363,7 @@ L(shl_0_gobble_bwd_loop):
L(shl_0_gobble_bwd_less_64bytes):
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_0_gobble_mem_bwd_loop):
prefetcht0 -0x1c0(%rsi)
prefetcht0 -0x280(%rsi)
@@ -423,7 +419,7 @@ L(shl_0_mem_bwd_less_64bytes):
L(shl_0_mem_bwd_less_32bytes):
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_1):
lea (L(shl_1_loop_L1)-L(shl_1))(%r9), %r9
cmp %rcx, %rdx
@@ -466,7 +462,7 @@ L(shl_1_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_1_bwd):
lea (L(shl_1_bwd_loop_L1)-L(shl_1_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -508,7 +504,7 @@ L(shl_1_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_2):
lea (L(shl_2_loop_L1)-L(shl_2))(%r9), %r9
cmp %rcx, %rdx
@@ -551,7 +547,7 @@ L(shl_2_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_2_bwd):
lea (L(shl_2_bwd_loop_L1)-L(shl_2_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -593,7 +589,7 @@ L(shl_2_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_3):
lea (L(shl_3_loop_L1)-L(shl_3))(%r9), %r9
cmp %rcx, %rdx
@@ -636,7 +632,7 @@ L(shl_3_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_3_bwd):
lea (L(shl_3_bwd_loop_L1)-L(shl_3_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -678,7 +674,7 @@ L(shl_3_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_4):
lea (L(shl_4_loop_L1)-L(shl_4))(%r9), %r9
cmp %rcx, %rdx
@@ -721,7 +717,7 @@ L(shl_4_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_4_bwd):
lea (L(shl_4_bwd_loop_L1)-L(shl_4_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -763,7 +759,7 @@ L(shl_4_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_5):
lea (L(shl_5_loop_L1)-L(shl_5))(%r9), %r9
cmp %rcx, %rdx
@@ -806,7 +802,7 @@ L(shl_5_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_5_bwd):
lea (L(shl_5_bwd_loop_L1)-L(shl_5_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -848,7 +844,7 @@ L(shl_5_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_6):
lea (L(shl_6_loop_L1)-L(shl_6))(%r9), %r9
cmp %rcx, %rdx
@@ -891,7 +887,7 @@ L(shl_6_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_6_bwd):
lea (L(shl_6_bwd_loop_L1)-L(shl_6_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -933,7 +929,7 @@ L(shl_6_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_7):
lea (L(shl_7_loop_L1)-L(shl_7))(%r9), %r9
cmp %rcx, %rdx
@@ -976,7 +972,7 @@ L(shl_7_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_7_bwd):
lea (L(shl_7_bwd_loop_L1)-L(shl_7_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -1018,7 +1014,7 @@ L(shl_7_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_8):
lea (L(shl_8_loop_L1)-L(shl_8))(%r9), %r9
cmp %rcx, %rdx
@@ -1051,7 +1047,7 @@ L(shl_8_loop_L1):
movaps %xmm5, -0x10(%rdi)
jmp *%r9
ud2
- ALIGN (4)
+ .p2align 4
L(shl_8_end):
lea 64(%rdx), %rdx
movaps %xmm4, -0x20(%rdi)
@@ -1061,7 +1057,7 @@ L(shl_8_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_8_bwd):
lea (L(shl_8_bwd_loop_L1)-L(shl_8_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -1103,7 +1099,7 @@ L(shl_8_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_9):
lea (L(shl_9_loop_L1)-L(shl_9))(%r9), %r9
cmp %rcx, %rdx
@@ -1146,7 +1142,7 @@ L(shl_9_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_9_bwd):
lea (L(shl_9_bwd_loop_L1)-L(shl_9_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -1188,7 +1184,7 @@ L(shl_9_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_10):
lea (L(shl_10_loop_L1)-L(shl_10))(%r9), %r9
cmp %rcx, %rdx
@@ -1231,7 +1227,7 @@ L(shl_10_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_10_bwd):
lea (L(shl_10_bwd_loop_L1)-L(shl_10_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -1273,7 +1269,7 @@ L(shl_10_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_11):
lea (L(shl_11_loop_L1)-L(shl_11))(%r9), %r9
cmp %rcx, %rdx
@@ -1316,7 +1312,7 @@ L(shl_11_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_11_bwd):
lea (L(shl_11_bwd_loop_L1)-L(shl_11_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -1358,7 +1354,7 @@ L(shl_11_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_12):
lea (L(shl_12_loop_L1)-L(shl_12))(%r9), %r9
cmp %rcx, %rdx
@@ -1401,7 +1397,7 @@ L(shl_12_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_12_bwd):
lea (L(shl_12_bwd_loop_L1)-L(shl_12_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -1443,7 +1439,7 @@ L(shl_12_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_13):
lea (L(shl_13_loop_L1)-L(shl_13))(%r9), %r9
cmp %rcx, %rdx
@@ -1486,7 +1482,7 @@ L(shl_13_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_13_bwd):
lea (L(shl_13_bwd_loop_L1)-L(shl_13_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -1528,7 +1524,7 @@ L(shl_13_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_14):
lea (L(shl_14_loop_L1)-L(shl_14))(%r9), %r9
cmp %rcx, %rdx
@@ -1571,7 +1567,7 @@ L(shl_14_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_14_bwd):
lea (L(shl_14_bwd_loop_L1)-L(shl_14_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -1613,7 +1609,7 @@ L(shl_14_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_15):
lea (L(shl_15_loop_L1)-L(shl_15))(%r9), %r9
cmp %rcx, %rdx
@@ -1656,7 +1652,7 @@ L(shl_15_end):
add %rdx, %rsi
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(shl_15_bwd):
lea (L(shl_15_bwd_loop_L1)-L(shl_15_bwd))(%r9), %r9
cmp %rcx, %rdx
@@ -1698,7 +1694,7 @@ L(shl_15_bwd_end):
movdqu %xmm0, (%r8)
BRANCH_TO_JMPTBL_ENTRY(L(table_less_80bytes), %rdx, 4)
- ALIGN (4)
+ .p2align 4
L(write_72bytes):
movdqu -72(%rsi), %xmm0
movdqu -56(%rsi), %xmm1
@@ -1716,7 +1712,7 @@ L(write_72bytes):
mov %rcx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_64bytes):
movdqu -64(%rsi), %xmm0
mov -48(%rsi), %rcx
@@ -1734,7 +1730,7 @@ L(write_64bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_56bytes):
movdqu -56(%rsi), %xmm0
mov -40(%rsi), %r8
@@ -1750,7 +1746,7 @@ L(write_56bytes):
mov %rcx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_48bytes):
mov -48(%rsi), %rcx
mov -40(%rsi), %r8
@@ -1766,7 +1762,7 @@ L(write_48bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_40bytes):
mov -40(%rsi), %r8
mov -32(%rsi), %r9
@@ -1780,7 +1776,7 @@ L(write_40bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_32bytes):
mov -32(%rsi), %r9
mov -24(%rsi), %r10
@@ -1792,7 +1788,7 @@ L(write_32bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_24bytes):
mov -24(%rsi), %r10
mov -16(%rsi), %r11
@@ -1802,7 +1798,7 @@ L(write_24bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_16bytes):
mov -16(%rsi), %r11
mov -8(%rsi), %rdx
@@ -1810,14 +1806,14 @@ L(write_16bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_8bytes):
mov -8(%rsi), %rdx
mov %rdx, -8(%rdi)
L(write_0bytes):
ret
- ALIGN (4)
+ .p2align 4
L(write_73bytes):
movdqu -73(%rsi), %xmm0
movdqu -57(%rsi), %xmm1
@@ -1837,7 +1833,7 @@ L(write_73bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_65bytes):
movdqu -65(%rsi), %xmm0
movdqu -49(%rsi), %xmm1
@@ -1855,7 +1851,7 @@ L(write_65bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_57bytes):
movdqu -57(%rsi), %xmm0
mov -41(%rsi), %r8
@@ -1873,7 +1869,7 @@ L(write_57bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_49bytes):
movdqu -49(%rsi), %xmm0
mov -33(%rsi), %r9
@@ -1889,7 +1885,7 @@ L(write_49bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_41bytes):
mov -41(%rsi), %r8
mov -33(%rsi), %r9
@@ -1905,7 +1901,7 @@ L(write_41bytes):
mov %dl, -1(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_33bytes):
mov -33(%rsi), %r9
mov -25(%rsi), %r10
@@ -1919,7 +1915,7 @@ L(write_33bytes):
mov %dl, -1(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_25bytes):
mov -25(%rsi), %r10
mov -17(%rsi), %r11
@@ -1931,7 +1927,7 @@ L(write_25bytes):
mov %dl, -1(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_17bytes):
mov -17(%rsi), %r11
mov -9(%rsi), %rcx
@@ -1941,7 +1937,7 @@ L(write_17bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_9bytes):
mov -9(%rsi), %rcx
mov -4(%rsi), %edx
@@ -1949,13 +1945,13 @@ L(write_9bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_1bytes):
mov -1(%rsi), %dl
mov %dl, -1(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_74bytes):
movdqu -74(%rsi), %xmm0
movdqu -58(%rsi), %xmm1
@@ -1975,7 +1971,7 @@ L(write_74bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_66bytes):
movdqu -66(%rsi), %xmm0
movdqu -50(%rsi), %xmm1
@@ -1995,7 +1991,7 @@ L(write_66bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_58bytes):
movdqu -58(%rsi), %xmm1
mov -42(%rsi), %r8
@@ -2013,7 +2009,7 @@ L(write_58bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_50bytes):
movdqu -50(%rsi), %xmm0
mov -34(%rsi), %r9
@@ -2029,7 +2025,7 @@ L(write_50bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_42bytes):
mov -42(%rsi), %r8
mov -34(%rsi), %r9
@@ -2045,7 +2041,7 @@ L(write_42bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_34bytes):
mov -34(%rsi), %r9
mov -26(%rsi), %r10
@@ -2059,7 +2055,7 @@ L(write_34bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_26bytes):
mov -26(%rsi), %r10
mov -18(%rsi), %r11
@@ -2071,7 +2067,7 @@ L(write_26bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_18bytes):
mov -18(%rsi), %r11
mov -10(%rsi), %rcx
@@ -2081,7 +2077,7 @@ L(write_18bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_10bytes):
mov -10(%rsi), %rcx
mov -4(%rsi), %edx
@@ -2089,13 +2085,13 @@ L(write_10bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_2bytes):
mov -2(%rsi), %dx
mov %dx, -2(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_75bytes):
movdqu -75(%rsi), %xmm0
movdqu -59(%rsi), %xmm1
@@ -2115,7 +2111,7 @@ L(write_75bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_67bytes):
movdqu -67(%rsi), %xmm0
movdqu -59(%rsi), %xmm1
@@ -2135,7 +2131,7 @@ L(write_67bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_59bytes):
movdqu -59(%rsi), %xmm0
mov -43(%rsi), %r8
@@ -2153,7 +2149,7 @@ L(write_59bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_51bytes):
movdqu -51(%rsi), %xmm0
mov -35(%rsi), %r9
@@ -2169,7 +2165,7 @@ L(write_51bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_43bytes):
mov -43(%rsi), %r8
mov -35(%rsi), %r9
@@ -2185,7 +2181,7 @@ L(write_43bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_35bytes):
mov -35(%rsi), %r9
mov -27(%rsi), %r10
@@ -2199,7 +2195,7 @@ L(write_35bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_27bytes):
mov -27(%rsi), %r10
mov -19(%rsi), %r11
@@ -2211,7 +2207,7 @@ L(write_27bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_19bytes):
mov -19(%rsi), %r11
mov -11(%rsi), %rcx
@@ -2221,7 +2217,7 @@ L(write_19bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_11bytes):
mov -11(%rsi), %rcx
mov -4(%rsi), %edx
@@ -2229,7 +2225,7 @@ L(write_11bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_3bytes):
mov -3(%rsi), %dx
mov -2(%rsi), %cx
@@ -2237,7 +2233,7 @@ L(write_3bytes):
mov %cx, -2(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_76bytes):
movdqu -76(%rsi), %xmm0
movdqu -60(%rsi), %xmm1
@@ -2257,7 +2253,7 @@ L(write_76bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_68bytes):
movdqu -68(%rsi), %xmm0
movdqu -52(%rsi), %xmm1
@@ -2275,7 +2271,7 @@ L(write_68bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_60bytes):
movdqu -60(%rsi), %xmm0
mov -44(%rsi), %r8
@@ -2293,7 +2289,7 @@ L(write_60bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_52bytes):
movdqu -52(%rsi), %xmm0
mov -36(%rsi), %r9
@@ -2309,7 +2305,7 @@ L(write_52bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_44bytes):
mov -44(%rsi), %r8
mov -36(%rsi), %r9
@@ -2325,7 +2321,7 @@ L(write_44bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_36bytes):
mov -36(%rsi), %r9
mov -28(%rsi), %r10
@@ -2339,7 +2335,7 @@ L(write_36bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_28bytes):
mov -28(%rsi), %r10
mov -20(%rsi), %r11
@@ -2351,7 +2347,7 @@ L(write_28bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_20bytes):
mov -20(%rsi), %r11
mov -12(%rsi), %rcx
@@ -2361,7 +2357,7 @@ L(write_20bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_12bytes):
mov -12(%rsi), %rcx
mov -4(%rsi), %edx
@@ -2369,13 +2365,13 @@ L(write_12bytes):
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_4bytes):
mov -4(%rsi), %edx
mov %edx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_77bytes):
movdqu -77(%rsi), %xmm0
movdqu -61(%rsi), %xmm1
@@ -2395,7 +2391,7 @@ L(write_77bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_69bytes):
movdqu -69(%rsi), %xmm0
movdqu -53(%rsi), %xmm1
@@ -2413,7 +2409,7 @@ L(write_69bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_61bytes):
movdqu -61(%rsi), %xmm0
mov -45(%rsi), %r8
@@ -2431,7 +2427,7 @@ L(write_61bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_53bytes):
movdqu -53(%rsi), %xmm0
mov -45(%rsi), %r8
@@ -2448,7 +2444,7 @@ L(write_53bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_45bytes):
mov -45(%rsi), %r8
mov -37(%rsi), %r9
@@ -2464,7 +2460,7 @@ L(write_45bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_37bytes):
mov -37(%rsi), %r9
mov -29(%rsi), %r10
@@ -2478,7 +2474,7 @@ L(write_37bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_29bytes):
mov -29(%rsi), %r10
mov -21(%rsi), %r11
@@ -2490,7 +2486,7 @@ L(write_29bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_21bytes):
mov -21(%rsi), %r11
mov -13(%rsi), %rcx
@@ -2500,7 +2496,7 @@ L(write_21bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_13bytes):
mov -13(%rsi), %rcx
mov -8(%rsi), %rdx
@@ -2508,7 +2504,7 @@ L(write_13bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_5bytes):
mov -5(%rsi), %edx
mov -4(%rsi), %ecx
@@ -2516,7 +2512,7 @@ L(write_5bytes):
mov %ecx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_78bytes):
movdqu -78(%rsi), %xmm0
movdqu -62(%rsi), %xmm1
@@ -2536,7 +2532,7 @@ L(write_78bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_70bytes):
movdqu -70(%rsi), %xmm0
movdqu -54(%rsi), %xmm1
@@ -2554,7 +2550,7 @@ L(write_70bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_62bytes):
movdqu -62(%rsi), %xmm0
mov -46(%rsi), %r8
@@ -2572,7 +2568,7 @@ L(write_62bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_54bytes):
movdqu -54(%rsi), %xmm0
mov -38(%rsi), %r9
@@ -2588,7 +2584,7 @@ L(write_54bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_46bytes):
mov -46(%rsi), %r8
mov -38(%rsi), %r9
@@ -2604,7 +2600,7 @@ L(write_46bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_38bytes):
mov -38(%rsi), %r9
mov -30(%rsi), %r10
@@ -2618,7 +2614,7 @@ L(write_38bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_30bytes):
mov -30(%rsi), %r10
mov -22(%rsi), %r11
@@ -2630,7 +2626,7 @@ L(write_30bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_22bytes):
mov -22(%rsi), %r11
mov -14(%rsi), %rcx
@@ -2640,7 +2636,7 @@ L(write_22bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_14bytes):
mov -14(%rsi), %rcx
mov -8(%rsi), %rdx
@@ -2648,7 +2644,7 @@ L(write_14bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_6bytes):
mov -6(%rsi), %edx
mov -4(%rsi), %ecx
@@ -2656,7 +2652,7 @@ L(write_6bytes):
mov %ecx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_79bytes):
movdqu -79(%rsi), %xmm0
movdqu -63(%rsi), %xmm1
@@ -2676,7 +2672,7 @@ L(write_79bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_71bytes):
movdqu -71(%rsi), %xmm0
movdqu -55(%rsi), %xmm1
@@ -2694,7 +2690,7 @@ L(write_71bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_63bytes):
movdqu -63(%rsi), %xmm0
mov -47(%rsi), %r8
@@ -2712,7 +2708,7 @@ L(write_63bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_55bytes):
movdqu -55(%rsi), %xmm0
mov -39(%rsi), %r9
@@ -2728,7 +2724,7 @@ L(write_55bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_47bytes):
mov -47(%rsi), %r8
mov -39(%rsi), %r9
@@ -2744,7 +2740,7 @@ L(write_47bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_39bytes):
mov -39(%rsi), %r9
mov -31(%rsi), %r10
@@ -2758,7 +2754,7 @@ L(write_39bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_31bytes):
mov -31(%rsi), %r10
mov -23(%rsi), %r11
@@ -2770,7 +2766,7 @@ L(write_31bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_23bytes):
mov -23(%rsi), %r11
mov -15(%rsi), %rcx
@@ -2780,7 +2776,7 @@ L(write_23bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_15bytes):
mov -15(%rsi), %rcx
mov -8(%rsi), %rdx
@@ -2788,7 +2784,7 @@ L(write_15bytes):
mov %rdx, -8(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(write_7bytes):
mov -7(%rsi), %edx
mov -4(%rsi), %ecx
@@ -2796,7 +2792,7 @@ L(write_7bytes):
mov %ecx, -4(%rdi)
ret
- ALIGN (4)
+ .p2align 4
L(large_page_fwd):
movdqu (%rsi), %xmm1
lea 16(%rsi), %rsi
@@ -2859,7 +2855,7 @@ L(large_page_less_64bytes):
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
#ifdef USE_AS_MEMMOVE
- ALIGN (4)
+ .p2align 4
L(ll_cache_copy_fwd_start):
prefetcht0 0x1c0(%rsi)
prefetcht0 0x200(%rsi)
@@ -2906,7 +2902,7 @@ L(large_page_ll_less_fwd_64bytes):
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
#endif
- ALIGN (4)
+ .p2align 4
L(large_page_bwd):
movdqu -0x10(%rsi), %xmm1
lea -16(%rsi), %rsi
@@ -2966,7 +2962,7 @@ L(large_page_less_bwd_64bytes):
BRANCH_TO_JMPTBL_ENTRY (L(table_less_80bytes), %rdx, 4)
#ifdef USE_AS_MEMMOVE
- ALIGN (4)
+ .p2align 4
L(ll_cache_copy_bwd_start):
prefetcht0 -0x1c0(%rsi)
prefetcht0 -0x200(%rsi)
@@ -3014,7 +3010,7 @@ L(large_page_ll_less_bwd_64bytes):
END (MEMCPY)
.section .rodata.ssse3,"a",@progbits
- ALIGN (3)
+ .p2align 3
L(table_less_80bytes):
.int JMPTBL (L(write_0bytes), L(table_less_80bytes))
.int JMPTBL (L(write_1bytes), L(table_less_80bytes))
@@ -3097,7 +3093,7 @@ L(table_less_80bytes):
.int JMPTBL (L(write_78bytes), L(table_less_80bytes))
.int JMPTBL (L(write_79bytes), L(table_less_80bytes))
- ALIGN (3)
+ .p2align 3
L(shl_table):
.int JMPTBL (L(shl_0), L(shl_table))
.int JMPTBL (L(shl_1), L(shl_table))
@@ -3116,7 +3112,7 @@ L(shl_table):
.int JMPTBL (L(shl_14), L(shl_table))
.int JMPTBL (L(shl_15), L(shl_table))
- ALIGN (3)
+ .p2align 3
L(shl_table_bwd):
.int JMPTBL (L(shl_0_bwd), L(shl_table_bwd))
.int JMPTBL (L(shl_1_bwd), L(shl_table_bwd))
diff --git a/libc/sysdeps/x86_64/multiarch/strchr.S b/libc/sysdeps/x86_64/multiarch/strchr.S
index f170238b5..3f0b2c5f5 100644
--- a/libc/sysdeps/x86_64/multiarch/strchr.S
+++ b/libc/sysdeps/x86_64/multiarch/strchr.S
@@ -29,12 +29,6 @@ ENTRY(strchr)
jne 1f
call __init_cpu_features
1: leaq __strchr_sse2(%rip), %rax
- testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
- jnz 2f
- testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
- jz 2f
- leaq __strchr_sse42(%rip), %rax
- ret
2: testl $bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip)
jz 3f
leaq __strchr_sse2_no_bsf(%rip), %rax
@@ -42,127 +36,6 @@ ENTRY(strchr)
END(strchr)
-/*
- This implementation uses SSE4 instructions to compare up to 16 bytes
- at a time looking for the first occurrence of the character c in the
- string s:
-
- char *strchr (const char *s, int c);
-
- We use 0xa:
- _SIDD_SBYTE_OPS
- | _SIDD_CMP_EQUAL_EACH
- | _SIDD_LEAST_SIGNIFICANT
- on pcmpistri to compare xmm/mem128
-
- 0 1 2 3 4 5 6 7 8 9 A B C D E F
- X X X X X X X X X X X X X X X X
-
- against xmm
-
- 0 1 2 3 4 5 6 7 8 9 A B C D E F
- C C C C C C C C C C C C C C C C
-
- to find out if the first 16byte data element has a byte C and the
- offset of the first byte. There are 3 cases:
-
- 1. The first 16byte data element has the byte C at the offset X.
- 2. The first 16byte data element has EOS and doesn't have the byte C.
- 3. The first 16byte data element is valid and doesn't have the byte C.
-
- Here is the table of ECX, CFlag, ZFlag and SFlag for 3 cases:
-
- case ECX CFlag ZFlag SFlag
- 1 X 1 0/1 0
- 2 16 0 1 0
- 3 16 0 0 0
-
- We exit from the loop for cases 1 and 2 with jbe which branches
- when either CFlag or ZFlag is 1. If CFlag == 1, ECX has the offset
- X for case 1. */
-
- .section .text.sse4.2,"ax",@progbits
- .align 16
- .type __strchr_sse42, @function
- .globl __strchr_sse42
- .hidden __strchr_sse42
-__strchr_sse42:
- cfi_startproc
- CALL_MCOUNT
- testb %sil, %sil
- je __strend_sse4
- pxor %xmm2, %xmm2
- movd %esi, %xmm1
- movl %edi, %ecx
- pshufb %xmm2, %xmm1
- andl $15, %ecx
- movq %rdi, %r8
- je L(aligned_start)
-
-/* Handle unaligned string. */
- andq $-16, %r8
- movdqa (%r8), %xmm0
- pcmpeqb %xmm0, %xmm2
- pcmpeqb %xmm1, %xmm0
- /* Find where NULL is. */
- pmovmskb %xmm2, %edx
- /* Check if there is a match. */
- pmovmskb %xmm0, %esi
- /* Remove the leading bytes. */
- sarl %cl, %edx
- sarl %cl, %esi
- testl %esi, %esi
- je L(unaligned_no_match)
- /* Check which byte is a match. */
- bsfl %esi, %eax
- /* Is there a NULL? */
- testl %edx, %edx
- je L(unaligned_match)
- bsfl %edx, %esi
- cmpl %esi, %eax
- /* Return NULL if NULL comes first. */
- ja L(return_null)
-L(unaligned_match):
- addq %rdi, %rax
- ret
-
- .p2align 4
-L(unaligned_no_match):
- testl %edx, %edx
- jne L(return_null)
-
-/* Loop start on aligned string. */
-L(loop):
- addq $16, %r8
-L(aligned_start):
- pcmpistri $0x2, (%r8), %xmm1
- jbe L(wrap)
- addq $16, %r8
- pcmpistri $0x2, (%r8), %xmm1
- jbe L(wrap)
- addq $16, %r8
- pcmpistri $0x2, (%r8), %xmm1
- jbe L(wrap)
- addq $16, %r8
- pcmpistri $0x2, (%r8), %xmm1
- jbe L(wrap)
- jmp L(loop)
-L(wrap):
- jc L(loop_exit)
-
-/* Return NULL. */
-L(return_null):
- xorl %eax, %eax
- ret
-
-/* Loop exit. */
- .p2align 4
-L(loop_exit):
- leaq (%r8,%rcx), %rax
- ret
- cfi_endproc
- .size __strchr_sse42, .-__strchr_sse42
-
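The comment block deleted above documents how __strchr_sse42 drove pcmpistri, including the ECX/CFlag/ZFlag table for its three cases. A minimal C sketch of that per-chunk step, written with the SSE4.2 intrinsics rather than glibc's assembly (an illustration only, using the stated mode 0x0a; the strrchr variant removed later in this patch is the same scheme with _SIDD_MOST_SIGNIFICANT, 0x4a, so it reports the last match instead of the first):

/* A minimal sketch, not glibc code: one 16-byte pcmpistri comparison as the
   deleted comment describes it (compile with -msse4.2).  C is assumed
   non-zero here, matching the "je __strend_sse4" bail-out above.  */
#include <nmmintrin.h>

#define STRCHR_MODE \
  (_SIDD_SBYTE_OPS | _SIDD_CMP_EQUAL_EACH | _SIDD_LEAST_SIGNIFICANT) /* 0x0a */

/* Scan one aligned 16-byte chunk for byte C.  Returns the match offset
   (case 1), -1 when the terminating NUL comes first (case 2), or 16 when
   the chunk holds neither (case 3), mirroring the ECX/CFlag/ZFlag table.  */
static int
scan_chunk_for_char (const char *chunk16, int c)
{
  __m128i needle = _mm_set1_epi8 ((char) c);            /* xmm of C bytes */
  __m128i chunk = _mm_load_si128 ((const __m128i *) chunk16);
  int idx = _mm_cmpistri (needle, chunk, STRCHR_MODE);  /* ECX */
  int cf = _mm_cmpistrc (needle, chunk, STRCHR_MODE);   /* CFlag: C found */
  int zf = _mm_cmpistrz (needle, chunk, STRCHR_MODE);   /* ZFlag: NUL seen */
  if (cf)
    return idx;
  return zf ? -1 : 16;
}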
# undef ENTRY
# define ENTRY(name) \
diff --git a/libc/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S b/libc/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S
index eed843297..4a8e57a24 100644
--- a/libc/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S
+++ b/libc/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S
@@ -17,7 +17,6 @@
<http://www.gnu.org/licenses/>. */
#include "sysdep.h"
-#define ALIGN(x) .p2align x
ENTRY ( __strcmp_sse2_unaligned)
movl %edi, %eax
@@ -43,7 +42,7 @@ L(return):
subl %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(next_48_bytes):
movdqu 16(%rdi), %xmm6
movdqu 16(%rsi), %xmm3
@@ -85,7 +84,7 @@ L(main_loop_header):
movq %rcx, %rsi
jmp L(loop_start)
- ALIGN (4)
+ .p2align 4
L(loop):
addq $64, %rax
addq $64, %rdx
@@ -141,7 +140,7 @@ L(back_to_loop):
subl %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(loop_cross_page):
xor %r10, %r10
movq %rdx, %r9
@@ -191,7 +190,7 @@ L(loop_cross_page):
subl %edx, %eax
ret
- ALIGN (4)
+ .p2align 4
L(cross_page_loop):
cmpb %cl, %al
jne L(different)
diff --git a/libc/sysdeps/x86_64/multiarch/strrchr-sse2-no-bsf.S b/libc/sysdeps/x86_64/multiarch/strrchr-sse2-no-bsf.S
deleted file mode 100644
index fcef610db..000000000
--- a/libc/sysdeps/x86_64/multiarch/strrchr-sse2-no-bsf.S
+++ /dev/null
@@ -1,555 +0,0 @@
-/* strrchr with SSE2 without bsf and bsr
- Copyright (C) 2011-2013 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if defined SHARED && !defined NOT_IN_libc
-
-# include <sysdep.h>
-# include "asm-syntax.h"
-
- atom_text_section
-ENTRY (__strrchr_sse2_no_bsf)
-
- movd %rsi, %xmm1
- pxor %xmm2, %xmm2
- mov %rdi, %rcx
- punpcklbw %xmm1, %xmm1
- punpcklbw %xmm1, %xmm1
- /* ECX has OFFSET. */
- and $63, %rcx
- cmp $48, %rcx
- pshufd $0, %xmm1, %xmm1
- ja L(crosscache)
-
-/* unaligned string. */
- movdqu (%rdi), %xmm0
- pcmpeqb %xmm0, %xmm2
- pcmpeqb %xmm1, %xmm0
- /* Find where NULL is. */
- pmovmskb %xmm2, %rcx
- /* Check if there is a match. */
- pmovmskb %xmm0, %rax
- add $16, %rdi
-
- test %rax, %rax
- jnz L(unaligned_match1)
-
- test %rcx, %rcx
- jnz L(return_null)
-
- and $-16, %rdi
- xor %r8, %r8
- jmp L(loop)
-
- .p2align 4
-L(unaligned_match1):
- test %rcx, %rcx
- jnz L(prolog_find_zero_1)
-
- mov %rax, %r8
- mov %rdi, %rsi
- and $-16, %rdi
- jmp L(loop)
-
- .p2align 4
-L(crosscache):
-/* Hancle unaligned string. */
- and $15, %rcx
- and $-16, %rdi
- pxor %xmm3, %xmm3
- movdqa (%rdi), %xmm0
- pcmpeqb %xmm0, %xmm3
- pcmpeqb %xmm1, %xmm0
- /* Find where NULL is. */
- pmovmskb %xmm3, %rdx
- /* Check if there is a match. */
- pmovmskb %xmm0, %rax
- /* Remove the leading bytes. */
- shr %cl, %rdx
- shr %cl, %rax
- add $16, %rdi
-
- test %rax, %rax
- jnz L(unaligned_match)
-
- test %rdx, %rdx
- jnz L(return_null)
-
- xor %r8, %r8
- jmp L(loop)
-
- .p2align 4
-L(unaligned_match):
- test %rdx, %rdx
- jnz L(prolog_find_zero)
-
- mov %rax, %r8
- lea (%rdi, %rcx), %rsi
-
-/* Loop start on aligned string. */
- .p2align 4
-L(loop):
- movdqa (%rdi), %xmm0
- pcmpeqb %xmm0, %xmm2
- add $16, %rdi
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm2, %rcx
- pmovmskb %xmm0, %rax
- or %rax, %rcx
- jnz L(matches)
-
- movdqa (%rdi), %xmm0
- pcmpeqb %xmm0, %xmm2
- add $16, %rdi
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm2, %rcx
- pmovmskb %xmm0, %rax
- or %rax, %rcx
- jnz L(matches)
-
- movdqa (%rdi), %xmm0
- pcmpeqb %xmm0, %xmm2
- add $16, %rdi
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm2, %rcx
- pmovmskb %xmm0, %rax
- or %rax, %rcx
- jnz L(matches)
-
- movdqa (%rdi), %xmm0
- pcmpeqb %xmm0, %xmm2
- add $16, %rdi
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm2, %rcx
- pmovmskb %xmm0, %rax
- or %rax, %rcx
- jz L(loop)
-
-L(matches):
- test %rax, %rax
- jnz L(match)
-L(return_value):
- test %r8, %r8
- jz L(return_null)
- mov %r8, %rax
- mov %rsi, %rdi
- jmp L(match_exit)
-
- .p2align 4
-L(match):
- pmovmskb %xmm2, %rcx
- test %rcx, %rcx
- jnz L(find_zero)
- mov %rax, %r8
- mov %rdi, %rsi
- jmp L(loop)
-
- .p2align 4
-L(find_zero):
- test %cl, %cl
- jz L(find_zero_high)
- mov %cl, %dl
- and $15, %dl
- jz L(find_zero_8)
- test $0x01, %cl
- jnz L(FindZeroExit1)
- test $0x02, %cl
- jnz L(FindZeroExit2)
- test $0x04, %cl
- jnz L(FindZeroExit3)
- and $1 << 4 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(find_zero_8):
- test $0x10, %cl
- jnz L(FindZeroExit5)
- test $0x20, %cl
- jnz L(FindZeroExit6)
- test $0x40, %cl
- jnz L(FindZeroExit7)
- and $1 << 8 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(find_zero_high):
- mov %ch, %dh
- and $15, %dh
- jz L(find_zero_high_8)
- test $0x01, %ch
- jnz L(FindZeroExit9)
- test $0x02, %ch
- jnz L(FindZeroExit10)
- test $0x04, %ch
- jnz L(FindZeroExit11)
- and $1 << 12 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(find_zero_high_8):
- test $0x10, %ch
- jnz L(FindZeroExit13)
- test $0x20, %ch
- jnz L(FindZeroExit14)
- test $0x40, %ch
- jnz L(FindZeroExit15)
- and $1 << 16 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(FindZeroExit1):
- and $1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(FindZeroExit2):
- and $1 << 2 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(FindZeroExit3):
- and $1 << 3 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(FindZeroExit5):
- and $1 << 5 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(FindZeroExit6):
- and $1 << 6 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(FindZeroExit7):
- and $1 << 7 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(FindZeroExit9):
- and $1 << 9 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(FindZeroExit10):
- and $1 << 10 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(FindZeroExit11):
- and $1 << 11 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(FindZeroExit13):
- and $1 << 13 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(FindZeroExit14):
- and $1 << 14 - 1, %rax
- jz L(return_value)
- jmp L(match_exit)
-
- .p2align 4
-L(FindZeroExit15):
- and $1 << 15 - 1, %rax
- jz L(return_value)
-
- .p2align 4
-L(match_exit):
- test %ah, %ah
- jnz L(match_exit_high)
- mov %al, %dl
- and $15 << 4, %dl
- jnz L(match_exit_8)
- test $0x08, %al
- jnz L(Exit4)
- test $0x04, %al
- jnz L(Exit3)
- test $0x02, %al
- jnz L(Exit2)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(match_exit_8):
- test $0x80, %al
- jnz L(Exit8)
- test $0x40, %al
- jnz L(Exit7)
- test $0x20, %al
- jnz L(Exit6)
- lea -12(%rdi), %rax
- ret
-
- .p2align 4
-L(match_exit_high):
- mov %ah, %dh
- and $15 << 4, %dh
- jnz L(match_exit_high_8)
- test $0x08, %ah
- jnz L(Exit12)
- test $0x04, %ah
- jnz L(Exit11)
- test $0x02, %ah
- jnz L(Exit10)
- lea -8(%rdi), %rax
- ret
-
- .p2align 4
-L(match_exit_high_8):
- test $0x80, %ah
- jnz L(Exit16)
- test $0x40, %ah
- jnz L(Exit15)
- test $0x20, %ah
- jnz L(Exit14)
- lea -4(%rdi), %rax
- ret
-
- .p2align 4
-L(Exit2):
- lea -15(%rdi), %rax
- ret
-
- .p2align 4
-L(Exit3):
- lea -14(%rdi), %rax
- ret
-
- .p2align 4
-L(Exit4):
- lea -13(%rdi), %rax
- ret
-
- .p2align 4
-L(Exit6):
- lea -11(%rdi), %rax
- ret
-
- .p2align 4
-L(Exit7):
- lea -10(%rdi), %rax
- ret
-
- .p2align 4
-L(Exit8):
- lea -9(%rdi), %rax
- ret
-
- .p2align 4
-L(Exit10):
- lea -7(%rdi), %rax
- ret
-
- .p2align 4
-L(Exit11):
- lea -6(%rdi), %rax
- ret
-
- .p2align 4
-L(Exit12):
- lea -5(%rdi), %rax
- ret
-
- .p2align 4
-L(Exit14):
- lea -3(%rdi), %rax
- ret
-
- .p2align 4
-L(Exit15):
- lea -2(%rdi), %rax
- ret
-
- .p2align 4
-L(Exit16):
- lea -1(%rdi), %rax
- ret
-
-/* Return NULL. */
- .p2align 4
-L(return_null):
- xor %rax, %rax
- ret
-
- .p2align 4
-L(prolog_find_zero):
- add %rcx, %rdi
- mov %rdx, %rcx
-L(prolog_find_zero_1):
- test %cl, %cl
- jz L(prolog_find_zero_high)
- mov %cl, %dl
- and $15, %dl
- jz L(prolog_find_zero_8)
- test $0x01, %cl
- jnz L(PrologFindZeroExit1)
- test $0x02, %cl
- jnz L(PrologFindZeroExit2)
- test $0x04, %cl
- jnz L(PrologFindZeroExit3)
- and $1 << 4 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(prolog_find_zero_8):
- test $0x10, %cl
- jnz L(PrologFindZeroExit5)
- test $0x20, %cl
- jnz L(PrologFindZeroExit6)
- test $0x40, %cl
- jnz L(PrologFindZeroExit7)
- and $1 << 8 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(prolog_find_zero_high):
- mov %ch, %dh
- and $15, %dh
- jz L(prolog_find_zero_high_8)
- test $0x01, %ch
- jnz L(PrologFindZeroExit9)
- test $0x02, %ch
- jnz L(PrologFindZeroExit10)
- test $0x04, %ch
- jnz L(PrologFindZeroExit11)
- and $1 << 12 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(prolog_find_zero_high_8):
- test $0x10, %ch
- jnz L(PrologFindZeroExit13)
- test $0x20, %ch
- jnz L(PrologFindZeroExit14)
- test $0x40, %ch
- jnz L(PrologFindZeroExit15)
- and $1 << 16 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(PrologFindZeroExit1):
- and $1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(PrologFindZeroExit2):
- and $1 << 2 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(PrologFindZeroExit3):
- and $1 << 3 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(PrologFindZeroExit5):
- and $1 << 5 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(PrologFindZeroExit6):
- and $1 << 6 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(PrologFindZeroExit7):
- and $1 << 7 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(PrologFindZeroExit9):
- and $1 << 9 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(PrologFindZeroExit10):
- and $1 << 10 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(PrologFindZeroExit11):
- and $1 << 11 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(PrologFindZeroExit13):
- and $1 << 13 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(PrologFindZeroExit14):
- and $1 << 14 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
- .p2align 4
-L(PrologFindZeroExit15):
- and $1 << 15 - 1, %rax
- jnz L(match_exit)
- xor %rax, %rax
- ret
-
-END (__strrchr_sse2_no_bsf)
-#endif
diff --git a/libc/sysdeps/x86_64/multiarch/strrchr.S b/libc/sysdeps/x86_64/multiarch/strrchr.S
deleted file mode 100644
index 3f92a41ef..000000000
--- a/libc/sysdeps/x86_64/multiarch/strrchr.S
+++ /dev/null
@@ -1,288 +0,0 @@
-/* Multiple versions of strrchr
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2009-2013 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-
-/* Define multiple versions only for the definition in libc and for
- the DSO. In static binaries we need strrchr before the initialization
- happened. */
-#if defined SHARED && !defined NOT_IN_libc
- .text
-ENTRY(strrchr)
- .type strrchr, @gnu_indirect_function
- cmpl $0, __cpu_features+KIND_OFFSET(%rip)
- jne 1f
- call __init_cpu_features
-1: leaq __strrchr_sse2(%rip), %rax
- testl $bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
- jnz 2f
- testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
- jz 2f
- leaq __strrchr_sse42(%rip), %rax
- ret
-2: testl $bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip)
- jz 3f
- leaq __strrchr_sse2_no_bsf(%rip), %rax
-3: ret
-END(strrchr)
-
-/*
- This implementation uses SSE4 instructions to compare up to 16 bytes
- at a time looking for the last occurrence of the character c in the
- string s:
-
- char *strrchr (const char *s, int c);
-
- We use 0x4a:
- _SIDD_SBYTE_OPS
- | _SIDD_CMP_EQUAL_EACH
- | _SIDD_MOST_SIGNIFICANT
- on pcmpistri to compare xmm/mem128
-
- 0 1 2 3 4 5 6 7 8 9 A B C D E F
- X X X X X X X X X X X X X X X X
-
- against xmm
-
- 0 1 2 3 4 5 6 7 8 9 A B C D E F
- C C C C C C C C C C C C C C C C
-
- to find out if the first 16byte data element has a byte C and the
- last offset. There are 4 cases:
-
- 1. The first 16byte data element has EOS and has the byte C at the
- last offset X.
- 2. The first 16byte data element is valid and has the byte C at the
- last offset X.
- 3. The first 16byte data element has EOS and doesn't have the byte C.
- 4. The first 16byte data element is valid and doesn't have the byte C.
-
- Here is the table of ECX, CFlag, ZFlag and SFlag for 3 cases:
-
- case ECX CFlag ZFlag SFlag
- 1 X 1 1 0
- 2 X 1 0 0
- 3 16 0 1 0
- 4 16 0 0 0
-
- We exit from the loop for cases 1 and 3 with jz which branches
- when ZFlag is 1. If CFlag == 1, ECX has the offset X for case 1. */
-
-
- .section .text.sse4.2,"ax",@progbits
- .align 16
- .type __strrchr_sse42, @function
- .globl __strrchr_sse42
- .hidden __strrchr_sse42
-__strrchr_sse42:
- cfi_startproc
- CALL_MCOUNT
- testb %sil, %sil
- je __strend_sse4
- xor %eax,%eax /* RAX has the last occurrence of s. */
- movd %esi, %xmm1
- punpcklbw %xmm1, %xmm1
- movl %edi, %esi
- punpcklbw %xmm1, %xmm1
- andl $15, %esi
- pshufd $0, %xmm1, %xmm1
- movq %rdi, %r8
- je L(loop)
-
-/* Handle unaligned string using psrldq. */
- leaq L(psrldq_table)(%rip), %rdx
- andq $-16, %r8
- movslq (%rdx,%rsi,4),%r9
- movdqa (%r8), %xmm0
- addq %rdx, %r9
- jmp *%r9
-
-/* Handle unaligned string with offset 1 using psrldq. */
- .p2align 4
-L(psrldq_1):
- psrldq $1, %xmm0
-
- .p2align 4
-L(unaligned_pcmpistri):
- pcmpistri $0x4a, %xmm1, %xmm0
- jnc L(unaligned_no_byte)
- leaq (%rdi,%rcx), %rax
-L(unaligned_no_byte):
- /* Find the length of the unaligned string. */
- pcmpistri $0x3a, %xmm0, %xmm0
- movl $16, %edx
- subl %esi, %edx
- cmpl %ecx, %edx
- /* Return RAX if the unaligned fragment to next 16B already
- contain the NULL terminator. */
- jg L(exit)
- addq $16, %r8
-
-/* Loop start on aligned string. */
- .p2align 4
-L(loop):
- pcmpistri $0x4a, (%r8), %xmm1
- jbe L(match_or_eos)
- addq $16, %r8
- jmp L(loop)
- .p2align 4
-L(match_or_eos):
- je L(had_eos)
-L(match_no_eos):
- leaq (%r8,%rcx), %rax
- addq $16, %r8
- jmp L(loop)
- .p2align 4
-L(had_eos):
- jnc L(exit)
- leaq (%r8,%rcx), %rax
- .p2align 4
-L(exit):
- ret
-
-/* Handle unaligned string with offset 15 using psrldq. */
- .p2align 4
-L(psrldq_15):
- psrldq $15, %xmm0
- jmp L(unaligned_pcmpistri)
-
-/* Handle unaligned string with offset 14 using psrldq. */
- .p2align 4
-L(psrldq_14):
- psrldq $14, %xmm0
- jmp L(unaligned_pcmpistri)
-
-/* Handle unaligned string with offset 13 using psrldq. */
- .p2align 4
-L(psrldq_13):
- psrldq $13, %xmm0
- jmp L(unaligned_pcmpistri)
-
-/* Handle unaligned string with offset 12 using psrldq. */
- .p2align 4
-L(psrldq_12):
- psrldq $12, %xmm0
- jmp L(unaligned_pcmpistri)
-
-/* Handle unaligned string with offset 11 using psrldq. */
- .p2align 4
-L(psrldq_11):
- psrldq $11, %xmm0
- jmp L(unaligned_pcmpistri)
-
-/* Handle unaligned string with offset 10 using psrldq. */
- .p2align 4
-L(psrldq_10):
- psrldq $10, %xmm0
- jmp L(unaligned_pcmpistri)
-
-/* Handle unaligned string with offset 9 using psrldq. */
- .p2align 4
-L(psrldq_9):
- psrldq $9, %xmm0
- jmp L(unaligned_pcmpistri)
-
-/* Handle unaligned string with offset 8 using psrldq. */
- .p2align 4
-L(psrldq_8):
- psrldq $8, %xmm0
- jmp L(unaligned_pcmpistri)
-
-/* Handle unaligned string with offset 7 using psrldq. */
- .p2align 4
-L(psrldq_7):
- psrldq $7, %xmm0
- jmp L(unaligned_pcmpistri)
-
-/* Handle unaligned string with offset 6 using psrldq. */
- .p2align 4
-L(psrldq_6):
- psrldq $6, %xmm0
- jmp L(unaligned_pcmpistri)
-
-/* Handle unaligned string with offset 5 using psrldq. */
- .p2align 4
-L(psrldq_5):
- psrldq $5, %xmm0
- jmp L(unaligned_pcmpistri)
-
-/* Handle unaligned string with offset 4 using psrldq. */
- .p2align 4
-L(psrldq_4):
- psrldq $4, %xmm0
- jmp L(unaligned_pcmpistri)
-
-/* Handle unaligned string with offset 3 using psrldq. */
- .p2align 4
-L(psrldq_3):
- psrldq $3, %xmm0
- jmp L(unaligned_pcmpistri)
-
-/* Handle unaligned string with offset 2 using psrldq. */
- .p2align 4
-L(psrldq_2):
- psrldq $2, %xmm0
- jmp L(unaligned_pcmpistri)
-
- cfi_endproc
- .size __strrchr_sse42, .-__strrchr_sse42
-
- .section .rodata.sse4.2,"a",@progbits
- .p2align 4
-L(psrldq_table):
- .int L(loop) - L(psrldq_table)
- .int L(psrldq_1) - L(psrldq_table)
- .int L(psrldq_2) - L(psrldq_table)
- .int L(psrldq_3) - L(psrldq_table)
- .int L(psrldq_4) - L(psrldq_table)
- .int L(psrldq_5) - L(psrldq_table)
- .int L(psrldq_6) - L(psrldq_table)
- .int L(psrldq_7) - L(psrldq_table)
- .int L(psrldq_8) - L(psrldq_table)
- .int L(psrldq_9) - L(psrldq_table)
- .int L(psrldq_10) - L(psrldq_table)
- .int L(psrldq_11) - L(psrldq_table)
- .int L(psrldq_12) - L(psrldq_table)
- .int L(psrldq_13) - L(psrldq_table)
- .int L(psrldq_14) - L(psrldq_table)
- .int L(psrldq_15) - L(psrldq_table)
-
-
-# undef ENTRY
-# define ENTRY(name) \
- .type __strrchr_sse2, @function; \
- .align 16; \
- .globl __strrchr_sse2; \
- .hidden __strrchr_sse2; \
- __strrchr_sse2: cfi_startproc; \
- CALL_MCOUNT
-# undef END
-# define END(name) \
- cfi_endproc; .size __strrchr_sse2, .-__strrchr_sse2
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strrchr calls through a PLT.
- The speedup we get from using SSE4.2 instruction is likely eaten away
- by the indirect call in the PLT. */
-# define libc_hidden_builtin_def(name) \
- .globl __GI_strrchr; __GI_strrchr = __strrchr_sse2
-#endif
-
-#include "../strrchr.S"
diff --git a/libc/sysdeps/x86_64/stackguard-macros.h b/libc/sysdeps/x86_64/stackguard-macros.h
index d7fedb373..1948800cd 100644
--- a/libc/sysdeps/x86_64/stackguard-macros.h
+++ b/libc/sysdeps/x86_64/stackguard-macros.h
@@ -4,3 +4,8 @@
({ uintptr_t x; \
asm ("mov %%fs:%c1, %0" : "=r" (x) \
: "i" (offsetof (tcbhead_t, stack_guard))); x; })
+
+#define POINTER_CHK_GUARD \
+ ({ uintptr_t x; \
+ asm ("mov %%fs:%c1, %0" : "=r" (x) \
+ : "i" (offsetof (tcbhead_t, pointer_guard))); x; })
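The hunk above adds POINTER_CHK_GUARD alongside the existing STACK_CHK_GUARD, reading the TCB's pointer_guard slot through %fs. A standalone sketch of the same two reads (assumptions: x86_64 glibc with the usual TCB layout, stack_guard at %fs:0x28 and pointer_guard at %fs:0x30; the macro itself computes the offsets with offsetof, so the constants here are only illustrative):

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uintptr_t stack_guard, pointer_guard;
  /* Same loads as the macros, with the offsetof results written out.  */
  asm ("mov %%fs:0x28, %0" : "=r" (stack_guard));
  asm ("mov %%fs:0x30, %0" : "=r" (pointer_guard));
  printf ("stack guard:   %#lx\n", (unsigned long) stack_guard);
  printf ("pointer guard: %#lx\n", (unsigned long) pointer_guard);
  return 0;
}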
diff --git a/libc/sysdeps/x86_64/strchr.S b/libc/sysdeps/x86_64/strchr.S
index d89f1eba8..7440500a6 100644
--- a/libc/sysdeps/x86_64/strchr.S
+++ b/libc/sysdeps/x86_64/strchr.S
@@ -19,51 +19,169 @@
#include <sysdep.h>
-
.text
ENTRY (strchr)
movd %esi, %xmm1
- movq %rdi, %rcx
- punpcklbw %xmm1, %xmm1
- andq $~15, %rdi
- pxor %xmm2, %xmm2
+ movl %edi, %eax
+ andl $4095, %eax
punpcklbw %xmm1, %xmm1
- orl $0xffffffff, %esi
- movdqa (%rdi), %xmm0
+ cmpl $4032, %eax
+ punpcklwd %xmm1, %xmm1
pshufd $0, %xmm1, %xmm1
- subq %rdi, %rcx
- movdqa %xmm0, %xmm3
- leaq 16(%rdi), %rdi
+ jg L(cross_page)
+ movdqu (%rdi), %xmm0
+ pxor %xmm3, %xmm3
+ movdqa %xmm0, %xmm4
pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm3
- shl %cl, %esi
- pmovmskb %xmm0, %edx
- pmovmskb %xmm3, %ecx
- andl %esi, %edx
- andl %esi, %ecx
- orl %edx, %ecx
- jnz 1f
+ pcmpeqb %xmm3, %xmm4
+ por %xmm4, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ je L(next_48_bytes)
+ bsf %eax, %eax
+#ifdef AS_STRCHRNUL
+ leaq (%rdi,%rax), %rax
+#else
+ movl $0, %edx
+ leaq (%rdi,%rax), %rax
+ cmpb %sil, (%rax)
+ cmovne %rdx, %rax
+#endif
+ ret
-2: movdqa (%rdi), %xmm0
- leaq 16(%rdi), %rdi
- movdqa %xmm0, %xmm3
+ .p2align 3
+ L(next_48_bytes):
+ movdqu 16(%rdi), %xmm0
+ movdqa %xmm0, %xmm4
pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm3
- pmovmskb %xmm0, %edx
- pmovmskb %xmm3, %ecx
- orl %edx, %ecx
- jz 2b
+ pcmpeqb %xmm3, %xmm4
+ por %xmm4, %xmm0
+ pmovmskb %xmm0, %ecx
+ movdqu 32(%rdi), %xmm0
+ movdqa %xmm0, %xmm4
+ pcmpeqb %xmm1, %xmm0
+ salq $16, %rcx
+ pcmpeqb %xmm3, %xmm4
+ por %xmm4, %xmm0
+ pmovmskb %xmm0, %eax
+ movdqu 48(%rdi), %xmm0
+ pcmpeqb %xmm0, %xmm3
+ salq $32, %rax
+ pcmpeqb %xmm1, %xmm0
+ orq %rcx, %rax
+ por %xmm3, %xmm0
+ pmovmskb %xmm0, %ecx
+ salq $48, %rcx
+ orq %rcx, %rax
+ testq %rax, %rax
+ jne L(return)
+L(loop_start):
+	/* We use this alignment to force the loop to be aligned to 8 but
+	   not 16 bytes. This gives better scheduling on AMD processors. */
+ .p2align 4
+ pxor %xmm6, %xmm6
+ andq $-64, %rdi
+ .p2align 3
+L(loop64):
+ addq $64, %rdi
+ movdqa (%rdi), %xmm5
+ movdqa 16(%rdi), %xmm2
+ movdqa 32(%rdi), %xmm3
+ pxor %xmm1, %xmm5
+ movdqa 48(%rdi), %xmm4
+ pxor %xmm1, %xmm2
+ pxor %xmm1, %xmm3
+ pminub (%rdi), %xmm5
+ pxor %xmm1, %xmm4
+ pminub 16(%rdi), %xmm2
+ pminub 32(%rdi), %xmm3
+ pminub %xmm2, %xmm5
+ pminub 48(%rdi), %xmm4
+ pminub %xmm3, %xmm5
+ pminub %xmm4, %xmm5
+ pcmpeqb %xmm6, %xmm5
+ pmovmskb %xmm5, %eax
+
+ testl %eax, %eax
+ je L(loop64)
+
+ movdqa (%rdi), %xmm5
+ movdqa %xmm5, %xmm0
+ pcmpeqb %xmm1, %xmm5
+ pcmpeqb %xmm6, %xmm0
+ por %xmm0, %xmm5
+ pcmpeqb %xmm6, %xmm2
+ pcmpeqb %xmm6, %xmm3
+ pcmpeqb %xmm6, %xmm4
+
+ pmovmskb %xmm5, %ecx
+ pmovmskb %xmm2, %eax
+ salq $16, %rax
+ pmovmskb %xmm3, %r8d
+ pmovmskb %xmm4, %edx
+ salq $32, %r8
+ orq %r8, %rax
+ orq %rcx, %rax
+ salq $48, %rdx
+ orq %rdx, %rax
+ .p2align 3
+L(return):
+ bsfq %rax, %rax
+#ifdef AS_STRCHRNUL
+ leaq (%rdi,%rax), %rax
+#else
+ movl $0, %edx
+ leaq (%rdi,%rax), %rax
+ cmpb %sil, (%rax)
+ cmovne %rdx, %rax
+#endif
+ ret
+ .p2align 4
+
+L(cross_page):
+ movq %rdi, %rdx
+ pxor %xmm2, %xmm2
+ andq $-64, %rdx
+ movdqa %xmm1, %xmm0
+ movdqa (%rdx), %xmm3
+ movdqa %xmm3, %xmm4
+ pcmpeqb %xmm1, %xmm3
+ pcmpeqb %xmm2, %xmm4
+ por %xmm4, %xmm3
+ pmovmskb %xmm3, %r8d
+ movdqa 16(%rdx), %xmm3
+ movdqa %xmm3, %xmm4
+ pcmpeqb %xmm1, %xmm3
+ pcmpeqb %xmm2, %xmm4
+ por %xmm4, %xmm3
+ pmovmskb %xmm3, %eax
+ movdqa 32(%rdx), %xmm3
+ movdqa %xmm3, %xmm4
+ pcmpeqb %xmm1, %xmm3
+ salq $16, %rax
+ pcmpeqb %xmm2, %xmm4
+ por %xmm4, %xmm3
+ pmovmskb %xmm3, %r9d
+ movdqa 48(%rdx), %xmm3
+ pcmpeqb %xmm3, %xmm2
+ salq $32, %r9
+ pcmpeqb %xmm3, %xmm0
+ orq %r9, %rax
+ orq %r8, %rax
+ por %xmm2, %xmm0
+ pmovmskb %xmm0, %ecx
+ salq $48, %rcx
+ orq %rcx, %rax
+ movl %edi, %ecx
+ subb %dl, %cl
+ shrq %cl, %rax
+ testq %rax, %rax
+ jne L(return)
+ jmp L(loop_start)
-1: bsfl %edx, %edx
- jz 4f
- bsfl %ecx, %ecx
- leaq -16(%rdi,%rdx), %rax
- cmpl %edx, %ecx
- je 5f
-4: xorl %eax, %eax
-5: ret
END (strchr)
+#ifndef AS_STRCHRNUL
weak_alias (strchr, index)
libc_hidden_builtin_def (strchr)
-
+#endif
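The rewritten strchr above guards its unaligned 64-byte fast path with a page-offset check (andl $4095 / cmpl $4032 / jg L(cross_page)), and the new strrchr further down uses the same test. A one-line C restatement of that predicate (a sketch assuming the 4096-byte pages the constants imply):

#include <stdint.h>

/* True when an unaligned 64-byte read starting at S could run past the end
   of the current 4096-byte page (page offset greater than 4096 - 64).  */
static inline int
may_cross_page (const char *s)
{
  return ((uintptr_t) s & 4095) > 4032;
}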
diff --git a/libc/sysdeps/x86_64/strchrnul.S b/libc/sysdeps/x86_64/strchrnul.S
index d8c345ba7..bceeb6187 100644
--- a/libc/sysdeps/x86_64/strchrnul.S
+++ b/libc/sysdeps/x86_64/strchrnul.S
@@ -20,43 +20,8 @@
#include <sysdep.h>
-
- .text
-ENTRY (__strchrnul)
- movd %esi, %xmm1
- movq %rdi, %rcx
- punpcklbw %xmm1, %xmm1
- andq $~15, %rdi
- pxor %xmm2, %xmm2
- punpcklbw %xmm1, %xmm1
- orl $0xffffffff, %esi
- movdqa (%rdi), %xmm0
- pshufd $0, %xmm1, %xmm1
- subq %rdi, %rcx
- movdqa %xmm0, %xmm3
- leaq 16(%rdi), %rdi
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm3
- shl %cl, %esi
- pmovmskb %xmm0, %edx
- pmovmskb %xmm3, %ecx
- orl %edx, %ecx
- andl %esi, %ecx
- jnz 1f
-
-2: movdqa (%rdi), %xmm0
- leaq 16(%rdi), %rdi
- movdqa %xmm0, %xmm3
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm3
- pmovmskb %xmm0, %edx
- pmovmskb %xmm3, %ecx
- orl %edx, %ecx
- jz 2b
-
-1: bsfl %ecx, %edx
- leaq -16(%rdi,%rdx), %rax
- ret
-END (__strchrnul)
+#define strchr __strchrnul
+#define AS_STRCHRNUL
+#include "strchr.S"
weak_alias (__strchrnul, strchrnul)
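
With this change strchrnul.S carries no loop of its own: it renames the entry point to __strchrnul, defines AS_STRCHRNUL and includes strchr.S, which tests that macro around its return sequence and around the weak_alias/libc_hidden_builtin_def lines. A sketch of the same single-source, two-entry-points pattern in C, using hypothetical file and function names:

    /* chr_generic.c -- hypothetical shared implementation.  */
    #include <stddef.h>

    #ifndef CHR_NAME
    # define CHR_NAME my_strchr
    #endif

    char *
    CHR_NAME (const char *s, int c)
    {
      while (*s != (char) c && *s != '\0')
        s++;
    #ifdef AS_STRCHRNUL
      return (char *) s;                          /* strchrnul flavour.  */
    #else
      return *s == (char) c ? (char *) s : NULL;  /* plain strchr flavour.  */
    #endif
    }

A second file, say my_strchrnul.c, would then contain only '#define CHR_NAME my_strchrnul', '#define AS_STRCHRNUL' and '#include "chr_generic.c"', mirroring the three lines that now make up strchrnul.S.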
diff --git a/libc/sysdeps/x86_64/strrchr.S b/libc/sysdeps/x86_64/strrchr.S
index e413b0743..2a07ff75a 100644
--- a/libc/sysdeps/x86_64/strrchr.S
+++ b/libc/sysdeps/x86_64/strrchr.S
@@ -1,6 +1,5 @@
/* strrchr (str, ch) -- Return pointer to last occurrence of CH in STR.
- For AMD x86-64.
- Copyright (C) 2009-2013 Free Software Foundation, Inc.
+ Copyright (C) 2013 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -17,63 +16,212 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
+#include <sysdep.h>
.text
ENTRY (strrchr)
movd %esi, %xmm1
- movq %rdi, %rcx
- punpcklbw %xmm1, %xmm1
- andq $~15, %rdi
- pxor %xmm2, %xmm2
- punpcklbw %xmm1, %xmm1
- orl $0xffffffff, %esi
- movdqa (%rdi), %xmm0
+ movq %rdi, %rax
+ andl $4095, %eax
+ punpcklbw %xmm1, %xmm1
+ cmpq $4032, %rax
+ punpcklwd %xmm1, %xmm1
pshufd $0, %xmm1, %xmm1
- subq %rdi, %rcx
+ ja L(cross_page)
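+	/* Take the careful path when the 64 unaligned bytes starting at
+	   %rdi would cross a page boundary, i.e. when the offset within
+	   the page is above 4096 - 64.  */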
+ movdqu (%rdi), %xmm0
+ pxor %xmm2, %xmm2
movdqa %xmm0, %xmm3
- leaq 16(%rdi), %rdi
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm3
- shl %cl, %esi
- pmovmskb %xmm0, %edx
- pmovmskb %xmm3, %ecx
- andl %esi, %edx
- andl %esi, %ecx
- xorl %eax, %eax
- movl %edx, %esi
- orl %ecx, %esi
- jnz 1f
+ pmovmskb %xmm0, %ecx
+ pmovmskb %xmm3, %edx
+ testq %rdx, %rdx
+ je L(next_48_bytes)
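+	/* %rcx holds the C matches and %rdx the NUL matches of the first
+	   16 bytes.  (%rdx - 1) ^ %rdx keeps only the bits at or below
+	   the first NUL, so the andq below selects C matches that occur
+	   before the string ends.  */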
+ leaq -1(%rdx), %rax
+ xorq %rdx, %rax
+ andq %rcx, %rax
+ je L(exit)
+ bsrq %rax, %rax
+ addq %rdi, %rax
+ ret
-2: movdqa (%rdi), %xmm0
- leaq 16(%rdi), %rdi
- movdqa %xmm0, %xmm3
+ .p2align 4
+L(next_48_bytes):
+ movdqu 16(%rdi), %xmm4
+ movdqa %xmm4, %xmm5
+ movdqu 32(%rdi), %xmm3
+ pcmpeqb %xmm1, %xmm4
+ pcmpeqb %xmm2, %xmm5
+ movdqu 48(%rdi), %xmm0
+ pmovmskb %xmm5, %edx
+ movdqa %xmm3, %xmm5
+ pcmpeqb %xmm1, %xmm3
+ pcmpeqb %xmm2, %xmm5
+ pcmpeqb %xmm0, %xmm2
+ salq $16, %rdx
+ pmovmskb %xmm3, %r8d
+ pmovmskb %xmm5, %eax
+ pmovmskb %xmm2, %esi
+ salq $32, %r8
+ salq $32, %rax
pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm3
- pmovmskb %xmm0, %edx
- pmovmskb %xmm3, %ecx
- movl %edx, %esi
- orl %ecx, %esi
- jz 2b
+ orq %rdx, %rax
+ movq %rsi, %rdx
+ pmovmskb %xmm4, %esi
+ salq $48, %rdx
+ salq $16, %rsi
+ orq %r8, %rsi
+ orq %rcx, %rsi
+ pmovmskb %xmm0, %ecx
+ salq $48, %rcx
+ orq %rcx, %rsi
+ orq %rdx, %rax
+ je L(loop_header2)
+ leaq -1(%rax), %rcx
+ xorq %rax, %rcx
+ andq %rcx, %rsi
+ je L(exit)
+ bsrq %rsi, %rsi
+ leaq (%rdi,%rsi), %rax
+ ret
-1: bsfl %ecx, %r9d
- movl $0xffffffff, %r8d
- movl $31, %ecx
- jnz 5f
+ .p2align 4
+L(loop_header2):
+ testq %rsi, %rsi
+ movq %rdi, %rcx
+ je L(no_c_found)
+L(loop_header):
+ addq $64, %rdi
+ pxor %xmm7, %xmm7
+ andq $-64, %rdi
+ jmp L(loop_entry)
+
+ .p2align 4
+L(loop64):
+ testq %rdx, %rdx
+ cmovne %rdx, %rsi
+ cmovne %rdi, %rcx
+ addq $64, %rdi
+L(loop_entry):
+ movdqa 32(%rdi), %xmm3
+ pxor %xmm6, %xmm6
+ movdqa 48(%rdi), %xmm2
+ movdqa %xmm3, %xmm0
+ movdqa 16(%rdi), %xmm4
+ pminub %xmm2, %xmm0
+ movdqa (%rdi), %xmm5
+ pminub %xmm4, %xmm0
+ pminub %xmm5, %xmm0
+ pcmpeqb %xmm7, %xmm0
+ pmovmskb %xmm0, %eax
+ movdqa %xmm5, %xmm0
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %r9d
+ movdqa %xmm4, %xmm0
+ pcmpeqb %xmm1, %xmm0
+ pmovmskb %xmm0, %edx
+ movdqa %xmm3, %xmm0
+ pcmpeqb %xmm1, %xmm0
+ salq $16, %rdx
+ pmovmskb %xmm0, %r10d
+ movdqa %xmm2, %xmm0
+ pcmpeqb %xmm1, %xmm0
+ salq $32, %r10
+ orq %r10, %rdx
+ pmovmskb %xmm0, %r8d
+ orq %r9, %rdx
+ salq $48, %r8
+ orq %r8, %rdx
+ testl %eax, %eax
+ je L(loop64)
+ pcmpeqb %xmm6, %xmm4
+ pcmpeqb %xmm6, %xmm3
+ pcmpeqb %xmm6, %xmm5
+ pmovmskb %xmm4, %eax
+ pmovmskb %xmm3, %r10d
+ pcmpeqb %xmm6, %xmm2
+ pmovmskb %xmm5, %r9d
+ salq $32, %r10
+ salq $16, %rax
+ pmovmskb %xmm2, %r8d
+ orq %r10, %rax
+ orq %r9, %rax
+ salq $48, %r8
+ orq %r8, %rax
+ leaq -1(%rax), %r8
+ xorq %rax, %r8
+ andq %r8, %rdx
+ cmovne %rdi, %rcx
+ cmovne %rdx, %rsi
+ bsrq %rsi, %rsi
+ leaq (%rcx,%rsi), %rax
+ ret
- bsrl %edx, %edx
- jz 2b
- leaq -16(%rdi,%rdx), %rax
- jmp 2b
+ .p2align 4
+L(no_c_found):
+ movl $1, %esi
+ xorl %ecx, %ecx
+ jmp L(loop_header)
+
+ .p2align 4
+L(exit):
+ xorl %eax, %eax
+ ret
-5: subl %r9d, %ecx
- shrl %cl, %r8d
- andl %r8d, %edx
- bsrl %edx, %edx
- jz 4f
- leaq -16(%rdi,%rdx), %rax
-4: ret
+ .p2align 4
+L(cross_page):
+ movq %rdi, %rax
+ pxor %xmm0, %xmm0
+ andq $-64, %rax
+ movdqu (%rax), %xmm5
+ movdqa %xmm5, %xmm6
+ movdqu 16(%rax), %xmm4
+ pcmpeqb %xmm1, %xmm5
+ pcmpeqb %xmm0, %xmm6
+ movdqu 32(%rax), %xmm3
+ pmovmskb %xmm6, %esi
+ movdqa %xmm4, %xmm6
+ movdqu 48(%rax), %xmm2
+ pcmpeqb %xmm1, %xmm4
+ pcmpeqb %xmm0, %xmm6
+ pmovmskb %xmm6, %edx
+ movdqa %xmm3, %xmm6
+ pcmpeqb %xmm1, %xmm3
+ pcmpeqb %xmm0, %xmm6
+ pcmpeqb %xmm2, %xmm0
+ salq $16, %rdx
+ pmovmskb %xmm3, %r9d
+ pmovmskb %xmm6, %r8d
+ pmovmskb %xmm0, %ecx
+ salq $32, %r9
+ salq $32, %r8
+ pcmpeqb %xmm1, %xmm2
+ orq %r8, %rdx
+ salq $48, %rcx
+ pmovmskb %xmm5, %r8d
+ orq %rsi, %rdx
+ pmovmskb %xmm4, %esi
+ orq %rcx, %rdx
+ pmovmskb %xmm2, %ecx
+ salq $16, %rsi
+ salq $48, %rcx
+ orq %r9, %rsi
+ orq %r8, %rsi
+ orq %rcx, %rsi
+ movl %edi, %ecx
+ subl %eax, %ecx
+ shrq %cl, %rdx
+ shrq %cl, %rsi
+ testq %rdx, %rdx
+ je L(loop_header2)
+ leaq -1(%rdx), %rax
+ xorq %rdx, %rax
+ andq %rax, %rsi
+ je L(exit)
+ bsrq %rsi, %rax
+ addq %rdi, %rax
+ ret
END (strrchr)
weak_alias (strrchr, rindex)
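
The new strrchr likewise scans forward in 64-byte blocks: L(loop64) uses cmovne to remember the most recent block, and the match mask inside it, in which C was seen, and only once a NUL turns up does it mask off matches past the terminator ((mask - 1) ^ mask again) and pick the highest remaining bit with bsrq. A scalar C sketch of that forward scan that remembers the last match, with a hypothetical name:

    #include <stddef.h>

    static char *
    strrchr_sketch (const char *s, int c_in)
    {
      unsigned char c = (unsigned char) c_in;
      const char *last = NULL;                /* plays the role of %rcx/%rsi  */

      for (;; s++)
        {
          if (*(const unsigned char *) s == c)
            last = s;                         /* cmovne: remember latest match  */
          if (*s == '\0')
            return (char *) last;             /* bsrq on the saved mask, or L(exit)  */
        }
    }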