diff options
Diffstat (limited to 'libc/sysdeps')
131 files changed, 1701 insertions, 5256 deletions
diff --git a/libc/sysdeps/generic/bp-sym.h b/libc/sysdeps/generic/bp-sym.h deleted file mode 100644 index 089912a68..000000000 --- a/libc/sysdeps/generic/bp-sym.h +++ /dev/null @@ -1,25 +0,0 @@ -/* Bounded-pointer symbol modifier. - Copyright (C) 2000-2013 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Greg McGary <greg@mcgary.org> - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#define BP_SYM(name) _BP_SYM (name) -#if __BOUNDED_POINTERS__ -# define _BP_SYM(name) __BP_##name -#else -# define _BP_SYM(name) name -#endif diff --git a/libc/sysdeps/generic/ldsodefs.h b/libc/sysdeps/generic/ldsodefs.h index 1781574a9..b89691a97 100644 --- a/libc/sysdeps/generic/ldsodefs.h +++ b/libc/sysdeps/generic/ldsodefs.h @@ -1018,6 +1018,13 @@ extern struct link_map *_dl_find_dso_for_object (const ElfW(Addr) addr) internal_function; rtld_hidden_proto (_dl_find_dso_for_object) +/* Initialization which is normally done by the dynamic linker. */ +extern void _dl_non_dynamic_init (void) internal_function; + +/* Used by static binaries to check the auxiliary vector. 
*/ +extern void _dl_aux_init (ElfW(auxv_t) *av) internal_function; + + __END_DECLS #endif /* ldsodefs.h */ diff --git a/libc/sysdeps/i386/fpu/libm-test-ulps b/libc/sysdeps/i386/fpu/libm-test-ulps index fd0180ffd..6186c99af 100644 --- a/libc/sysdeps/i386/fpu/libm-test-ulps +++ b/libc/sysdeps/i386/fpu/libm-test-ulps @@ -2475,6 +2475,9 @@ ldouble: 2 Test "j0 (0x1.d7ce3ap+107) == 2.775523647291230802651040996274861694514e-17": float: 1 ifloat: 1 +Test "j0 (0x1p16382) == -1.2193782500509000574176799046642541129387e-2466": +ildouble: 1 +ldouble: 1 Test "j0 (10.0) == -0.245935764451348335197760862485328754": double: 3 float: 1 @@ -2508,6 +2511,9 @@ ldouble: 1 Test "j1 (0x1.ff00000000002p+840) == 1.846591691699331493194965158699937660696e-127": double: 1 idouble: 1 +Test "j1 (0x1p16382) == 8.0839224448726336195866026476176740513439e-2467": +ildouble: 1 +ldouble: 1 Test "j1 (10.0) == 0.0434727461688614366697487680258592883": double: 2 float: 1 @@ -3285,6 +3291,9 @@ idouble: 1 Test "y0 (0x1p-80) == -3.5375500319532942168707373066828113573541e+1": double: 1 idouble: 1 +Test "y0 (0x1p16382) == 8.0839224448726336195866026476176740513439e-2467": +ildouble: 1 +ldouble: 1 Test "y0 (1.0) == 0.0882569642156769579829267660235151628": double: 2 float: 1 @@ -3329,6 +3338,9 @@ ldouble: 1 Test "y1 (0x1p-10) == -6.5190099301063115047395187618929589514382e+02": float: 1 ifloat: 1 +Test "y1 (0x1p16382) == 1.2193782500509000574176799046642541129387e-2466": +ildouble: 1 +ldouble: 1 Test "y1 (1.0) == -0.781212821300288716547150000047964821": double: 1 idouble: 1 diff --git a/libc/sysdeps/ieee754/bits/nan.h b/libc/sysdeps/ieee754/bits/nan.h index 935271a7c..41f47ba09 100644 --- a/libc/sysdeps/ieee754/bits/nan.h +++ b/libc/sysdeps/ieee754/bits/nan.h @@ -39,14 +39,14 @@ # include <endian.h> # if __BYTE_ORDER == __BIG_ENDIAN -# define __nan_bytes { 0x7f, 0xc0, 0, 0 } +# define __qnan_bytes { 0x7f, 0xc0, 0, 0 } # endif # if __BYTE_ORDER == __LITTLE_ENDIAN -# define __nan_bytes { 0, 0, 0xc0, 0x7f } 
+# define __qnan_bytes { 0, 0, 0xc0, 0x7f } # endif -static union { unsigned char __c[4]; float __d; } __nan_union - __attribute__ ((__unused__)) = { __nan_bytes }; -# define NAN (__nan_union.__d) +static union { unsigned char __c[4]; float __d; } __qnan_union + __attribute__ ((__unused__)) = { __qnan_bytes }; +# define NAN (__qnan_union.__d) #endif /* GCC. */ diff --git a/libc/sysdeps/ieee754/dbl-64/e_j0.c b/libc/sysdeps/ieee754/dbl-64/e_j0.c index f393a762b..d641a0914 100644 --- a/libc/sysdeps/ieee754/dbl-64/e_j0.c +++ b/libc/sysdeps/ieee754/dbl-64/e_j0.c @@ -293,7 +293,8 @@ pzero(double x) int32_t ix; GET_HIGH_WORD(ix,x); ix &= 0x7fffffff; - if(ix>=0x40200000) {p = pR8; q= pS8;} + if (ix>=0x41b00000) {return one;} + else if(ix>=0x40200000){p = pR8; q= pS8;} else if(ix>=0x40122E8B){p = pR5; q= pS5;} else if(ix>=0x4006DB6D){p = pR3; q= pS3;} else if(ix>=0x40000000){p = pR2; q= pS2;} @@ -400,7 +401,8 @@ qzero(double x) int32_t ix; GET_HIGH_WORD(ix,x); ix &= 0x7fffffff; - if(ix>=0x40200000) {p = qR8; q= qS8;} + if (ix>=0x41b00000) {return -.125/x;} + else if(ix>=0x40200000){p = qR8; q= qS8;} else if(ix>=0x40122E8B){p = qR5; q= qS5;} else if(ix>=0x4006DB6D){p = qR3; q= qS3;} else if(ix>=0x40000000){p = qR2; q= qS2;} diff --git a/libc/sysdeps/ieee754/dbl-64/e_j1.c b/libc/sysdeps/ieee754/dbl-64/e_j1.c index cba4d46b1..cca5f20b4 100644 --- a/libc/sysdeps/ieee754/dbl-64/e_j1.c +++ b/libc/sysdeps/ieee754/dbl-64/e_j1.c @@ -291,7 +291,8 @@ pone(double x) int32_t ix; GET_HIGH_WORD(ix,x); ix &= 0x7fffffff; - if(ix>=0x40200000) {p = pr8; q= ps8;} + if (ix>=0x41b00000) {return one;} + else if(ix>=0x40200000){p = pr8; q= ps8;} else if(ix>=0x40122E8B){p = pr5; q= ps5;} else if(ix>=0x4006DB6D){p = pr3; q= ps3;} else if(ix>=0x40000000){p = pr2; q= ps2;} @@ -399,7 +400,8 @@ qone(double x) int32_t ix; GET_HIGH_WORD(ix,x); ix &= 0x7fffffff; - if(ix>=0x40200000) {p = qr8; q= qs8;} + if (ix>=0x41b00000) {return .375/x;} + else if(ix>=0x40200000){p = qr8; q= qs8;} else 
if(ix>=0x40122E8B){p = qr5; q= qs5;} else if(ix>=0x4006DB6D){p = qr3; q= qs3;} else if(ix>=0x40000000){p = qr2; q= qs2;} diff --git a/libc/sysdeps/ieee754/dbl-64/mpa.c b/libc/sysdeps/ieee754/dbl-64/mpa.c index 8fc2626f7..076647654 100644 --- a/libc/sysdeps/ieee754/dbl-64/mpa.c +++ b/libc/sysdeps/ieee754/dbl-64/mpa.c @@ -611,6 +611,7 @@ __sub (const mp_no *x, const mp_no *y, mp_no *z, int p) } } +#ifndef NO__MUL /* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P digits. In case P > 3 the error is bounded by 1.001 ULP. */ @@ -761,7 +762,9 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p) EZ = e; Z[0] = X[0] * Y[0]; } +#endif +#ifndef NO__SQR /* Square *X and store result in *Y. X and Y may not overlap. For P in [1, 2, 3], the exact result is truncated to P digits. In case P > 3 the error is bounded by 1.001 ULP. This is a faster special case of @@ -862,6 +865,7 @@ __sqr (const mp_no *x, mp_no *y, int p) EY = e; } +#endif /* Invert *X and store in *Y. 
Relative error bound: - For P = 2: 1.001 * R ^ (1 - P) diff --git a/libc/sysdeps/ieee754/dbl-64/slowexp.c b/libc/sysdeps/ieee754/dbl-64/slowexp.c index c423fc311..8f353f634 100644 --- a/libc/sysdeps/ieee754/dbl-64/slowexp.c +++ b/libc/sysdeps/ieee754/dbl-64/slowexp.c @@ -27,20 +27,23 @@ /*Converting from double precision to Multi-precision and calculating */ /* e^x */ /**************************************************************************/ -#include "mpa.h" #include <math_private.h> +#ifndef USE_LONG_DOUBLE_FOR_MP +# include "mpa.h" +void __mpexp (mp_no *x, mp_no *y, int p); +#endif + #ifndef SECTION # define SECTION #endif -void __mpexp (mp_no *x, mp_no *y, int p); - /*Converting from double precision to Multi-precision and calculating e^x */ double SECTION __slowexp (double x) { +#ifndef USE_LONG_DOUBLE_FOR_MP double w, z, res, eps = 3.0e-26; int p; mp_no mpx, mpy, mpz, mpw, mpeps, mpcor; @@ -66,4 +69,7 @@ __slowexp (double x) __mp_dbl (&mpy, &res, p); return res; } +#else + return (double) __ieee754_expl((long double)x); +#endif } diff --git a/libc/sysdeps/ieee754/dbl-64/slowpow.c b/libc/sysdeps/ieee754/dbl-64/slowpow.c index cccc7e32c..a379728b1 100644 --- a/libc/sysdeps/ieee754/dbl-64/slowpow.c +++ b/libc/sysdeps/ieee754/dbl-64/slowpow.c @@ -59,6 +59,23 @@ __slowpow (double x, double y, double z) if (res >= 0) return res; + /* Compute pow as long double. This is currently only used by powerpc, where + one may get 106 bits of accuracy. */ +#ifdef USE_LONG_DOUBLE_FOR_MP + long double ldw, ldz, ldpp; + static const long double ldeps = 0x4.0p-96; + + ldz = __ieee754_logl ((long double) x); + ldw = (long double) y *ldz; + ldpp = __ieee754_expl (ldw); + res = (double) (ldpp + ldeps); + res1 = (double) (ldpp - ldeps); + + /* Return the result if it is accurate enough. */ + if (res == res1) + return res; +#endif + /* Or else, calculate using multiple precision. P = 10 implies accuracy of 240 bits accuracy, since MP_NO has a radix of 2^24. 
*/ p = 10; diff --git a/libc/sysdeps/ieee754/ldbl-128/e_j0l.c b/libc/sysdeps/ieee754/ldbl-128/e_j0l.c index 1b1828958..9e7880c49 100644 --- a/libc/sysdeps/ieee754/ldbl-128/e_j0l.c +++ b/libc/sysdeps/ieee754/ldbl-128/e_j0l.c @@ -700,6 +700,25 @@ __ieee754_j0l (long double x) return p; } + /* X = x - pi/4 + cos(X) = cos(x) cos(pi/4) + sin(x) sin(pi/4) + = 1/sqrt(2) * (cos(x) + sin(x)) + sin(X) = sin(x) cos(pi/4) - cos(x) sin(pi/4) + = 1/sqrt(2) * (sin(x) - cos(x)) + sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + cf. Fdlibm. */ + __sincosl (xx, &s, &c); + ss = s - c; + cc = s + c; + z = -__cosl (xx + xx); + if ((s * c) < 0) + cc = z / ss; + else + ss = z / cc; + + if (xx > 0x1p256L) + return ONEOSQPI * cc / __ieee754_sqrtl (xx); + xinv = 1.0L / xx; z = xinv * xinv; if (xinv <= 0.25) @@ -761,21 +780,6 @@ __ieee754_j0l (long double x) p = 1.0L + z * p; q = z * xinv * q; q = q - 0.125L * xinv; - /* X = x - pi/4 - cos(X) = cos(x) cos(pi/4) + sin(x) sin(pi/4) - = 1/sqrt(2) * (cos(x) + sin(x)) - sin(X) = sin(x) cos(pi/4) - cos(x) sin(pi/4) - = 1/sqrt(2) * (sin(x) - cos(x)) - sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) - cf. Fdlibm. */ - __sincosl (xx, &s, &c); - ss = s - c; - cc = s + c; - z = -__cosl (xx + xx); - if ((s * c) < 0) - cc = z / ss; - else - ss = z / cc; z = ONEOSQPI * (p * cc - q * ss) / __ieee754_sqrtl (xx); return z; } @@ -843,6 +847,25 @@ long double return p; } + /* X = x - pi/4 + cos(X) = cos(x) cos(pi/4) + sin(x) sin(pi/4) + = 1/sqrt(2) * (cos(x) + sin(x)) + sin(X) = sin(x) cos(pi/4) - cos(x) sin(pi/4) + = 1/sqrt(2) * (sin(x) - cos(x)) + sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) + cf. Fdlibm. 
*/ + __sincosl (x, &s, &c); + ss = s - c; + cc = s + c; + z = -__cosl (x + x); + if ((s * c) < 0) + cc = z / ss; + else + ss = z / cc; + + if (xx > 0x1p256L) + return ONEOSQPI * ss / __ieee754_sqrtl (x); + xinv = 1.0L / xx; z = xinv * xinv; if (xinv <= 0.25) @@ -904,21 +927,6 @@ long double p = 1.0L + z * p; q = z * xinv * q; q = q - 0.125L * xinv; - /* X = x - pi/4 - cos(X) = cos(x) cos(pi/4) + sin(x) sin(pi/4) - = 1/sqrt(2) * (cos(x) + sin(x)) - sin(X) = sin(x) cos(pi/4) - cos(x) sin(pi/4) - = 1/sqrt(2) * (sin(x) - cos(x)) - sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) - cf. Fdlibm. */ - __sincosl (x, &s, &c); - ss = s - c; - cc = s + c; - z = -__cosl (x + x); - if ((s * c) < 0) - cc = z / ss; - else - ss = z / cc; z = ONEOSQPI * (p * ss + q * cc) / __ieee754_sqrtl (x); return z; } diff --git a/libc/sysdeps/ieee754/ldbl-128/e_j1l.c b/libc/sysdeps/ieee754/ldbl-128/e_j1l.c index f16343b26..95e01a39c 100644 --- a/libc/sysdeps/ieee754/ldbl-128/e_j1l.c +++ b/libc/sysdeps/ieee754/ldbl-128/e_j1l.c @@ -706,6 +706,29 @@ __ieee754_j1l (long double x) return p; } + /* X = x - 3 pi/4 + cos(X) = cos(x) cos(3 pi/4) + sin(x) sin(3 pi/4) + = 1/sqrt(2) * (-cos(x) + sin(x)) + sin(X) = sin(x) cos(3 pi/4) - cos(x) sin(3 pi/4) + = -1/sqrt(2) * (sin(x) + cos(x)) + cf. Fdlibm. */ + __sincosl (xx, &s, &c); + ss = -s - c; + cc = s - c; + z = __cosl (xx + xx); + if ((s * c) > 0) + cc = z / ss; + else + ss = z / cc; + + if (xx > 0x1p256L) + { + z = ONEOSQPI * cc / __ieee754_sqrtl (xx); + if (x < 0) + z = -z; + return z; + } + xinv = 1.0L / xx; z = xinv * xinv; if (xinv <= 0.25) @@ -767,20 +790,6 @@ __ieee754_j1l (long double x) p = 1.0L + z * p; q = z * q; q = q * xinv + 0.375L * xinv; - /* X = x - 3 pi/4 - cos(X) = cos(x) cos(3 pi/4) + sin(x) sin(3 pi/4) - = 1/sqrt(2) * (-cos(x) + sin(x)) - sin(X) = sin(x) cos(3 pi/4) - cos(x) sin(3 pi/4) - = -1/sqrt(2) * (sin(x) + cos(x)) - cf. Fdlibm. 
*/ - __sincosl (xx, &s, &c); - ss = -s - c; - cc = s - c; - z = __cosl (xx + xx); - if ((s * c) > 0) - cc = z / ss; - else - ss = z / cc; z = ONEOSQPI * (p * cc - q * ss) / __ieee754_sqrtl (xx); if (x < 0) z = -z; @@ -850,6 +859,24 @@ __ieee754_y1l (long double x) return p; } + /* X = x - 3 pi/4 + cos(X) = cos(x) cos(3 pi/4) + sin(x) sin(3 pi/4) + = 1/sqrt(2) * (-cos(x) + sin(x)) + sin(X) = sin(x) cos(3 pi/4) - cos(x) sin(3 pi/4) + = -1/sqrt(2) * (sin(x) + cos(x)) + cf. Fdlibm. */ + __sincosl (xx, &s, &c); + ss = -s - c; + cc = s - c; + z = __cosl (xx + xx); + if ((s * c) > 0) + cc = z / ss; + else + ss = z / cc; + + if (xx > 0x1p256L) + return ONEOSQPI * ss / __ieee754_sqrtl (xx); + xinv = 1.0L / xx; z = xinv * xinv; if (xinv <= 0.25) @@ -911,20 +938,6 @@ __ieee754_y1l (long double x) p = 1.0L + z * p; q = z * q; q = q * xinv + 0.375L * xinv; - /* X = x - 3 pi/4 - cos(X) = cos(x) cos(3 pi/4) + sin(x) sin(3 pi/4) - = 1/sqrt(2) * (-cos(x) + sin(x)) - sin(X) = sin(x) cos(3 pi/4) - cos(x) sin(3 pi/4) - = -1/sqrt(2) * (sin(x) + cos(x)) - cf. Fdlibm. 
*/ - __sincosl (xx, &s, &c); - ss = -s - c; - cc = s - c; - z = __cosl (xx + xx); - if ((s * c) > 0) - cc = z / ss; - else - ss = z / cc; z = ONEOSQPI * (p * ss + q * cc) / __ieee754_sqrtl (xx); return z; } diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c b/libc/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c index 117bd0f05..abc78a35b 100644 --- a/libc/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c +++ b/libc/sysdeps/ieee754/ldbl-128ibm/e_acoshl.c @@ -52,7 +52,7 @@ __ieee754_acoshl(long double x) return __ieee754_logl(2.0*x-one/(x+__ieee754_sqrtl(t-one))); } else { /* 1<x<2 */ t = x-one; - return __log1p(t+__sqrtl(2.0*t+t*t)); + return __log1p(t+__ieee754_sqrtl(2.0*t+t*t)); } } strong_alias (__ieee754_acoshl, __acoshl_finite) diff --git a/libc/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h b/libc/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h index be9ac71cb..1cce1fc4d 100644 --- a/libc/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h +++ b/libc/sysdeps/ieee754/ldbl-128ibm/math_ldbl.h @@ -125,7 +125,7 @@ ldbl_insert_mantissa (int sign, int exp, int64_t hi64, u_int64_t lo64) /* Handy utility functions to pack/unpack/cononicalize and find the nearbyint of long double implemented as double double. */ static inline long double -ldbl_pack (double a, double aa) +default_ldbl_pack (double a, double aa) { union ibm_extended_long_double u; u.dd[0] = a; @@ -134,7 +134,7 @@ ldbl_pack (double a, double aa) } static inline void -ldbl_unpack (long double l, double *a, double *aa) +default_ldbl_unpack (long double l, double *a, double *aa) { union ibm_extended_long_double u; u.d = l; @@ -142,6 +142,12 @@ ldbl_unpack (long double l, double *a, double *aa) *aa = u.dd[1]; } +#ifndef ldbl_pack +# define ldbl_pack default_ldbl_pack +#endif +#ifndef ldbl_unpack +# define ldbl_unpack default_ldbl_unpack +#endif /* Convert a finite long double to canonical form. Does not handle +/-Inf properly. 
*/ diff --git a/libc/sysdeps/ieee754/ldbl-96/e_j1l.c b/libc/sysdeps/ieee754/ldbl-96/e_j1l.c index 785c0b067..4c13018ae 100644 --- a/libc/sysdeps/ieee754/ldbl-96/e_j1l.c +++ b/libc/sysdeps/ieee754/ldbl-96/e_j1l.c @@ -203,7 +203,7 @@ __ieee754_y1l (long double x) __sincosl (x, &s, &c); ss = -s - c; cc = s - c; - if (ix < 0x7fe00000) + if (ix < 0x7ffe) { /* make sure x+x not overflow */ z = __cosl (x + x); if ((s * c) > zero) diff --git a/libc/sysdeps/init_array/crti.S b/libc/sysdeps/init_array/crti.S new file mode 100644 index 000000000..0a6e9fd95 --- /dev/null +++ b/libc/sysdeps/init_array/crti.S @@ -0,0 +1,13 @@ +/* Dummy crti file. + + In this configuration, crti.o and crtn.o are both empty because the + .init_array/.fini_array sections are used exclusively. + + Older ports cannot use this because even if the linker used to + build libc itself has .init_array support, we don't want to produce + a crt[in].o that presume a linker that new will be used to link + other things later. + + But new configurations without compatibility concerns for + toolchains without .init_array support can use this to avoid the + superfluous .init and .fini boilerplate code. */ diff --git a/libc/sysdeps/init_array/crtn.S b/libc/sysdeps/init_array/crtn.S new file mode 100644 index 000000000..6f70e7716 --- /dev/null +++ b/libc/sysdeps/init_array/crtn.S @@ -0,0 +1,13 @@ +/* Dummy crtn file. + + In this configuration, crti.o and crtn.o are both empty because the + .init_array/.fini_array sections are used exclusively. + + Older ports cannot use this because even if the linker used to + build libc itself has .init_array support, we don't want to produce + a crt[in].o that presume a linker that new will be used to link + other things later. + + But new configurations without compatibility concerns for + toolchains without .init_array support can use this to avoid the + superfluous .init and .fini boilerplate code. 
*/ diff --git a/libc/sysdeps/init_array/elf-init.c b/libc/sysdeps/init_array/elf-init.c new file mode 100644 index 000000000..c6467aac8 --- /dev/null +++ b/libc/sysdeps/init_array/elf-init.c @@ -0,0 +1,37 @@ +/* Startup support for ELF initializers/finalizers in the main executable. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#define NO_INITFINI +#include <csu/elf-init.c> diff --git a/libc/sysdeps/init_array/gmon-start.c b/libc/sysdeps/init_array/gmon-start.c new file mode 100644 index 000000000..6f2d6dc8b --- /dev/null +++ b/libc/sysdeps/init_array/gmon-start.c @@ -0,0 +1,41 @@ +/* gmon startup hook using .preinit_array. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +/* Instead of defining __gmon_start__ globally in gcrt1.o, we make it + static and just put a pointer to it into the .preinit_array section. */ + +#define GMON_START_ARRAY_SECTION ".preinit_array" + +#include <csu/gmon-start.c> diff --git a/libc/sysdeps/powerpc/Implies b/libc/sysdeps/powerpc/Implies index 7ccf9a7c4..78dba9510 100644 --- a/libc/sysdeps/powerpc/Implies +++ b/libc/sysdeps/powerpc/Implies @@ -1,4 +1,5 @@ # On PowerPC we use the IBM extended long double format. ieee754/ldbl-128ibm +ieee754/ldbl-opt ieee754/dbl-64 ieee754/flt-32 diff --git a/libc/sysdeps/powerpc/bits/fenv.h b/libc/sysdeps/powerpc/bits/fenv.h index 1054ba142..07cd3c8e5 100644 --- a/libc/sysdeps/powerpc/bits/fenv.h +++ b/libc/sysdeps/powerpc/bits/fenv.h @@ -123,7 +123,7 @@ enum these bits is set. Note, though, that you can't disable or enable these exceptions individually. */ - /* Operation with SNaN. */ + /* Operation with a sNaN. */ FE_INVALID_SNAN = # define FE_INVALID_SNAN (1 << (31 - 7)) FE_INVALID_SNAN, @@ -148,7 +148,7 @@ enum # define FE_INVALID_IMZ (1 << (31 - 11)) FE_INVALID_IMZ, - /* Comparison with NaN or SNaN. */ + /* Comparison with a NaN. 
*/ FE_INVALID_COMPARE = # define FE_INVALID_COMPARE (1 << (31 - 12)) FE_INVALID_COMPARE, diff --git a/libc/sysdeps/powerpc/fpu/Makefile b/libc/sysdeps/powerpc/fpu/Makefile index ffacf1a75..fda59f9fa 100644 --- a/libc/sysdeps/powerpc/fpu/Makefile +++ b/libc/sysdeps/powerpc/fpu/Makefile @@ -1,6 +1,5 @@ ifeq ($(subdir),math) libm-support += fenv_const fe_nomask fe_mask t_sqrt -libm-tests += test-powerpc-snan # libm needs ld.so to access dl_hwcap $(objpfx)libm.so: $(elfobjdir)/ld.so diff --git a/libc/sysdeps/powerpc/fpu/fenv_libc.h b/libc/sysdeps/powerpc/fpu/fenv_libc.h index abae2f3df..191095156 100644 --- a/libc/sysdeps/powerpc/fpu/fenv_libc.h +++ b/libc/sysdeps/powerpc/fpu/fenv_libc.h @@ -116,7 +116,7 @@ enum { FPSCR_UX, /* underflow */ FPSCR_ZX, /* zero divide */ FPSCR_XX, /* inexact */ - FPSCR_VXSNAN, /* invalid operation for SNaN */ + FPSCR_VXSNAN, /* invalid operation for sNaN */ FPSCR_VXISI, /* invalid operation for Inf-Inf */ FPSCR_VXIDI, /* invalid operation for Inf/Inf */ FPSCR_VXZDZ, /* invalid operation for 0/0 */ @@ -152,7 +152,7 @@ enum { #endif /* _ARCH_PWR6 */ /* This operation (i) sets the appropriate FPSCR bits for its - parameter, (ii) converts SNaN to the corresponding NaN, and (iii) + parameter, (ii) converts sNaN to the corresponding qNaN, and (iii) otherwise passes its parameter through unchanged (in particular, -0 and +0 stay as they were). The `obvious' way to do this is optimised out by gcc. */ diff --git a/libc/sysdeps/powerpc/fpu/math_ldbl.h b/libc/sysdeps/powerpc/fpu/math_ldbl.h index 20224e664..36378c023 100644 --- a/libc/sysdeps/powerpc/fpu/math_ldbl.h +++ b/libc/sysdeps/powerpc/fpu/math_ldbl.h @@ -2,132 +2,12 @@ #error "Never use <math_ldbl.h> directly; include <math_private.h> instead." #endif -#include <sysdeps/ieee754/ldbl-128/math_ldbl.h> -#include <ieee754.h> - -static inline void -ldbl_extract_mantissa (int64_t *hi64, u_int64_t *lo64, int *exp, long double x) -{ - /* We have 105 bits of mantissa plus one implicit digit. 
Since - 106 bits are representable we use the first implicit digit for - the number before the decimal point and the second implicit bit - as bit 53 of the mantissa. */ - unsigned long long hi, lo; - int ediff; - union ibm_extended_long_double eldbl; - eldbl.d = x; - *exp = eldbl.ieee.exponent - IBM_EXTENDED_LONG_DOUBLE_BIAS; - - lo = ((long long)eldbl.ieee.mantissa2 << 32) | eldbl.ieee.mantissa3; - hi = ((long long)eldbl.ieee.mantissa0 << 32) | eldbl.ieee.mantissa1; - /* If the lower double is not a denomal or zero then set the hidden - 53rd bit. */ - if (eldbl.ieee.exponent2 > 0x001) - { - lo |= (1ULL << 52); - lo = lo << 7; /* pre-shift lo to match ieee854. */ - /* The lower double is normalized separately from the upper. We - may need to adjust the lower mantissa to reflect this. */ - ediff = eldbl.ieee.exponent - eldbl.ieee.exponent2; - if (ediff > 53) - lo = lo >> (ediff-53); - } - hi |= (1ULL << 52); - - if ((eldbl.ieee.negative != eldbl.ieee.negative2) - && ((eldbl.ieee.exponent2 != 0) && (lo != 0LL))) - { - hi--; - lo = (1ULL << 60) - lo; - if (hi < (1ULL << 52)) - { - /* we have a borrow from the hidden bit, so shift left 1. */ - hi = (hi << 1) | (lo >> 59); - lo = 0xfffffffffffffffLL & (lo << 1); - *exp = *exp - 1; - } - } - *lo64 = (hi << 60) | lo; - *hi64 = hi >> 4; -} - -static inline long double -ldbl_insert_mantissa (int sign, int exp, int64_t hi64, u_int64_t lo64) -{ - union ibm_extended_long_double u; - unsigned long hidden2, lzcount; - unsigned long long hi, lo; - - u.ieee.negative = sign; - u.ieee.negative2 = sign; - u.ieee.exponent = exp + IBM_EXTENDED_LONG_DOUBLE_BIAS; - u.ieee.exponent2 = exp-53 + IBM_EXTENDED_LONG_DOUBLE_BIAS; - /* Expect 113 bits (112 bits + hidden) right justified in two longs. 
- The low order 53 bits (52 + hidden) go into the lower double */ - lo = (lo64 >> 7)& ((1ULL << 53) - 1); - hidden2 = (lo64 >> 59) & 1ULL; - /* The high order 53 bits (52 + hidden) go into the upper double */ - hi = (lo64 >> 60) & ((1ULL << 11) - 1); - hi |= (hi64 << 4); - - if (lo != 0LL) - { - /* hidden2 bit of low double controls rounding of the high double. - If hidden2 is '1' then round up hi and adjust lo (2nd mantissa) - plus change the sign of the low double to compensate. */ - if (hidden2) - { - hi++; - u.ieee.negative2 = !sign; - lo = (1ULL << 53) - lo; - } - /* The hidden bit of the lo mantissa is zero so we need to - normalize the it for the low double. Shift it left until the - hidden bit is '1' then adjust the 2nd exponent accordingly. */ - - if (sizeof (lo) == sizeof (long)) - lzcount = __builtin_clzl (lo); - else if ((lo >> 32) != 0) - lzcount = __builtin_clzl ((long) (lo >> 32)); - else - lzcount = __builtin_clzl ((long) lo) + 32; - lzcount = lzcount - 11; - if (lzcount > 0) - { - int expnt2 = u.ieee.exponent2 - lzcount; - if (expnt2 >= 1) - { - /* Not denormal. Normalize and set low exponent. */ - lo = lo << lzcount; - u.ieee.exponent2 = expnt2; - } - else - { - /* Is denormal. */ - lo = lo << (lzcount + expnt2); - u.ieee.exponent2 = 0; - } - } - } - else - { - u.ieee.negative2 = 0; - u.ieee.exponent2 = 0; - } - - u.ieee.mantissa3 = lo & ((1ULL << 32) - 1); - u.ieee.mantissa2 = (lo >> 32) & ((1ULL << 20) - 1); - u.ieee.mantissa1 = hi & ((1ULL << 32) - 1); - u.ieee.mantissa0 = (hi >> 32) & ((1ULL << 20) - 1); - return u.d; -} - -/* gcc generates disgusting code to pack and unpack long doubles. - This tells gcc that pack/unpack is really a nop. We use fr1/fr2 - because those are the regs used to pass/return a single - long double arg. */ +/* GCC does not optimize the default ldbl_pack code to not spill register + in the stack. The following optimization tells gcc that pack/unpack + is really a nop. 
We use fr1/fr2 because those are the regs used to + pass/return a single long double arg. */ static inline long double -ldbl_pack (double a, double aa) +ldbl_pack_ppc (double a, double aa) { register long double x __asm__ ("fr1"); register double xh __asm__ ("fr1"); @@ -139,7 +19,7 @@ ldbl_pack (double a, double aa) } static inline void -ldbl_unpack (long double l, double *a, double *aa) +ldbl_unpack_ppc (long double l, double *a, double *aa) { register long double x __asm__ ("fr1"); register double xh __asm__ ("fr1"); @@ -150,40 +30,7 @@ ldbl_unpack (long double l, double *a, double *aa) *aa = xl; } +#define ldbl_pack ldbl_pack_ppc +#define ldbl_unpack ldbl_unpack_ppc -/* Convert a finite long double to canonical form. - Does not handle +/-Inf properly. */ -static inline void -ldbl_canonicalize (double *a, double *aa) -{ - double xh, xl; - - xh = *a + *aa; - xl = (*a - xh) + *aa; - *a = xh; - *aa = xl; -} - -/* Simple inline nearbyint (double) function . - Only works in the default rounding mode - but is useful in long double rounding functions. */ -static inline double -ldbl_nearbyint (double a) -{ - double two52 = 0x10000000000000LL; - - if (__builtin_expect ((__builtin_fabs (a) < two52), 1)) - { - if (__builtin_expect ((a > 0.0), 1)) - { - a += two52; - a -= two52; - } - else if (__builtin_expect ((a < 0.0), 1)) - { - a = two52 - a; - a = -(a - two52); - } - } - return a; -} +#include <sysdeps/ieee754/ldbl-128ibm/math_ldbl.h> diff --git a/libc/sysdeps/powerpc/fpu/test-powerpc-snan.c b/libc/sysdeps/powerpc/fpu/test-powerpc-snan.c deleted file mode 100644 index e3bd47aba..000000000 --- a/libc/sysdeps/powerpc/fpu/test-powerpc-snan.c +++ /dev/null @@ -1,382 +0,0 @@ -/* Test Signalling NaN in isnan, isinf etc functions. - Copyright (C) 2008-2013 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Andreas Jaeger <aj@suse.de>, 2005. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#define _GNU_SOURCE 1 -#include <stdio.h> -#include <stdlib.h> -#include <sys/time.h> -#include <string.h> -#include <math.h> -#include <float.h> -#include <fenv.h> -#include <signal.h> -#include <setjmp.h> -#include <errno.h> - -int dest_offset; -char *dest_address; -double value = 123.456; -double zero = 0.0; - -float SNANf; -double SNAN; -long double SNANl; - -static sigjmp_buf sigfpe_buf; - -void -init_signaling_nan (void) -{ - union { - double _ld16; - double _d8; - unsigned int _ui4[4]; - float _f4; - } nan_temp; - - nan_temp._ui4[0] = 0x7fa00000; - SNANf = nan_temp._f4; - - nan_temp._ui4[0] = 0x7ff40000; - nan_temp._ui4[1] = 0x00000000; - SNAN = nan_temp._d8; - - nan_temp._ui4[0] = 0x7ff40000; - nan_temp._ui4[1] = 0x00000000; - nan_temp._ui4[2] = 0x00000000; - nan_temp._ui4[3] = 0x00000000; - SNANl = nan_temp._ld16; -} - -static float -snan_float (void) -{ - return SNANf; -} - -static double -snan_double (void) -{ - return SNAN; -} - -typedef long double ldouble; - -static ldouble -snan_ldouble (void) -{ - return SNANl; -} - - -void -myFPsighandler(int signal, - siginfo_t *info, - void *context) -{ - siglongjmp(sigfpe_buf, 0); -} - -int -set_sigaction_FP(void) -{ - struct sigaction sa; - /* register RT signal handler via sigaction */ - sa.sa_flags = SA_SIGINFO; - sa.sa_sigaction = 
&myFPsighandler; - sigemptyset(&sa.sa_mask); - sigaction(SIGFPE, &sa, NULL); - - return 0; -} - -int -remove_sigaction_FP(void) -{ - struct sigaction sa; - /* restore default RT signal handler via sigaction */ - sa.sa_flags = SA_SIGINFO; - sa.sa_handler = SIG_DFL; - sigemptyset(&sa.sa_mask); - sigaction(SIGFPE, &sa, NULL); - - return 0; -} - -static int errors = 0; - -static void -check (const char *testname, int result) -{ - if (!result) { - printf ("Failure: %s\n", testname); - errors++; - } -} - -#define TEST_FUNC(NAME, FLOAT) \ -static void \ -NAME (void) \ -{ \ - /* Variables are declared volatile to forbid some compiler \ - optimizations. */ \ - volatile FLOAT Inf_var, NaN_var, zero_var, one_var, SNaN_var; \ - fenv_t saved_fenv; \ - \ - zero_var = 0.0; \ - one_var = 1.0; \ - NaN_var = zero_var / zero_var; \ - SNaN_var = snan_##FLOAT (); \ - Inf_var = one_var / zero_var; \ - \ - (void) &zero_var; \ - (void) &one_var; \ - (void) &NaN_var; \ - (void) &SNaN_var; \ - (void) &Inf_var; \ - \ - set_sigaction_FP (); \ - fegetenv(&saved_fenv); \ - \ - feclearexcept(FE_ALL_EXCEPT); \ - feenableexcept (FE_ALL_EXCEPT); \ - if (sigsetjmp(sigfpe_buf, 0)) \ - { \ - printf (#FLOAT " isnan(NaN) raised SIGFPE\n"); \ - errors++; \ - } else { \ - check (#FLOAT " isnan (NaN)", isnan (NaN_var)); \ - } \ - \ - feclearexcept(FE_ALL_EXCEPT); \ - feenableexcept (FE_ALL_EXCEPT); \ - if (sigsetjmp(sigfpe_buf, 0)) \ - { \ - printf (#FLOAT " isnan(-NaN) raised SIGFPE\n"); \ - errors++; \ - } else { \ - check (#FLOAT " isnan (-NaN)", isnan (-NaN_var)); \ - } \ - \ - feclearexcept(FE_ALL_EXCEPT); \ - feenableexcept (FE_ALL_EXCEPT); \ - if (sigsetjmp(sigfpe_buf, 0)) \ - { \ - printf (#FLOAT " isnan(SNaN) raised SIGFPE\n"); \ - errors++; \ - } else { \ - check (#FLOAT " isnan (SNaN)", isnan (SNaN_var)); \ - } \ - \ - feclearexcept(FE_ALL_EXCEPT); \ - feenableexcept (FE_ALL_EXCEPT); \ - if (sigsetjmp(sigfpe_buf, 0)) \ - { \ - printf (#FLOAT " isnan(-SNaN) raised SIGFPE\n"); \ - errors++; \ - } 
else { \ - check (#FLOAT " isnan (-SNaN)", isnan (-SNaN_var)); \ - } \ - \ - feclearexcept(FE_ALL_EXCEPT); \ - feenableexcept (FE_ALL_EXCEPT); \ - if (sigsetjmp(sigfpe_buf, 0)) \ - { \ - printf (#FLOAT " isinf(NaN) raised SIGFPE\n"); \ - errors++; \ - } else { \ - check (#FLOAT " isinf (NaN)", !isinf (NaN_var)); \ - } \ - \ - feclearexcept(FE_ALL_EXCEPT); \ - feenableexcept (FE_ALL_EXCEPT); \ - if (sigsetjmp(sigfpe_buf, 0)) \ - { \ - printf (#FLOAT " isinf(-NaN) raised SIGFPE\n"); \ - errors++; \ - } else { \ - check (#FLOAT " isinf (-NaN)", !isinf (-NaN_var)); \ - } \ - \ - feclearexcept(FE_ALL_EXCEPT); \ - feenableexcept (FE_ALL_EXCEPT); \ - if (sigsetjmp(sigfpe_buf, 0)) \ - { \ - printf (#FLOAT " isinf(SNaN) raised SIGFPE\n"); \ - errors++; \ - } else { \ - check (#FLOAT " isinf (SNaN)", !isinf (SNaN_var)); \ - } \ - \ - feclearexcept(FE_ALL_EXCEPT); \ - feenableexcept (FE_ALL_EXCEPT); \ - if (sigsetjmp(sigfpe_buf, 0)) \ - { \ - printf (#FLOAT " isinf(-SNaN) raised SIGFPE\n"); \ - errors++; \ - } else { \ - check (#FLOAT " isinf (-SNaN)", !isinf (-SNaN_var)); \ - } \ - \ - feclearexcept(FE_ALL_EXCEPT); \ - feenableexcept (FE_ALL_EXCEPT); \ - if (sigsetjmp(sigfpe_buf, 0)) \ - { \ - printf (#FLOAT " isfinite(NaN) raised SIGFPE\n"); \ - errors++; \ - } else { \ - check (#FLOAT " isfinite (NaN)", !isfinite (NaN_var)); \ - } \ - \ - feclearexcept(FE_ALL_EXCEPT); \ - feenableexcept (FE_ALL_EXCEPT); \ - if (sigsetjmp(sigfpe_buf, 0)) \ - { \ - printf (#FLOAT " isfinite(-NaN) raised SIGFPE\n"); \ - errors++; \ - } else { \ - check (#FLOAT " isfinite (-NaN)", !isfinite (-NaN_var)); \ - } \ - \ - feclearexcept(FE_ALL_EXCEPT); \ - feenableexcept (FE_ALL_EXCEPT); \ - if (sigsetjmp(sigfpe_buf, 0)) \ - { \ - printf (#FLOAT " isfinite(SNaN) raised SIGFPE\n"); \ - errors++; \ - } else { \ - check (#FLOAT " isfinite (SNaN)", !isfinite (SNaN_var)); \ - } \ - \ - feclearexcept(FE_ALL_EXCEPT); \ - feenableexcept (FE_ALL_EXCEPT); \ - if (sigsetjmp(sigfpe_buf, 0)) \ - { \ - printf 
(#FLOAT " isfinite(-SNaN) raised SIGFPE\n"); \ - errors++; \ - } else { \ - check (#FLOAT " isfinite (-SNaN)", !isfinite (-SNaN_var)); \ - } \ - \ - feclearexcept(FE_ALL_EXCEPT); \ - feenableexcept (FE_ALL_EXCEPT); \ - if (sigsetjmp(sigfpe_buf, 0)) \ - { \ - printf (#FLOAT " isnormal(NaN) raised SIGFPE\n"); \ - errors++; \ - } else { \ - check (#FLOAT " isnormal (NaN)", !isnormal (NaN_var)); \ - } \ - \ - feclearexcept(FE_ALL_EXCEPT); \ - feenableexcept (FE_ALL_EXCEPT); \ - if (sigsetjmp(sigfpe_buf, 0)) \ - { \ - printf (#FLOAT " isnormal(-NaN) raised SIGFPE\n"); \ - errors++; \ - } else { \ - check (#FLOAT " isnormal (-NaN)", !isnormal (-NaN_var)); \ - } \ - \ - feclearexcept(FE_ALL_EXCEPT); \ - feenableexcept (FE_ALL_EXCEPT); \ - if (sigsetjmp(sigfpe_buf, 0)) \ - { \ - printf (#FLOAT " isnormal(SNaN) isnormal SIGFPE\n"); \ - errors++; \ - } else { \ - check (#FLOAT " isnormal (SNaN)", !isnormal (SNaN_var)); \ - } \ - \ - feclearexcept(FE_ALL_EXCEPT); \ - feenableexcept (FE_ALL_EXCEPT); \ - if (sigsetjmp(sigfpe_buf, 0)) \ - { \ - printf (#FLOAT " isnormal(-SNaN) raised SIGFPE\n"); \ - errors++; \ - } else { \ - check (#FLOAT " isnormal (-SNaN)", !isnormal (-SNaN_var)); \ - } \ - \ - feclearexcept(FE_ALL_EXCEPT); \ - feenableexcept (FE_ALL_EXCEPT); \ - if (sigsetjmp(sigfpe_buf, 0)) \ - { \ - printf (#FLOAT " fpclassify(NaN) raised SIGFPE\n"); \ - errors++; \ - } else { \ - check (#FLOAT " fpclassify (NaN)", (fpclassify (NaN_var)==FP_NAN)); \ - } \ - \ - feclearexcept(FE_ALL_EXCEPT); \ - feenableexcept (FE_ALL_EXCEPT); \ - if (sigsetjmp(sigfpe_buf, 0)) \ - { \ - printf (#FLOAT " fpclassify(-NaN) raised SIGFPE\n"); \ - errors++; \ - } else { \ - check (#FLOAT " fpclassify (-NaN)", (fpclassify (-NaN_var)==FP_NAN)); \ - } \ - \ - feclearexcept(FE_ALL_EXCEPT); \ - feenableexcept (FE_ALL_EXCEPT); \ - if (sigsetjmp(sigfpe_buf, 0)) \ - { \ - printf (#FLOAT " fpclassify(SNaN) isnormal SIGFPE\n"); \ - errors++; \ - } else { \ - check (#FLOAT " fpclassify (SNaN)", (fpclassify 
(SNaN_var)==FP_NAN)); \ - } \ - \ - feclearexcept(FE_ALL_EXCEPT); \ - feenableexcept (FE_ALL_EXCEPT); \ - if (sigsetjmp(sigfpe_buf, 0)) \ - { \ - printf (#FLOAT " fpclassify(-SNaN) raised SIGFPE\n"); \ - errors++; \ - } else { \ - check (#FLOAT " fpclassify (-SNaN)", (fpclassify (-SNaN_var)==FP_NAN)); \ - } \ - \ - fesetenv(&saved_fenv); /* restore saved fenv */ \ - remove_sigaction_FP(); \ -} - -TEST_FUNC (float_test, float) -TEST_FUNC (double_test, double) -#ifndef NO_LONG_DOUBLE -TEST_FUNC (ldouble_test, ldouble) -#endif - -static int -do_test (void) -{ - init_signaling_nan(); - - float_test(); - double_test(); -#ifndef NO_LONG_DOUBLE - ldouble_test(); -#endif - - return errors != 0; -} - -#define TEST_FUNCTION do_test () -#include "../test-skeleton.c" diff --git a/libc/sysdeps/powerpc/powerpc32/power4/fpu/Makefile b/libc/sysdeps/powerpc/power4/fpu/Makefile index f487ed601..e17d32f30 100644 --- a/libc/sysdeps/powerpc/powerpc32/power4/fpu/Makefile +++ b/libc/sysdeps/powerpc/power4/fpu/Makefile @@ -2,4 +2,6 @@ ifeq ($(subdir),math) CFLAGS-mpa.c += --param max-unroll-times=4 -funroll-loops -fpeel-loops +CPPFLAGS-slowpow.c += -DUSE_LONG_DOUBLE_FOR_MP=1 +CPPFLAGS-slowexp.c += -DUSE_LONG_DOUBLE_FOR_MP=1 endif diff --git a/libc/sysdeps/powerpc/power4/fpu/mpa.c b/libc/sysdeps/powerpc/power4/fpu/mpa.c new file mode 100644 index 000000000..1858c9740 --- /dev/null +++ b/libc/sysdeps/powerpc/power4/fpu/mpa.c @@ -0,0 +1,214 @@ + +/* + * IBM Accurate Mathematical Library + * written by International Business Machines Corp. + * Copyright (C) 2001-2013 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* Define __mul and __sqr and use the rest from generic code. */ +#define NO__MUL +#define NO__SQR + +#include <sysdeps/ieee754/dbl-64/mpa.c> + +/* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X + and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P + digits. In case P > 3 the error is bounded by 1.001 ULP. */ +void +__mul (const mp_no *x, const mp_no *y, mp_no *z, int p) +{ + long i, i1, i2, j, k, k2; + long p2 = p; + double u, zk, zk2; + + /* Is z=0? */ + if (__glibc_unlikely (X[0] * Y[0] == ZERO)) + { + Z[0] = ZERO; + return; + } + + /* Multiply, add and carry */ + k2 = (p2 < 3) ? p2 + p2 : p2 + 3; + zk = Z[k2] = ZERO; + for (k = k2; k > 1;) + { + if (k > p2) + { + i1 = k - p2; + i2 = p2 + 1; + } + else + { + i1 = 1; + i2 = k; + } +#if 1 + /* Rearrange this inner loop to allow the fmadd instructions to be + independent and execute in parallel on processors that have + dual symmetrical FP pipelines. */ + if (i1 < (i2 - 1)) + { + /* Make sure we have at least 2 iterations. */ + if (((i2 - i1) & 1L) == 1L) + { + /* Handle the odd iterations case. */ + zk2 = x->d[i2 - 1] * y->d[i1]; + } + else + zk2 = 0.0; + /* Do two multiply/adds per loop iteration, using independent + accumulators; zk and zk2. */ + for (i = i1, j = i2 - 1; i < i2 - 1; i += 2, j -= 2) + { + zk += x->d[i] * y->d[j]; + zk2 += x->d[i + 1] * y->d[j - 1]; + } + zk += zk2; /* Final sum. */ + } + else + { + /* Special case when iterations is 1. */ + zk += x->d[i1] * y->d[i1]; + } +#else + /* The original code. 
*/ + for (i = i1, j = i2 - 1; i < i2; i++, j--) + zk += X[i] * Y[j]; +#endif + + u = (zk + CUTTER) - CUTTER; + if (u > zk) + u -= RADIX; + Z[k] = zk - u; + zk = u * RADIXI; + --k; + } + Z[k] = zk; + + int e = EX + EY; + /* Is there a carry beyond the most significant digit? */ + if (Z[1] == ZERO) + { + for (i = 1; i <= p2; i++) + Z[i] = Z[i + 1]; + e--; + } + + EZ = e; + Z[0] = X[0] * Y[0]; +} + +/* Square *X and store result in *Y. X and Y may not overlap. For P in + [1, 2, 3], the exact result is truncated to P digits. In case P > 3 the + error is bounded by 1.001 ULP. This is a faster special case of + multiplication. */ +void +__sqr (const mp_no *x, mp_no *y, int p) +{ + long i, j, k, ip; + double u, yk; + + /* Is z=0? */ + if (__glibc_unlikely (X[0] == ZERO)) + { + Y[0] = ZERO; + return; + } + + /* We need not iterate through all X's since it's pointless to + multiply zeroes. */ + for (ip = p; ip > 0; ip--) + if (X[ip] != ZERO) + break; + + k = (__glibc_unlikely (p < 3)) ? p + p : p + 3; + + while (k > 2 * ip + 1) + Y[k--] = ZERO; + + yk = ZERO; + + while (k > p) + { + double yk2 = 0.0; + long lim = k / 2; + + if (k % 2 == 0) + { + yk += X[lim] * X[lim]; + lim--; + } + + /* In __mul, this loop (and the one within the next while loop) run + between a range to calculate the mantissa as follows: + + Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1] + + X[n] * Y[k] + + For X == Y, we can get away with summing halfway and doubling the + result. For cases where the range size is even, the mid-point needs + to be added separately (above). */ + for (i = k - p, j = p; i <= lim; i++, j--) + yk2 += X[i] * X[j]; + + yk += 2.0 * yk2; + + u = (yk + CUTTER) - CUTTER; + if (u > yk) + u -= RADIX; + Y[k--] = yk - u; + yk = u * RADIXI; + } + + while (k > 1) + { + double yk2 = 0.0; + long lim = k / 2; + + if (k % 2 == 0) + { + yk += X[lim] * X[lim]; + lim--; + } + + /* Likewise for this loop. 
*/ + for (i = 1, j = k - 1; i <= lim; i++, j--) + yk2 += X[i] * X[j]; + + yk += 2.0 * yk2; + + u = (yk + CUTTER) - CUTTER; + if (u > yk) + u -= RADIX; + Y[k--] = yk - u; + yk = u * RADIXI; + } + Y[k] = yk; + + /* Squares are always positive. */ + Y[0] = 1.0; + + int e = EX * 2; + /* Is there a carry beyond the most significant digit? */ + if (__glibc_unlikely (Y[1] == ZERO)) + { + for (i = 1; i <= p; i++) + Y[i] = Y[i + 1]; + e--; + } + EY = e; +} diff --git a/libc/sysdeps/powerpc/powerpc32/power4/Implies b/libc/sysdeps/powerpc/powerpc32/power4/Implies new file mode 100644 index 000000000..a372141bb --- /dev/null +++ b/libc/sysdeps/powerpc/powerpc32/power4/Implies @@ -0,0 +1,2 @@ +powerpc/power4/fpu +powerpc/power4 diff --git a/libc/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c b/libc/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c deleted file mode 100644 index b22664772..000000000 --- a/libc/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c +++ /dev/null @@ -1,837 +0,0 @@ - -/* - * IBM Accurate Mathematical Library - * written by International Business Machines Corp. - * Copyright (C) 2001-2013 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. 
- */ -/************************************************************************/ -/* MODULE_NAME: mpa.c */ -/* */ -/* FUNCTIONS: */ -/* mcr */ -/* acr */ -/* cpy */ -/* norm */ -/* denorm */ -/* mp_dbl */ -/* dbl_mp */ -/* add_magnitudes */ -/* sub_magnitudes */ -/* add */ -/* sub */ -/* mul */ -/* inv */ -/* dvd */ -/* */ -/* Arithmetic functions for multiple precision numbers. */ -/* Relative errors are bounded */ -/************************************************************************/ - - -#include "endian.h" -#include "mpa.h" -#include <sys/param.h> - -const mp_no mpone = {1, {1.0, 1.0}}; -const mp_no mptwo = {1, {1.0, 2.0}}; - -/* Compare mantissa of two multiple precision numbers regardless of the sign - and exponent of the numbers. */ -static int -mcr (const mp_no *x, const mp_no *y, int p) -{ - long i; - long p2 = p; - for (i = 1; i <= p2; i++) - { - if (X[i] == Y[i]) - continue; - else if (X[i] > Y[i]) - return 1; - else - return -1; - } - return 0; -} - -/* Compare the absolute values of two multiple precision numbers. */ -int -__acr (const mp_no *x, const mp_no *y, int p) -{ - long i; - - if (X[0] == ZERO) - { - if (Y[0] == ZERO) - i = 0; - else - i = -1; - } - else if (Y[0] == ZERO) - i = 1; - else - { - if (EX > EY) - i = 1; - else if (EX < EY) - i = -1; - else - i = mcr (x, y, p); - } - - return i; -} - -/* Copy multiple precision number X into Y. They could be the same - number. */ -void -__cpy (const mp_no *x, mp_no *y, int p) -{ - long i; - - EY = EX; - for (i = 0; i <= p; i++) - Y[i] = X[i]; -} - -/* Convert a multiple precision number *X into a double precision - number *Y, normalized case (|x| >= 2**(-1022))). 
*/ -static void -norm (const mp_no *x, double *y, int p) -{ -#define R RADIXI - long i; - double a, c, u, v, z[5]; - if (p < 5) - { - if (p == 1) - c = X[1]; - else if (p == 2) - c = X[1] + R * X[2]; - else if (p == 3) - c = X[1] + R * (X[2] + R * X[3]); - else if (p == 4) - c = (X[1] + R * X[2]) + R * R * (X[3] + R * X[4]); - } - else - { - for (a = ONE, z[1] = X[1]; z[1] < TWO23;) - { - a *= TWO; - z[1] *= TWO; - } - - for (i = 2; i < 5; i++) - { - z[i] = X[i] * a; - u = (z[i] + CUTTER) - CUTTER; - if (u > z[i]) - u -= RADIX; - z[i] -= u; - z[i - 1] += u * RADIXI; - } - - u = (z[3] + TWO71) - TWO71; - if (u > z[3]) - u -= TWO19; - v = z[3] - u; - - if (v == TWO18) - { - if (z[4] == ZERO) - { - for (i = 5; i <= p; i++) - { - if (X[i] == ZERO) - continue; - else - { - z[3] += ONE; - break; - } - } - } - else - z[3] += ONE; - } - - c = (z[1] + R * (z[2] + R * z[3])) / a; - } - - c *= X[0]; - - for (i = 1; i < EX; i++) - c *= RADIX; - for (i = 1; i > EX; i--) - c *= RADIXI; - - *y = c; -#undef R -} - -/* Convert a multiple precision number *X into a double precision - number *Y, Denormal case (|x| < 2**(-1022))). 
*/ -static void -denorm (const mp_no *x, double *y, int p) -{ - long i, k; - long p2 = p; - double c, u, z[5]; - -#define R RADIXI - if (EX < -44 || (EX == -44 && X[1] < TWO5)) - { - *y = ZERO; - return; - } - - if (p2 == 1) - { - if (EX == -42) - { - z[1] = X[1] + TWO10; - z[2] = ZERO; - z[3] = ZERO; - k = 3; - } - else if (EX == -43) - { - z[1] = TWO10; - z[2] = X[1]; - z[3] = ZERO; - k = 2; - } - else - { - z[1] = TWO10; - z[2] = ZERO; - z[3] = X[1]; - k = 1; - } - } - else if (p2 == 2) - { - if (EX == -42) - { - z[1] = X[1] + TWO10; - z[2] = X[2]; - z[3] = ZERO; - k = 3; - } - else if (EX == -43) - { - z[1] = TWO10; - z[2] = X[1]; - z[3] = X[2]; - k = 2; - } - else - { - z[1] = TWO10; - z[2] = ZERO; - z[3] = X[1]; - k = 1; - } - } - else - { - if (EX == -42) - { - z[1] = X[1] + TWO10; - z[2] = X[2]; - k = 3; - } - else if (EX == -43) - { - z[1] = TWO10; - z[2] = X[1]; - k = 2; - } - else - { - z[1] = TWO10; - z[2] = ZERO; - k = 1; - } - z[3] = X[k]; - } - - u = (z[3] + TWO57) - TWO57; - if (u > z[3]) - u -= TWO5; - - if (u == z[3]) - { - for (i = k + 1; i <= p2; i++) - { - if (X[i] == ZERO) - continue; - else - { - z[3] += ONE; - break; - } - } - } - - c = X[0] * ((z[1] + R * (z[2] + R * z[3])) - TWO10); - - *y = c * TWOM1032; -#undef R -} - -/* Convert multiple precision number *X into double precision number *Y. The - result is correctly rounded to the nearest/even. */ -void -__mp_dbl (const mp_no *x, double *y, int p) -{ - if (X[0] == ZERO) - { - *y = ZERO; - return; - } - - if (__glibc_likely (EX > -42 || (EX == -42 && X[1] >= TWO10))) - norm (x, y, p); - else - denorm (x, y, p); -} - -/* Get the multiple precision equivalent of X into *Y. If the precision is too - small, the result is truncated. */ -void -__dbl_mp (double x, mp_no *y, int p) -{ - long i, n; - long p2 = p; - double u; - - /* Sign. */ - if (x == ZERO) - { - Y[0] = ZERO; - return; - } - else if (x > ZERO) - Y[0] = ONE; - else - { - Y[0] = MONE; - x = -x; - } - - /* Exponent. 
*/ - for (EY = ONE; x >= RADIX; EY += ONE) - x *= RADIXI; - for (; x < ONE; EY -= ONE) - x *= RADIX; - - /* Digits. */ - n = MIN (p2, 4); - for (i = 1; i <= n; i++) - { - u = (x + TWO52) - TWO52; - if (u > x) - u -= ONE; - Y[i] = u; - x -= u; - x *= RADIX; - } - for (; i <= p2; i++) - Y[i] = ZERO; -} - -/* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0. The - sign of the sum *Z is not changed. X and Y may overlap but not X and Z or - Y and Z. No guard digit is used. The result equals the exact sum, - truncated. */ -static void -add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p) -{ - long i, j, k; - long p2 = p; - double zk; - - EZ = EX; - - i = p2; - j = p2 + EY - EX; - k = p2 + 1; - - if (__glibc_unlikely (j < 1)) - { - __cpy (x, z, p); - return; - } - - zk = ZERO; - - for (; j > 0; i--, j--) - { - zk += X[i] + Y[j]; - if (zk >= RADIX) - { - Z[k--] = zk - RADIX; - zk = ONE; - } - else - { - Z[k--] = zk; - zk = ZERO; - } - } - - for (; i > 0; i--) - { - zk += X[i]; - if (zk >= RADIX) - { - Z[k--] = zk - RADIX; - zk = ONE; - } - else - { - Z[k--] = zk; - zk = ZERO; - } - } - - if (zk == ZERO) - { - for (i = 1; i <= p2; i++) - Z[i] = Z[i + 1]; - } - else - { - Z[1] = zk; - EZ += ONE; - } -} - -/* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0. - The sign of the difference *Z is not changed. X and Y may overlap but not X - and Z or Y and Z. One guard digit is used. The error is less than one - ULP. */ -static void -sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p) -{ - long i, j, k; - long p2 = p; - double zk; - - EZ = EX; - i = p2; - j = p2 + EY - EX; - k = p2; - - /* Y is too small compared to X, copy X over to the result. */ - if (__glibc_unlikely (j < 1)) - { - __cpy (x, z, p); - return; - } - - /* The relevant least significant digit in Y is non-zero, so we factor it in - to enhance accuracy. 
*/ - if (j < p2 && Y[j + 1] > ZERO) - { - Z[k + 1] = RADIX - Y[j + 1]; - zk = MONE; - } - else - zk = Z[k + 1] = ZERO; - - /* Subtract and borrow. */ - for (; j > 0; i--, j--) - { - zk += (X[i] - Y[j]); - if (zk < ZERO) - { - Z[k--] = zk + RADIX; - zk = MONE; - } - else - { - Z[k--] = zk; - zk = ZERO; - } - } - - /* We're done with digits from Y, so it's just digits in X. */ - for (; i > 0; i--) - { - zk += X[i]; - if (zk < ZERO) - { - Z[k--] = zk + RADIX; - zk = MONE; - } - else - { - Z[k--] = zk; - zk = ZERO; - } - } - - /* Normalize. */ - for (i = 1; Z[i] == ZERO; i++); - EZ = EZ - i + 1; - for (k = 1; i <= p2 + 1;) - Z[k++] = Z[i++]; - for (; k <= p2;) - Z[k++] = ZERO; -} - -/* Add *X and *Y and store the result in *Z. X and Y may overlap, but not X - and Z or Y and Z. One guard digit is used. The error is less than one - ULP. */ -void -__add (const mp_no *x, const mp_no *y, mp_no *z, int p) -{ - int n; - - if (X[0] == ZERO) - { - __cpy (y, z, p); - return; - } - else if (Y[0] == ZERO) - { - __cpy (x, z, p); - return; - } - - if (X[0] == Y[0]) - { - if (__acr (x, y, p) > 0) - { - add_magnitudes (x, y, z, p); - Z[0] = X[0]; - } - else - { - add_magnitudes (y, x, z, p); - Z[0] = Y[0]; - } - } - else - { - if ((n = __acr (x, y, p)) == 1) - { - sub_magnitudes (x, y, z, p); - Z[0] = X[0]; - } - else if (n == -1) - { - sub_magnitudes (y, x, z, p); - Z[0] = Y[0]; - } - else - Z[0] = ZERO; - } -} - -/* Subtract *Y from *X and return the result in *Z. X and Y may overlap but - not X and Z or Y and Z. One guard digit is used. The error is less than - one ULP. 
*/ -void -__sub (const mp_no *x, const mp_no *y, mp_no *z, int p) -{ - int n; - - if (X[0] == ZERO) - { - __cpy (y, z, p); - Z[0] = -Z[0]; - return; - } - else if (Y[0] == ZERO) - { - __cpy (x, z, p); - return; - } - - if (X[0] != Y[0]) - { - if (__acr (x, y, p) > 0) - { - add_magnitudes (x, y, z, p); - Z[0] = X[0]; - } - else - { - add_magnitudes (y, x, z, p); - Z[0] = -Y[0]; - } - } - else - { - if ((n = __acr (x, y, p)) == 1) - { - sub_magnitudes (x, y, z, p); - Z[0] = X[0]; - } - else if (n == -1) - { - sub_magnitudes (y, x, z, p); - Z[0] = -Y[0]; - } - else - Z[0] = ZERO; - } -} - -/* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X - and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P - digits. In case P > 3 the error is bounded by 1.001 ULP. */ -void -__mul (const mp_no *x, const mp_no *y, mp_no *z, int p) -{ - long i, i1, i2, j, k, k2; - long p2 = p; - double u, zk, zk2; - - /* Is z=0? */ - if (__glibc_unlikely (X[0] * Y[0] == ZERO)) - { - Z[0] = ZERO; - return; - } - - /* Multiply, add and carry */ - k2 = (p2 < 3) ? p2 + p2 : p2 + 3; - zk = Z[k2] = ZERO; - for (k = k2; k > 1;) - { - if (k > p2) - { - i1 = k - p2; - i2 = p2 + 1; - } - else - { - i1 = 1; - i2 = k; - } -#if 1 - /* Rearrange this inner loop to allow the fmadd instructions to be - independent and execute in parallel on processors that have - dual symmetrical FP pipelines. */ - if (i1 < (i2 - 1)) - { - /* Make sure we have at least 2 iterations. */ - if (((i2 - i1) & 1L) == 1L) - { - /* Handle the odd iterations case. */ - zk2 = x->d[i2 - 1] * y->d[i1]; - } - else - zk2 = 0.0; - /* Do two multiply/adds per loop iteration, using independent - accumulators; zk and zk2. */ - for (i = i1, j = i2 - 1; i < i2 - 1; i += 2, j -= 2) - { - zk += x->d[i] * y->d[j]; - zk2 += x->d[i + 1] * y->d[j - 1]; - } - zk += zk2; /* Final sum. */ - } - else - { - /* Special case when iterations is 1. */ - zk += x->d[i1] * y->d[i1]; - } -#else - /* The original code. 
*/ - for (i = i1, j = i2 - 1; i < i2; i++, j--) - zk += X[i] * Y[j]; -#endif - - u = (zk + CUTTER) - CUTTER; - if (u > zk) - u -= RADIX; - Z[k] = zk - u; - zk = u * RADIXI; - --k; - } - Z[k] = zk; - - /* Is there a carry beyond the most significant digit? */ - if (Z[1] == ZERO) - { - for (i = 1; i <= p2; i++) - Z[i] = Z[i + 1]; - EZ = EX + EY - 1; - } - else - EZ = EX + EY; - - Z[0] = X[0] * Y[0]; -} - -/* Square *X and store result in *Y. X and Y may not overlap. For P in - [1, 2, 3], the exact result is truncated to P digits. In case P > 3 the - error is bounded by 1.001 ULP. This is a faster special case of - multiplication. */ -void -__sqr (const mp_no *x, mp_no *y, int p) -{ - long i, j, k, ip; - double u, yk; - - /* Is z=0? */ - if (__glibc_unlikely (X[0] == ZERO)) - { - Y[0] = ZERO; - return; - } - - /* We need not iterate through all X's since it's pointless to - multiply zeroes. */ - for (ip = p; ip > 0; ip--) - if (X[ip] != ZERO) - break; - - k = (__glibc_unlikely (p < 3)) ? p + p : p + 3; - - while (k > 2 * ip + 1) - Y[k--] = ZERO; - - yk = ZERO; - - while (k > p) - { - double yk2 = 0.0; - long lim = k / 2; - - if (k % 2 == 0) - { - yk += X[lim] * X[lim]; - lim--; - } - - /* In __mul, this loop (and the one within the next while loop) run - between a range to calculate the mantissa as follows: - - Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1] - + X[n] * Y[k] - - For X == Y, we can get away with summing halfway and doubling the - result. For cases where the range size is even, the mid-point needs - to be added separately (above). */ - for (i = k - p, j = p; i <= lim; i++, j--) - yk2 += X[i] * X[j]; - - yk += 2.0 * yk2; - - u = (yk + CUTTER) - CUTTER; - if (u > yk) - u -= RADIX; - Y[k--] = yk - u; - yk = u * RADIXI; - } - - while (k > 1) - { - double yk2 = 0.0; - long lim = k / 2; - - if (k % 2 == 0) - { - yk += X[lim] * X[lim]; - lim--; - } - - /* Likewise for this loop. 
*/ - for (i = 1, j = k - 1; i <= lim; i++, j--) - yk2 += X[i] * X[j]; - - yk += 2.0 * yk2; - - u = (yk + CUTTER) - CUTTER; - if (u > yk) - u -= RADIX; - Y[k--] = yk - u; - yk = u * RADIXI; - } - Y[k] = yk; - - /* Squares are always positive. */ - Y[0] = 1.0; - - EY = 2 * EX; - /* Is there a carry beyond the most significant digit? */ - if (__glibc_unlikely (Y[1] == ZERO)) - { - for (i = 1; i <= p; i++) - Y[i] = Y[i + 1]; - EY--; - } -} - -/* Invert *X and store in *Y. Relative error bound: - - For P = 2: 1.001 * R ^ (1 - P) - - For P = 3: 1.063 * R ^ (1 - P) - - For P > 3: 2.001 * R ^ (1 - P) - - *X = 0 is not permissible. */ -static void -__inv (const mp_no *x, mp_no *y, int p) -{ - long i; - double t; - mp_no z, w; - static const int np1[] = - { 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 - }; - - __cpy (x, &z, p); - z.e = 0; - __mp_dbl (&z, &t, p); - t = ONE / t; - __dbl_mp (t, y, p); - EY -= EX; - - for (i = 0; i < np1[p]; i++) - { - __cpy (y, &w, p); - __mul (x, &w, y, p); - __sub (&mptwo, y, &z, p); - __mul (&w, &z, y, p); - } -} - -/* Divide *X by *Y and store result in *Z. X and Y may overlap but not X and Z - or Y and Z. Relative error bound: - - For P = 2: 2.001 * R ^ (1 - P) - - For P = 3: 2.063 * R ^ (1 - P) - - For P > 3: 3.001 * R ^ (1 - P) - - *X = 0 is not permissible. */ -void -__dvd (const mp_no *x, const mp_no *y, mp_no *z, int p) -{ - mp_no w; - - if (X[0] == ZERO) - Z[0] = ZERO; - else - { - __inv (y, &w, p); - __mul (x, &w, z, p); - } -} diff --git a/libc/sysdeps/powerpc/powerpc32/power4/fpu/slowexp.c b/libc/sysdeps/powerpc/powerpc32/power4/fpu/slowexp.c deleted file mode 100644 index d93f50544..000000000 --- a/libc/sysdeps/powerpc/powerpc32/power4/fpu/slowexp.c +++ /dev/null @@ -1,65 +0,0 @@ -/* - * IBM Accurate Mathematical Library - * written by International Business Machines Corp. - * Copyright (C) 2001-2013 Free Software Foundation, Inc. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - */ -/**************************************************************************/ -/* MODULE_NAME:slowexp.c */ -/* */ -/* FUNCTION:slowexp */ -/* */ -/* FILES NEEDED:mpa.h */ -/* mpa.c mpexp.c */ -/* */ -/*Converting from double precision to Multi-precision and calculating */ -/* e^x */ -/**************************************************************************/ -#include <math_private.h> - -#ifdef NO_LONG_DOUBLE -#include "mpa.h" -void __mpexp(mp_no *x, mp_no *y, int p); -#endif - -/*Converting from double precision to Multi-precision and calculating e^x */ -double __slowexp(double x) { -#ifdef NO_LONG_DOUBLE - double w,z,res,eps=3.0e-26; - int p; - mp_no mpx, mpy, mpz,mpw,mpeps,mpcor; - - p=6; - __dbl_mp(x,&mpx,p); /* Convert a double precision number x */ - /* into a multiple precision number mpx with prec. p. 
*/ - __mpexp(&mpx, &mpy, p); /* Multi-Precision exponential function */ - __dbl_mp(eps,&mpeps,p); - __mul(&mpeps,&mpy,&mpcor,p); - __add(&mpy,&mpcor,&mpw,p); - __sub(&mpy,&mpcor,&mpz,p); - __mp_dbl(&mpw, &w, p); - __mp_dbl(&mpz, &z, p); - if (w == z) return w; - else { /* if calculating is not exactly */ - p = 32; - __dbl_mp(x,&mpx,p); - __mpexp(&mpx, &mpy, p); - __mp_dbl(&mpy, &res, p); - return res; - } -#else - return (double) __ieee754_expl((long double)x); -#endif -} diff --git a/libc/sysdeps/powerpc/powerpc32/power4/fpu/slowpow.c b/libc/sysdeps/powerpc/powerpc32/power4/fpu/slowpow.c deleted file mode 100644 index 7c97d9581..000000000 --- a/libc/sysdeps/powerpc/powerpc32/power4/fpu/slowpow.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * IBM Accurate Mathematical Library - * written by International Business Machines Corp. - * Copyright (C) 2001-2013 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - */ -/*************************************************************************/ -/* MODULE_NAME:slowpow.c */ -/* */ -/* FUNCTION:slowpow */ -/* */ -/*FILES NEEDED:mpa.h */ -/* mpa.c mpexp.c mplog.c halfulp.c */ -/* */ -/* Given two IEEE double machine numbers y,x , routine computes the */ -/* correctly rounded (to nearest) value of x^y. 
Result calculated by */ -/* multiplication (in halfulp.c) or if result isn't accurate enough */ -/* then routine converts x and y into multi-precision doubles and */ -/* recompute. */ -/*************************************************************************/ - -#include "mpa.h" -#include <math_private.h> - -void __mpexp (mp_no * x, mp_no * y, int p); -void __mplog (mp_no * x, mp_no * y, int p); -double ulog (double); -double __halfulp (double x, double y); - -double -__slowpow (double x, double y, double z) -{ - double res, res1; - long double ldw, ldz, ldpp; - static const long double ldeps = 0x4.0p-96; - - res = __halfulp (x, y); /* halfulp() returns -10 or x^y */ - if (res >= 0) - return res; /* if result was really computed by halfulp */ - /* else, if result was not really computed by halfulp */ - - /* Compute pow as long double, 106 bits */ - ldz = __ieee754_logl ((long double) x); - ldw = (long double) y *ldz; - ldpp = __ieee754_expl (ldw); - res = (double) (ldpp + ldeps); - res1 = (double) (ldpp - ldeps); - - if (res != res1) /* if result still not accurate enough */ - { /* use mpa for higher precision. */ - mp_no mpx, mpy, mpz, mpw, mpp, mpr, mpr1; - static const mp_no eps = { -3, {1.0, 4.0} }; - int p; - - p = 10; /* p=precision 240 bits */ - __dbl_mp (x, &mpx, p); - __dbl_mp (y, &mpy, p); - __dbl_mp (z, &mpz, p); - __mplog (&mpx, &mpz, p); /* log(x) = z */ - __mul (&mpy, &mpz, &mpw, p); /* y * z =w */ - __mpexp (&mpw, &mpp, p); /* e^w =pp */ - __add (&mpp, &eps, &mpr, p); /* pp+eps =r */ - __mp_dbl (&mpr, &res, p); - __sub (&mpp, &eps, &mpr1, p); /* pp -eps =r1 */ - __mp_dbl (&mpr1, &res1, p); /* converting into double precision */ - if (res == res1) - return res; - - /* if we get here result wasn't calculated exactly, continue for - more exact calculation using 768 bits. 
*/ - p = 32; - __dbl_mp (x, &mpx, p); - __dbl_mp (y, &mpy, p); - __dbl_mp (z, &mpz, p); - __mplog (&mpx, &mpz, p); /* log(c)=z */ - __mul (&mpy, &mpz, &mpw, p); /* y*z =w */ - __mpexp (&mpw, &mpp, p); /* e^w=pp */ - __mp_dbl (&mpp, &res, p); /* converting into double precision */ - } - return res; -} diff --git a/libc/sysdeps/powerpc/powerpc32/power4/strncmp.S b/libc/sysdeps/powerpc/powerpc32/power4/strncmp.S index b3a69975c..724d9084a 100644 --- a/libc/sysdeps/powerpc/powerpc32/power4/strncmp.S +++ b/libc/sysdeps/powerpc/powerpc32/power4/strncmp.S @@ -29,9 +29,6 @@ EALIGN (strncmp, 4, 0) #define rSTR1 r3 /* first string arg */ #define rSTR2 r4 /* second string arg */ #define rN r5 /* max string length */ -/* Note: The Bounded pointer support in this code is broken. This code - was inherited from PPC32 and that support was never completed. - Current PPC gcc does not support -fbounds-check or -fbounded-pointers. */ #define rWORD1 r6 /* current word in s1 */ #define rWORD2 r7 /* current word in s2 */ #define rWORD3 r10 diff --git a/libc/sysdeps/powerpc/powerpc32/power7/strncmp.S b/libc/sysdeps/powerpc/powerpc32/power7/strncmp.S index b58630e33..fdae44d26 100644 --- a/libc/sysdeps/powerpc/powerpc32/power7/strncmp.S +++ b/libc/sysdeps/powerpc/powerpc32/power7/strncmp.S @@ -31,9 +31,6 @@ EALIGN (strncmp,5,0) #define rSTR1 r3 /* first string arg */ #define rSTR2 r4 /* second string arg */ #define rN r5 /* max string length */ -/* Note: The Bounded pointer support in this code is broken. This code - was inherited from PPC32 and that support was never completed. - Current PPC gcc does not support -fbounds-check or -fbounded-pointers. 
*/ #define rWORD1 r6 /* current word in s1 */ #define rWORD2 r7 /* current word in s2 */ #define rWORD3 r10 diff --git a/libc/sysdeps/powerpc/powerpc32/strncmp.S b/libc/sysdeps/powerpc/powerpc32/strncmp.S index 3cb6509e2..fa345d293 100644 --- a/libc/sysdeps/powerpc/powerpc32/strncmp.S +++ b/libc/sysdeps/powerpc/powerpc32/strncmp.S @@ -29,9 +29,6 @@ EALIGN (strncmp, 4, 0) #define rSTR1 r3 /* first string arg */ #define rSTR2 r4 /* second string arg */ #define rN r5 /* max string length */ -/* Note: The Bounded pointer support in this code is broken. This code - was inherited from PPC32 and that support was never completed. - Current PPC gcc does not support -fbounds-check or -fbounded-pointers. */ #define rWORD1 r6 /* current word in s1 */ #define rWORD2 r7 /* current word in s2 */ #define rFEFE r8 /* constant 0xfefefeff (-0x01010101) */ diff --git a/libc/sysdeps/powerpc/powerpc64/__longjmp-common.S b/libc/sysdeps/powerpc/powerpc64/__longjmp-common.S index 18032752b..70c370439 100644 --- a/libc/sysdeps/powerpc/powerpc64/__longjmp-common.S +++ b/libc/sysdeps/powerpc/powerpc64/__longjmp-common.S @@ -24,8 +24,6 @@ #else # include <jmpbuf-offsets.h> #endif -#include <bp-sym.h> -#include <bp-asm.h> #ifndef __NO_VMX__ .section ".toc","aw" @@ -45,9 +43,8 @@ #endif .machine "altivec" -ENTRY (BP_SYM (__longjmp)) +ENTRY (__longjmp) CALL_MCOUNT 2 - CHECK_BOUNDS_BOTH_WIDE_LIT (r3, r8, r9, JB_SIZE) #ifndef __NO_VMX__ ld r5,.LC__dl_hwcap@toc(r2) # ifdef SHARED @@ -178,4 +175,4 @@ L(no_vmx): lfd fp31,((JB_FPRS+17)*8)(r3) mr r3,r4 blr -END (BP_SYM (__longjmp)) +END (__longjmp) diff --git a/libc/sysdeps/powerpc/powerpc64/a2/memcpy.S b/libc/sysdeps/powerpc/powerpc64/a2/memcpy.S index 42ec5e375..84c82bb76 100644 --- a/libc/sysdeps/powerpc/powerpc64/a2/memcpy.S +++ b/libc/sysdeps/powerpc/powerpc64/a2/memcpy.S @@ -18,8 +18,6 @@ <http://www.gnu.org/licenses/>. 
*/ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> #define PREFETCH_AHEAD 4 /* no cache lines SRC prefetching ahead */ #define ZERO_AHEAD 2 /* no cache lines DST zeroing ahead */ @@ -32,7 +30,7 @@ .machine a2 -EALIGN (BP_SYM (memcpy), 5, 0) +EALIGN (memcpy, 5, 0) CALL_MCOUNT 3 dcbt 0,r4 /* Prefetch ONE SRC cacheline */ @@ -522,5 +520,5 @@ L(endloop2_128): b L(lessthancacheline) -END_GEN_TB (BP_SYM (memcpy),TB_TOCLESS) +END_GEN_TB (memcpy,TB_TOCLESS) libc_hidden_builtin_def (memcpy) diff --git a/libc/sysdeps/powerpc/powerpc64/bp-asm.h b/libc/sysdeps/powerpc/powerpc64/bp-asm.h deleted file mode 100644 index 6c6c38735..000000000 --- a/libc/sysdeps/powerpc/powerpc64/bp-asm.h +++ /dev/null @@ -1,113 +0,0 @@ -/* Bounded-pointer definitions for PowerPC64 assembler. - Copyright (C) 2000-2013 Free Software Foundation, Inc. - Contributed by Greg McGary <greg@mcgary.org> - - This file is part of the GNU C Library. Its master source is NOT part of - the C library, however. The master source lives in the GNU MP Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If - not, see <http://www.gnu.org/licenses/>. */ - -#if __BOUNDED_POINTERS__ - -/* Byte offsets of BP components. */ -# define oVALUE 0 -# define oLOW 4 -# define oHIGH 8 - -/* Don't check bounds, just convert the BP register to its simple - pointer value. 
*/ - -# define DISCARD_BOUNDS(rBP) \ - ld rBP, oVALUE(rBP) - -/* Check low bound, with the side effect that the BP register is converted - its simple pointer value. Move the high bound into a register for - later use. */ - -# define CHECK_BOUNDS_LOW(rBP, rLOW, rHIGH) \ - ld rHIGH, oHIGH(rBP); \ - ld rLOW, oLOW(rBP); \ - ld rBP, oVALUE(rBP); \ - tdllt rBP, rLOW - -/* Check the high bound, which is in a register, using the given - conditional trap instruction. */ - -# define CHECK_BOUNDS_HIGH(rVALUE, rHIGH, TWLcc) \ - TWLcc rVALUE, rHIGH - -/* Check the high bound, which is stored at the return-value's high - bound slot, using the given conditional trap instruction. */ - -# define CHECK_BOUNDS_HIGH_RTN(rVALUE, rHIGH, TWLcc) \ - ld rHIGH, oHIGH(rRTN); \ - TWLcc rVALUE, rHIGH - -/* Check both bounds, with the side effect that the BP register is - converted to its simple pointer value. */ - -# define CHECK_BOUNDS_BOTH(rBP, rLOW, rHIGH) \ - CHECK_BOUNDS_LOW(rBP, rLOW, rHIGH); \ - tdlge rBP, rHIGH - -/* Check bounds on a memory region of given length, with the side - effect that the BP register is converted to its simple pointer - value. */ - -# define CHECK_BOUNDS_BOTH_WIDE(rBP, rLOW, rHIGH, rLENGTH) \ - CHECK_BOUNDS_LOW (rBP, rLOW, rHIGH); \ - sub rHIGH, rHIGH, rLENGTH; \ - tdlgt rBP, rHIGH - -# define CHECK_BOUNDS_BOTH_WIDE_LIT(rBP, rLOW, rHIGH, LENGTH) \ - CHECK_BOUNDS_LOW (rBP, rLOW, rHIGH); \ - subi rHIGH, rHIGH, LENGTH; \ - tdlgt rBP, rHIGH - -/* Store a pointer value register into the return-value's pointer - value slot. */ - -# define STORE_RETURN_VALUE(rVALUE) \ - std rVALUE, oVALUE(rRTN) - -/* Store a low and high bounds into the return-value's pointer bounds - slots. */ - -# define STORE_RETURN_BOUNDS(rLOW, rHIGH) \ - std rLOW, oLOW(rRTN); \ - std rHIGH, oHIGH(rRTN) - -/* Stuff zero value/low/high into the BP addressed by rRTN. 
*/ - -# define RETURN_NULL_BOUNDED_POINTER \ - li r4, 0; \ - STORE_RETURN_VALUE (r4); \ - STORE_RETURN_BOUNDS (r4, r4) - -#else - -# define DISCARD_BOUNDS(rBP) -# define CHECK_BOUNDS_LOW(rBP, rLOW, rHIGH) -# define CHECK_BOUNDS_HIGH(rVALUE, rHIGH, TWLcc) -# define CHECK_BOUNDS_HIGH_RTN(rVALUE, rHIGH, TWLcc) -# define CHECK_BOUNDS_BOTH(rBP, rLOW, rHIGH) -# define CHECK_BOUNDS_BOTH_WIDE(rBP, rLOW, rHIGH, rLENGTH) -# define CHECK_BOUNDS_BOTH_WIDE_LIT(rBP, rLOW, rHIGH, LENGTH) -# define STORE_RETURN_VALUE(rVALUE) -# define STORE_RETURN_BOUNDS(rLOW, rHIGH) - -# define RETURN_NULL_BOUNDED_POINTER li rRTN, 0 - -#endif diff --git a/libc/sysdeps/powerpc/powerpc64/cell/memcpy.S b/libc/sysdeps/powerpc/powerpc64/cell/memcpy.S index 5ba4ebf62..a271965dd 100644 --- a/libc/sysdeps/powerpc/powerpc64/cell/memcpy.S +++ b/libc/sysdeps/powerpc/powerpc64/cell/memcpy.S @@ -17,8 +17,6 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> #define PREFETCH_AHEAD 6 /* no cache lines SRC prefetching ahead */ #define ZERO_AHEAD 4 /* no cache lines DST zeroing ahead */ @@ -41,7 +39,7 @@ .align 7 -EALIGN (BP_SYM (memcpy), 5, 0) +EALIGN (memcpy, 5, 0) CALL_MCOUNT 3 dcbt 0,r4 /* Prefetch ONE SRC cacheline */ @@ -240,5 +238,5 @@ EALIGN (BP_SYM (memcpy), 5, 0) stb r0,0(r6) 1: blr -END_GEN_TB (BP_SYM (memcpy),TB_TOCLESS) +END_GEN_TB (memcpy,TB_TOCLESS) libc_hidden_builtin_def (memcpy) diff --git a/libc/sysdeps/powerpc/powerpc64/memcpy.S b/libc/sysdeps/powerpc/powerpc64/memcpy.S index 7c1b656be..b8c4cc8b1 100644 --- a/libc/sysdeps/powerpc/powerpc64/memcpy.S +++ b/libc/sysdeps/powerpc/powerpc64/memcpy.S @@ -17,8 +17,6 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> /* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); Returns 'dst'. @@ -35,7 +33,7 @@ possible when both source and destination are doubleword aligned. Each case has a optimized unrolled loop. 
*/ -EALIGN (BP_SYM (memcpy), 5, 0) +EALIGN (memcpy, 5, 0) CALL_MCOUNT 3 cmpldi cr1,5,31 @@ -364,5 +362,5 @@ EALIGN (BP_SYM (memcpy), 5, 0) ld 31,-8(1) ld 3,-16(1) blr -END_GEN_TB (BP_SYM (memcpy),TB_TOCLESS) +END_GEN_TB (memcpy,TB_TOCLESS) libc_hidden_builtin_def (memcpy) diff --git a/libc/sysdeps/powerpc/powerpc64/memset.S b/libc/sysdeps/powerpc/powerpc64/memset.S index f107f8b40..6acf149c8 100644 --- a/libc/sysdeps/powerpc/powerpc64/memset.S +++ b/libc/sysdeps/powerpc/powerpc64/memset.S @@ -17,8 +17,6 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> .section ".toc","aw" .LC0: @@ -33,22 +31,15 @@ cache line (256 bits). There is a special case for setting cache lines to 0, to take advantage of the dcbz instruction. */ -EALIGN (BP_SYM (memset), 5, 0) +EALIGN (memset, 5, 0) CALL_MCOUNT 3 #define rTMP r0 #define rRTN r3 /* Initial value of 1st argument. */ -#if __BOUNDED_POINTERS__ -# define rMEMP0 r4 /* Original value of 1st arg. */ -# define rCHR r5 /* Char to set in each byte. */ -# define rLEN r6 /* Length of region to set. */ -# define rMEMP r10 /* Address at which we are storing. */ -#else -# define rMEMP0 r3 /* Original value of 1st arg. */ -# define rCHR r4 /* Char to set in each byte. */ -# define rLEN r5 /* Length of region to set. */ -# define rMEMP r6 /* Address at which we are storing. */ -#endif +#define rMEMP0 r3 /* Original value of 1st arg. */ +#define rCHR r4 /* Char to set in each byte. */ +#define rLEN r5 /* Length of region to set. */ +#define rMEMP r6 /* Address at which we are storing. */ #define rALIGN r7 /* Number of bytes we are setting now (when aligning). */ #define rMEMP2 r8 @@ -56,14 +47,6 @@ EALIGN (BP_SYM (memset), 5, 0) #define rCLS r8 /* Cache line size obtained from static. */ #define rCLM r9 /* Cache line size mask to check for cache alignment. 
*/ L(_memset): -#if __BOUNDED_POINTERS__ - cmpldi cr1, rRTN, 0 - CHECK_BOUNDS_BOTH_WIDE (rMEMP0, rTMP, rTMP2, rLEN) - beq cr1, L(b0) - STORE_RETURN_VALUE (rMEMP0) - STORE_RETURN_BOUNDS (rTMP, rTMP2) -L(b0): -#endif /* Take care of case for size <= 4. */ cmpldi cr1, rLEN, 8 andi. rALIGN, rMEMP0, 7 @@ -261,25 +244,16 @@ L(medium_27f): L(medium_28t): std rCHR, -8(rMEMP) blr -END_GEN_TB (BP_SYM (memset),TB_TOCLESS) +END_GEN_TB (memset,TB_TOCLESS) libc_hidden_builtin_def (memset) /* Copied from bzero.S to prevent the linker from inserting a stub between bzero and memset. */ -ENTRY (BP_SYM (__bzero)) +ENTRY (__bzero) CALL_MCOUNT 3 -#if __BOUNDED_POINTERS__ - mr r6,r4 - li r5,0 - mr r4,r3 - /* Tell memset that we don't want a return value. */ - li r3,0 - b L(_memset) -#else mr r5,r4 li r4,0 b L(_memset) -#endif -END_GEN_TB (BP_SYM (__bzero),TB_TOCLESS) +END_GEN_TB (__bzero,TB_TOCLESS) -weak_alias (BP_SYM (__bzero), BP_SYM (bzero)) +weak_alias (__bzero, bzero) diff --git a/libc/sysdeps/powerpc/powerpc64/power4/Implies b/libc/sysdeps/powerpc/powerpc64/power4/Implies new file mode 100644 index 000000000..a372141bb --- /dev/null +++ b/libc/sysdeps/powerpc/powerpc64/power4/Implies @@ -0,0 +1,2 @@ +powerpc/power4/fpu +powerpc/power4 diff --git a/libc/sysdeps/powerpc/powerpc64/power4/fpu/Makefile b/libc/sysdeps/powerpc/powerpc64/power4/fpu/Makefile deleted file mode 100644 index f8bb3ef04..000000000 --- a/libc/sysdeps/powerpc/powerpc64/power4/fpu/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -# Makefile fragment for POWER4/5/5+ platforms with FPU. 
- -ifeq ($(subdir),math) -CFLAGS-mpa.c += --param max-unroll-times=4 -funroll-loops -fpeel-loops -endif diff --git a/libc/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c b/libc/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c deleted file mode 100644 index b22664772..000000000 --- a/libc/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c +++ /dev/null @@ -1,837 +0,0 @@ - -/* - * IBM Accurate Mathematical Library - * written by International Business Machines Corp. - * Copyright (C) 2001-2013 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - */ -/************************************************************************/ -/* MODULE_NAME: mpa.c */ -/* */ -/* FUNCTIONS: */ -/* mcr */ -/* acr */ -/* cpy */ -/* norm */ -/* denorm */ -/* mp_dbl */ -/* dbl_mp */ -/* add_magnitudes */ -/* sub_magnitudes */ -/* add */ -/* sub */ -/* mul */ -/* inv */ -/* dvd */ -/* */ -/* Arithmetic functions for multiple precision numbers. */ -/* Relative errors are bounded */ -/************************************************************************/ - - -#include "endian.h" -#include "mpa.h" -#include <sys/param.h> - -const mp_no mpone = {1, {1.0, 1.0}}; -const mp_no mptwo = {1, {1.0, 2.0}}; - -/* Compare mantissa of two multiple precision numbers regardless of the sign - and exponent of the numbers. 
*/ -static int -mcr (const mp_no *x, const mp_no *y, int p) -{ - long i; - long p2 = p; - for (i = 1; i <= p2; i++) - { - if (X[i] == Y[i]) - continue; - else if (X[i] > Y[i]) - return 1; - else - return -1; - } - return 0; -} - -/* Compare the absolute values of two multiple precision numbers. */ -int -__acr (const mp_no *x, const mp_no *y, int p) -{ - long i; - - if (X[0] == ZERO) - { - if (Y[0] == ZERO) - i = 0; - else - i = -1; - } - else if (Y[0] == ZERO) - i = 1; - else - { - if (EX > EY) - i = 1; - else if (EX < EY) - i = -1; - else - i = mcr (x, y, p); - } - - return i; -} - -/* Copy multiple precision number X into Y. They could be the same - number. */ -void -__cpy (const mp_no *x, mp_no *y, int p) -{ - long i; - - EY = EX; - for (i = 0; i <= p; i++) - Y[i] = X[i]; -} - -/* Convert a multiple precision number *X into a double precision - number *Y, normalized case (|x| >= 2**(-1022))). */ -static void -norm (const mp_no *x, double *y, int p) -{ -#define R RADIXI - long i; - double a, c, u, v, z[5]; - if (p < 5) - { - if (p == 1) - c = X[1]; - else if (p == 2) - c = X[1] + R * X[2]; - else if (p == 3) - c = X[1] + R * (X[2] + R * X[3]); - else if (p == 4) - c = (X[1] + R * X[2]) + R * R * (X[3] + R * X[4]); - } - else - { - for (a = ONE, z[1] = X[1]; z[1] < TWO23;) - { - a *= TWO; - z[1] *= TWO; - } - - for (i = 2; i < 5; i++) - { - z[i] = X[i] * a; - u = (z[i] + CUTTER) - CUTTER; - if (u > z[i]) - u -= RADIX; - z[i] -= u; - z[i - 1] += u * RADIXI; - } - - u = (z[3] + TWO71) - TWO71; - if (u > z[3]) - u -= TWO19; - v = z[3] - u; - - if (v == TWO18) - { - if (z[4] == ZERO) - { - for (i = 5; i <= p; i++) - { - if (X[i] == ZERO) - continue; - else - { - z[3] += ONE; - break; - } - } - } - else - z[3] += ONE; - } - - c = (z[1] + R * (z[2] + R * z[3])) / a; - } - - c *= X[0]; - - for (i = 1; i < EX; i++) - c *= RADIX; - for (i = 1; i > EX; i--) - c *= RADIXI; - - *y = c; -#undef R -} - -/* Convert a multiple precision number *X into a double precision - number 
*Y, Denormal case (|x| < 2**(-1022))). */ -static void -denorm (const mp_no *x, double *y, int p) -{ - long i, k; - long p2 = p; - double c, u, z[5]; - -#define R RADIXI - if (EX < -44 || (EX == -44 && X[1] < TWO5)) - { - *y = ZERO; - return; - } - - if (p2 == 1) - { - if (EX == -42) - { - z[1] = X[1] + TWO10; - z[2] = ZERO; - z[3] = ZERO; - k = 3; - } - else if (EX == -43) - { - z[1] = TWO10; - z[2] = X[1]; - z[3] = ZERO; - k = 2; - } - else - { - z[1] = TWO10; - z[2] = ZERO; - z[3] = X[1]; - k = 1; - } - } - else if (p2 == 2) - { - if (EX == -42) - { - z[1] = X[1] + TWO10; - z[2] = X[2]; - z[3] = ZERO; - k = 3; - } - else if (EX == -43) - { - z[1] = TWO10; - z[2] = X[1]; - z[3] = X[2]; - k = 2; - } - else - { - z[1] = TWO10; - z[2] = ZERO; - z[3] = X[1]; - k = 1; - } - } - else - { - if (EX == -42) - { - z[1] = X[1] + TWO10; - z[2] = X[2]; - k = 3; - } - else if (EX == -43) - { - z[1] = TWO10; - z[2] = X[1]; - k = 2; - } - else - { - z[1] = TWO10; - z[2] = ZERO; - k = 1; - } - z[3] = X[k]; - } - - u = (z[3] + TWO57) - TWO57; - if (u > z[3]) - u -= TWO5; - - if (u == z[3]) - { - for (i = k + 1; i <= p2; i++) - { - if (X[i] == ZERO) - continue; - else - { - z[3] += ONE; - break; - } - } - } - - c = X[0] * ((z[1] + R * (z[2] + R * z[3])) - TWO10); - - *y = c * TWOM1032; -#undef R -} - -/* Convert multiple precision number *X into double precision number *Y. The - result is correctly rounded to the nearest/even. */ -void -__mp_dbl (const mp_no *x, double *y, int p) -{ - if (X[0] == ZERO) - { - *y = ZERO; - return; - } - - if (__glibc_likely (EX > -42 || (EX == -42 && X[1] >= TWO10))) - norm (x, y, p); - else - denorm (x, y, p); -} - -/* Get the multiple precision equivalent of X into *Y. If the precision is too - small, the result is truncated. */ -void -__dbl_mp (double x, mp_no *y, int p) -{ - long i, n; - long p2 = p; - double u; - - /* Sign. 
*/ - if (x == ZERO) - { - Y[0] = ZERO; - return; - } - else if (x > ZERO) - Y[0] = ONE; - else - { - Y[0] = MONE; - x = -x; - } - - /* Exponent. */ - for (EY = ONE; x >= RADIX; EY += ONE) - x *= RADIXI; - for (; x < ONE; EY -= ONE) - x *= RADIX; - - /* Digits. */ - n = MIN (p2, 4); - for (i = 1; i <= n; i++) - { - u = (x + TWO52) - TWO52; - if (u > x) - u -= ONE; - Y[i] = u; - x -= u; - x *= RADIX; - } - for (; i <= p2; i++) - Y[i] = ZERO; -} - -/* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0. The - sign of the sum *Z is not changed. X and Y may overlap but not X and Z or - Y and Z. No guard digit is used. The result equals the exact sum, - truncated. */ -static void -add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p) -{ - long i, j, k; - long p2 = p; - double zk; - - EZ = EX; - - i = p2; - j = p2 + EY - EX; - k = p2 + 1; - - if (__glibc_unlikely (j < 1)) - { - __cpy (x, z, p); - return; - } - - zk = ZERO; - - for (; j > 0; i--, j--) - { - zk += X[i] + Y[j]; - if (zk >= RADIX) - { - Z[k--] = zk - RADIX; - zk = ONE; - } - else - { - Z[k--] = zk; - zk = ZERO; - } - } - - for (; i > 0; i--) - { - zk += X[i]; - if (zk >= RADIX) - { - Z[k--] = zk - RADIX; - zk = ONE; - } - else - { - Z[k--] = zk; - zk = ZERO; - } - } - - if (zk == ZERO) - { - for (i = 1; i <= p2; i++) - Z[i] = Z[i + 1]; - } - else - { - Z[1] = zk; - EZ += ONE; - } -} - -/* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0. - The sign of the difference *Z is not changed. X and Y may overlap but not X - and Z or Y and Z. One guard digit is used. The error is less than one - ULP. */ -static void -sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p) -{ - long i, j, k; - long p2 = p; - double zk; - - EZ = EX; - i = p2; - j = p2 + EY - EX; - k = p2; - - /* Y is too small compared to X, copy X over to the result. 
*/ - if (__glibc_unlikely (j < 1)) - { - __cpy (x, z, p); - return; - } - - /* The relevant least significant digit in Y is non-zero, so we factor it in - to enhance accuracy. */ - if (j < p2 && Y[j + 1] > ZERO) - { - Z[k + 1] = RADIX - Y[j + 1]; - zk = MONE; - } - else - zk = Z[k + 1] = ZERO; - - /* Subtract and borrow. */ - for (; j > 0; i--, j--) - { - zk += (X[i] - Y[j]); - if (zk < ZERO) - { - Z[k--] = zk + RADIX; - zk = MONE; - } - else - { - Z[k--] = zk; - zk = ZERO; - } - } - - /* We're done with digits from Y, so it's just digits in X. */ - for (; i > 0; i--) - { - zk += X[i]; - if (zk < ZERO) - { - Z[k--] = zk + RADIX; - zk = MONE; - } - else - { - Z[k--] = zk; - zk = ZERO; - } - } - - /* Normalize. */ - for (i = 1; Z[i] == ZERO; i++); - EZ = EZ - i + 1; - for (k = 1; i <= p2 + 1;) - Z[k++] = Z[i++]; - for (; k <= p2;) - Z[k++] = ZERO; -} - -/* Add *X and *Y and store the result in *Z. X and Y may overlap, but not X - and Z or Y and Z. One guard digit is used. The error is less than one - ULP. */ -void -__add (const mp_no *x, const mp_no *y, mp_no *z, int p) -{ - int n; - - if (X[0] == ZERO) - { - __cpy (y, z, p); - return; - } - else if (Y[0] == ZERO) - { - __cpy (x, z, p); - return; - } - - if (X[0] == Y[0]) - { - if (__acr (x, y, p) > 0) - { - add_magnitudes (x, y, z, p); - Z[0] = X[0]; - } - else - { - add_magnitudes (y, x, z, p); - Z[0] = Y[0]; - } - } - else - { - if ((n = __acr (x, y, p)) == 1) - { - sub_magnitudes (x, y, z, p); - Z[0] = X[0]; - } - else if (n == -1) - { - sub_magnitudes (y, x, z, p); - Z[0] = Y[0]; - } - else - Z[0] = ZERO; - } -} - -/* Subtract *Y from *X and return the result in *Z. X and Y may overlap but - not X and Z or Y and Z. One guard digit is used. The error is less than - one ULP. 
*/ -void -__sub (const mp_no *x, const mp_no *y, mp_no *z, int p) -{ - int n; - - if (X[0] == ZERO) - { - __cpy (y, z, p); - Z[0] = -Z[0]; - return; - } - else if (Y[0] == ZERO) - { - __cpy (x, z, p); - return; - } - - if (X[0] != Y[0]) - { - if (__acr (x, y, p) > 0) - { - add_magnitudes (x, y, z, p); - Z[0] = X[0]; - } - else - { - add_magnitudes (y, x, z, p); - Z[0] = -Y[0]; - } - } - else - { - if ((n = __acr (x, y, p)) == 1) - { - sub_magnitudes (x, y, z, p); - Z[0] = X[0]; - } - else if (n == -1) - { - sub_magnitudes (y, x, z, p); - Z[0] = -Y[0]; - } - else - Z[0] = ZERO; - } -} - -/* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X - and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P - digits. In case P > 3 the error is bounded by 1.001 ULP. */ -void -__mul (const mp_no *x, const mp_no *y, mp_no *z, int p) -{ - long i, i1, i2, j, k, k2; - long p2 = p; - double u, zk, zk2; - - /* Is z=0? */ - if (__glibc_unlikely (X[0] * Y[0] == ZERO)) - { - Z[0] = ZERO; - return; - } - - /* Multiply, add and carry */ - k2 = (p2 < 3) ? p2 + p2 : p2 + 3; - zk = Z[k2] = ZERO; - for (k = k2; k > 1;) - { - if (k > p2) - { - i1 = k - p2; - i2 = p2 + 1; - } - else - { - i1 = 1; - i2 = k; - } -#if 1 - /* Rearrange this inner loop to allow the fmadd instructions to be - independent and execute in parallel on processors that have - dual symmetrical FP pipelines. */ - if (i1 < (i2 - 1)) - { - /* Make sure we have at least 2 iterations. */ - if (((i2 - i1) & 1L) == 1L) - { - /* Handle the odd iterations case. */ - zk2 = x->d[i2 - 1] * y->d[i1]; - } - else - zk2 = 0.0; - /* Do two multiply/adds per loop iteration, using independent - accumulators; zk and zk2. */ - for (i = i1, j = i2 - 1; i < i2 - 1; i += 2, j -= 2) - { - zk += x->d[i] * y->d[j]; - zk2 += x->d[i + 1] * y->d[j - 1]; - } - zk += zk2; /* Final sum. */ - } - else - { - /* Special case when iterations is 1. */ - zk += x->d[i1] * y->d[i1]; - } -#else - /* The original code. 
*/ - for (i = i1, j = i2 - 1; i < i2; i++, j--) - zk += X[i] * Y[j]; -#endif - - u = (zk + CUTTER) - CUTTER; - if (u > zk) - u -= RADIX; - Z[k] = zk - u; - zk = u * RADIXI; - --k; - } - Z[k] = zk; - - /* Is there a carry beyond the most significant digit? */ - if (Z[1] == ZERO) - { - for (i = 1; i <= p2; i++) - Z[i] = Z[i + 1]; - EZ = EX + EY - 1; - } - else - EZ = EX + EY; - - Z[0] = X[0] * Y[0]; -} - -/* Square *X and store result in *Y. X and Y may not overlap. For P in - [1, 2, 3], the exact result is truncated to P digits. In case P > 3 the - error is bounded by 1.001 ULP. This is a faster special case of - multiplication. */ -void -__sqr (const mp_no *x, mp_no *y, int p) -{ - long i, j, k, ip; - double u, yk; - - /* Is z=0? */ - if (__glibc_unlikely (X[0] == ZERO)) - { - Y[0] = ZERO; - return; - } - - /* We need not iterate through all X's since it's pointless to - multiply zeroes. */ - for (ip = p; ip > 0; ip--) - if (X[ip] != ZERO) - break; - - k = (__glibc_unlikely (p < 3)) ? p + p : p + 3; - - while (k > 2 * ip + 1) - Y[k--] = ZERO; - - yk = ZERO; - - while (k > p) - { - double yk2 = 0.0; - long lim = k / 2; - - if (k % 2 == 0) - { - yk += X[lim] * X[lim]; - lim--; - } - - /* In __mul, this loop (and the one within the next while loop) run - between a range to calculate the mantissa as follows: - - Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1] - + X[n] * Y[k] - - For X == Y, we can get away with summing halfway and doubling the - result. For cases where the range size is even, the mid-point needs - to be added separately (above). */ - for (i = k - p, j = p; i <= lim; i++, j--) - yk2 += X[i] * X[j]; - - yk += 2.0 * yk2; - - u = (yk + CUTTER) - CUTTER; - if (u > yk) - u -= RADIX; - Y[k--] = yk - u; - yk = u * RADIXI; - } - - while (k > 1) - { - double yk2 = 0.0; - long lim = k / 2; - - if (k % 2 == 0) - { - yk += X[lim] * X[lim]; - lim--; - } - - /* Likewise for this loop. 
*/ - for (i = 1, j = k - 1; i <= lim; i++, j--) - yk2 += X[i] * X[j]; - - yk += 2.0 * yk2; - - u = (yk + CUTTER) - CUTTER; - if (u > yk) - u -= RADIX; - Y[k--] = yk - u; - yk = u * RADIXI; - } - Y[k] = yk; - - /* Squares are always positive. */ - Y[0] = 1.0; - - EY = 2 * EX; - /* Is there a carry beyond the most significant digit? */ - if (__glibc_unlikely (Y[1] == ZERO)) - { - for (i = 1; i <= p; i++) - Y[i] = Y[i + 1]; - EY--; - } -} - -/* Invert *X and store in *Y. Relative error bound: - - For P = 2: 1.001 * R ^ (1 - P) - - For P = 3: 1.063 * R ^ (1 - P) - - For P > 3: 2.001 * R ^ (1 - P) - - *X = 0 is not permissible. */ -static void -__inv (const mp_no *x, mp_no *y, int p) -{ - long i; - double t; - mp_no z, w; - static const int np1[] = - { 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 - }; - - __cpy (x, &z, p); - z.e = 0; - __mp_dbl (&z, &t, p); - t = ONE / t; - __dbl_mp (t, y, p); - EY -= EX; - - for (i = 0; i < np1[p]; i++) - { - __cpy (y, &w, p); - __mul (x, &w, y, p); - __sub (&mptwo, y, &z, p); - __mul (&w, &z, y, p); - } -} - -/* Divide *X by *Y and store result in *Z. X and Y may overlap but not X and Z - or Y and Z. Relative error bound: - - For P = 2: 2.001 * R ^ (1 - P) - - For P = 3: 2.063 * R ^ (1 - P) - - For P > 3: 3.001 * R ^ (1 - P) - - *X = 0 is not permissible. */ -void -__dvd (const mp_no *x, const mp_no *y, mp_no *z, int p) -{ - mp_no w; - - if (X[0] == ZERO) - Z[0] = ZERO; - else - { - __inv (y, &w, p); - __mul (x, &w, z, p); - } -} diff --git a/libc/sysdeps/powerpc/powerpc64/power4/fpu/slowexp.c b/libc/sysdeps/powerpc/powerpc64/power4/fpu/slowexp.c deleted file mode 100644 index d93f50544..000000000 --- a/libc/sysdeps/powerpc/powerpc64/power4/fpu/slowexp.c +++ /dev/null @@ -1,65 +0,0 @@ -/* - * IBM Accurate Mathematical Library - * written by International Business Machines Corp. - * Copyright (C) 2001-2013 Free Software Foundation, Inc. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - */ -/**************************************************************************/ -/* MODULE_NAME:slowexp.c */ -/* */ -/* FUNCTION:slowexp */ -/* */ -/* FILES NEEDED:mpa.h */ -/* mpa.c mpexp.c */ -/* */ -/*Converting from double precision to Multi-precision and calculating */ -/* e^x */ -/**************************************************************************/ -#include <math_private.h> - -#ifdef NO_LONG_DOUBLE -#include "mpa.h" -void __mpexp(mp_no *x, mp_no *y, int p); -#endif - -/*Converting from double precision to Multi-precision and calculating e^x */ -double __slowexp(double x) { -#ifdef NO_LONG_DOUBLE - double w,z,res,eps=3.0e-26; - int p; - mp_no mpx, mpy, mpz,mpw,mpeps,mpcor; - - p=6; - __dbl_mp(x,&mpx,p); /* Convert a double precision number x */ - /* into a multiple precision number mpx with prec. p. 
*/ - __mpexp(&mpx, &mpy, p); /* Multi-Precision exponential function */ - __dbl_mp(eps,&mpeps,p); - __mul(&mpeps,&mpy,&mpcor,p); - __add(&mpy,&mpcor,&mpw,p); - __sub(&mpy,&mpcor,&mpz,p); - __mp_dbl(&mpw, &w, p); - __mp_dbl(&mpz, &z, p); - if (w == z) return w; - else { /* if calculating is not exactly */ - p = 32; - __dbl_mp(x,&mpx,p); - __mpexp(&mpx, &mpy, p); - __mp_dbl(&mpy, &res, p); - return res; - } -#else - return (double) __ieee754_expl((long double)x); -#endif -} diff --git a/libc/sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c b/libc/sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c deleted file mode 100644 index 7c97d9581..000000000 --- a/libc/sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * IBM Accurate Mathematical Library - * written by International Business Machines Corp. - * Copyright (C) 2001-2013 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - */ -/*************************************************************************/ -/* MODULE_NAME:slowpow.c */ -/* */ -/* FUNCTION:slowpow */ -/* */ -/*FILES NEEDED:mpa.h */ -/* mpa.c mpexp.c mplog.c halfulp.c */ -/* */ -/* Given two IEEE double machine numbers y,x , routine computes the */ -/* correctly rounded (to nearest) value of x^y. 
Result calculated by */ -/* multiplication (in halfulp.c) or if result isn't accurate enough */ -/* then routine converts x and y into multi-precision doubles and */ -/* recompute. */ -/*************************************************************************/ - -#include "mpa.h" -#include <math_private.h> - -void __mpexp (mp_no * x, mp_no * y, int p); -void __mplog (mp_no * x, mp_no * y, int p); -double ulog (double); -double __halfulp (double x, double y); - -double -__slowpow (double x, double y, double z) -{ - double res, res1; - long double ldw, ldz, ldpp; - static const long double ldeps = 0x4.0p-96; - - res = __halfulp (x, y); /* halfulp() returns -10 or x^y */ - if (res >= 0) - return res; /* if result was really computed by halfulp */ - /* else, if result was not really computed by halfulp */ - - /* Compute pow as long double, 106 bits */ - ldz = __ieee754_logl ((long double) x); - ldw = (long double) y *ldz; - ldpp = __ieee754_expl (ldw); - res = (double) (ldpp + ldeps); - res1 = (double) (ldpp - ldeps); - - if (res != res1) /* if result still not accurate enough */ - { /* use mpa for higher precision. */ - mp_no mpx, mpy, mpz, mpw, mpp, mpr, mpr1; - static const mp_no eps = { -3, {1.0, 4.0} }; - int p; - - p = 10; /* p=precision 240 bits */ - __dbl_mp (x, &mpx, p); - __dbl_mp (y, &mpy, p); - __dbl_mp (z, &mpz, p); - __mplog (&mpx, &mpz, p); /* log(x) = z */ - __mul (&mpy, &mpz, &mpw, p); /* y * z =w */ - __mpexp (&mpw, &mpp, p); /* e^w =pp */ - __add (&mpp, &eps, &mpr, p); /* pp+eps =r */ - __mp_dbl (&mpr, &res, p); - __sub (&mpp, &eps, &mpr1, p); /* pp -eps =r1 */ - __mp_dbl (&mpr1, &res1, p); /* converting into double precision */ - if (res == res1) - return res; - - /* if we get here result wasn't calculated exactly, continue for - more exact calculation using 768 bits. 
*/ - p = 32; - __dbl_mp (x, &mpx, p); - __dbl_mp (y, &mpy, p); - __dbl_mp (z, &mpz, p); - __mplog (&mpx, &mpz, p); /* log(c)=z */ - __mul (&mpy, &mpz, &mpw, p); /* y*z =w */ - __mpexp (&mpw, &mpp, p); /* e^w=pp */ - __mp_dbl (&mpp, &res, p); /* converting into double precision */ - } - return res; -} diff --git a/libc/sysdeps/powerpc/powerpc64/power4/memcmp.S b/libc/sysdeps/powerpc/powerpc64/power4/memcmp.S index 7df52f810..6378ecb2d 100644 --- a/libc/sysdeps/powerpc/powerpc64/power4/memcmp.S +++ b/libc/sysdeps/powerpc/powerpc64/power4/memcmp.S @@ -17,13 +17,11 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> /* int [r3] memcmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */ .machine power4 -EALIGN (BP_SYM(memcmp), 4, 0) +EALIGN (memcmp, 4, 0) CALL_MCOUNT 3 #define rTMP r0 @@ -31,9 +29,6 @@ EALIGN (BP_SYM(memcmp), 4, 0) #define rSTR1 r3 /* first string arg */ #define rSTR2 r4 /* second string arg */ #define rN r5 /* max string length */ -/* Note: The Bounded pointer support in this code is broken. This code - was inherited from PPC32 and that support was never completed. - Current PPC gcc does not support -fbounds-check or -fbounded-pointers. */ #define rWORD1 r6 /* current word in s1 */ #define rWORD2 r7 /* current word in s2 */ #define rWORD3 r8 /* next word in s1 */ @@ -976,6 +971,6 @@ L(duzeroLength): li rRTN,0 blr -END (BP_SYM (memcmp)) +END (memcmp) libc_hidden_builtin_def (memcmp) weak_alias (memcmp, bcmp) diff --git a/libc/sysdeps/powerpc/powerpc64/power4/memcpy.S b/libc/sysdeps/powerpc/powerpc64/power4/memcpy.S index 734434af0..c43d1d2e4 100644 --- a/libc/sysdeps/powerpc/powerpc64/power4/memcpy.S +++ b/libc/sysdeps/powerpc/powerpc64/power4/memcpy.S @@ -17,8 +17,6 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> /* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); Returns 'dst'. 
@@ -36,7 +34,7 @@ Each case has a optimized unrolled loop. */ .machine power4 -EALIGN (BP_SYM (memcpy), 5, 0) +EALIGN (memcpy, 5, 0) CALL_MCOUNT 3 cmpldi cr1,5,31 @@ -413,5 +411,5 @@ EALIGN (BP_SYM (memcpy), 5, 0) ld 31,-8(1) ld 3,-16(1) blr -END_GEN_TB (BP_SYM (memcpy),TB_TOCLESS) +END_GEN_TB (memcpy,TB_TOCLESS) libc_hidden_builtin_def (memcpy) diff --git a/libc/sysdeps/powerpc/powerpc64/power4/memset.S b/libc/sysdeps/powerpc/powerpc64/power4/memset.S index 198269272..dbecee8b9 100644 --- a/libc/sysdeps/powerpc/powerpc64/power4/memset.S +++ b/libc/sysdeps/powerpc/powerpc64/power4/memset.S @@ -17,8 +17,6 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> /* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); Returns 's'. @@ -28,22 +26,15 @@ to 0, to take advantage of the dcbz instruction. */ .machine power4 -EALIGN (BP_SYM (memset), 5, 0) +EALIGN (memset, 5, 0) CALL_MCOUNT 3 #define rTMP r0 #define rRTN r3 /* Initial value of 1st argument. */ -#if __BOUNDED_POINTERS__ -# define rMEMP0 r4 /* Original value of 1st arg. */ -# define rCHR r5 /* Char to set in each byte. */ -# define rLEN r6 /* Length of region to set. */ -# define rMEMP r10 /* Address at which we are storing. */ -#else -# define rMEMP0 r3 /* Original value of 1st arg. */ -# define rCHR r4 /* Char to set in each byte. */ -# define rLEN r5 /* Length of region to set. */ -# define rMEMP r6 /* Address at which we are storing. */ -#endif +#define rMEMP0 r3 /* Original value of 1st arg. */ +#define rCHR r4 /* Char to set in each byte. */ +#define rLEN r5 /* Length of region to set. */ +#define rMEMP r6 /* Address at which we are storing. */ #define rALIGN r7 /* Number of bytes we are setting now (when aligning). */ #define rMEMP2 r8 @@ -51,14 +42,6 @@ EALIGN (BP_SYM (memset), 5, 0) #define rCLS r8 /* Cache line size obtained from static. */ #define rCLM r9 /* Cache line size mask to check for cache alignment. 
*/ L(_memset): -#if __BOUNDED_POINTERS__ - cmpldi cr1, rRTN, 0 - CHECK_BOUNDS_BOTH_WIDE (rMEMP0, rTMP, rTMP2, rLEN) - beq cr1, L(b0) - STORE_RETURN_VALUE (rMEMP0) - STORE_RETURN_BOUNDS (rTMP, rTMP2) -L(b0): -#endif /* Take care of case for size <= 4. */ cmpldi cr1, rLEN, 8 andi. rALIGN, rMEMP0, 7 @@ -249,25 +232,16 @@ L(medium_27f): L(medium_28t): std rCHR, -8(rMEMP) blr -END_GEN_TB (BP_SYM (memset),TB_TOCLESS) +END_GEN_TB (memset,TB_TOCLESS) libc_hidden_builtin_def (memset) /* Copied from bzero.S to prevent the linker from inserting a stub between bzero and memset. */ -ENTRY (BP_SYM (__bzero)) +ENTRY (__bzero) CALL_MCOUNT 3 -#if __BOUNDED_POINTERS__ - mr r6,r4 - li r5,0 - mr r4,r3 - /* Tell memset that we don't want a return value. */ - li r3,0 - b L(_memset) -#else mr r5,r4 li r4,0 b L(_memset) -#endif -END_GEN_TB (BP_SYM (__bzero),TB_TOCLESS) +END_GEN_TB (__bzero,TB_TOCLESS) -weak_alias (BP_SYM (__bzero), BP_SYM (bzero)) +weak_alias (__bzero, bzero) diff --git a/libc/sysdeps/powerpc/powerpc64/power4/strncmp.S b/libc/sysdeps/powerpc/powerpc64/power4/strncmp.S index 19877fa78..1276e16a5 100644 --- a/libc/sysdeps/powerpc/powerpc64/power4/strncmp.S +++ b/libc/sysdeps/powerpc/powerpc64/power4/strncmp.S @@ -17,14 +17,12 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> /* See strlen.s for comments on how the end-of-string testing works. */ /* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */ -EALIGN (BP_SYM(strncmp), 4, 0) +EALIGN (strncmp, 4, 0) CALL_MCOUNT 3 #define rTMP r0 @@ -32,9 +30,6 @@ EALIGN (BP_SYM(strncmp), 4, 0) #define rSTR1 r3 /* first string arg */ #define rSTR2 r4 /* second string arg */ #define rN r5 /* max string length */ -/* Note: The Bounded pointer support in this code is broken. This code - was inherited from PPC32 and that support was never completed. - Current PPC gcc does not support -fbounds-check or -fbounded-pointers. 
*/ #define rWORD1 r6 /* current word in s1 */ #define rWORD2 r7 /* current word in s2 */ #define rWORD3 r10 @@ -175,5 +170,5 @@ L(u4): sub rRTN, rWORD1, rWORD2 L(ux): li rRTN, 0 blr -END (BP_SYM (strncmp)) +END (strncmp) libc_hidden_builtin_def (strncmp) diff --git a/libc/sysdeps/powerpc/powerpc64/power6/memcpy.S b/libc/sysdeps/powerpc/powerpc64/power6/memcpy.S index 64f5b2f42..55c0d7118 100644 --- a/libc/sysdeps/powerpc/powerpc64/power6/memcpy.S +++ b/libc/sysdeps/powerpc/powerpc64/power6/memcpy.S @@ -17,8 +17,6 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> /* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); Returns 'dst'. @@ -43,7 +41,7 @@ for the destination. */ .machine "power6" -EALIGN (BP_SYM (memcpy), 7, 0) +EALIGN (memcpy, 7, 0) CALL_MCOUNT 3 cmpldi cr1,5,31 @@ -1165,5 +1163,5 @@ L(du_done): ld 31,-8(1) ld 3,-16(1) blr -END_GEN_TB (BP_SYM (memcpy),TB_TOCLESS) +END_GEN_TB (memcpy,TB_TOCLESS) libc_hidden_builtin_def (memcpy) diff --git a/libc/sysdeps/powerpc/powerpc64/power6/memset.S b/libc/sysdeps/powerpc/powerpc64/power6/memset.S index a7913a10a..541a45fd3 100644 --- a/libc/sysdeps/powerpc/powerpc64/power6/memset.S +++ b/libc/sysdeps/powerpc/powerpc64/power6/memset.S @@ -17,8 +17,6 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> /* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); Returns 's'. @@ -28,34 +26,19 @@ to 0, to take advantage of the dcbz instruction. */ .machine power6 -EALIGN (BP_SYM (memset), 7, 0) +EALIGN (memset, 7, 0) CALL_MCOUNT 3 #define rTMP r0 #define rRTN r3 /* Initial value of 1st argument. */ -#if __BOUNDED_POINTERS__ -# define rMEMP0 r4 /* Original value of 1st arg. */ -# define rCHR r5 /* Char to set in each byte. */ -# define rLEN r6 /* Length of region to set. */ -# define rMEMP r10 /* Address at which we are storing. */ -#else -# define rMEMP0 r3 /* Original value of 1st arg. 
*/ -# define rCHR r4 /* Char to set in each byte. */ -# define rLEN r5 /* Length of region to set. */ -# define rMEMP r6 /* Address at which we are storing. */ -#endif +#define rMEMP0 r3 /* Original value of 1st arg. */ +#define rCHR r4 /* Char to set in each byte. */ +#define rLEN r5 /* Length of region to set. */ +#define rMEMP r6 /* Address at which we are storing. */ #define rALIGN r7 /* Number of bytes we are setting now (when aligning). */ #define rMEMP2 r8 #define rMEMP3 r9 /* Alt mem pointer. */ L(_memset): -#if __BOUNDED_POINTERS__ - cmpldi cr1, rRTN, 0 - CHECK_BOUNDS_BOTH_WIDE (rMEMP0, rTMP, rTMP2, rLEN) - beq cr1, L(b0) - STORE_RETURN_VALUE (rMEMP0) - STORE_RETURN_BOUNDS (rTMP, rTMP2) -L(b0): -#endif /* Take care of case for size <= 4. */ cmpldi cr1, rLEN, 8 andi. rALIGN, rMEMP0, 7 @@ -393,25 +376,16 @@ L(medium_27f): L(medium_28t): std rCHR, -8(rMEMP) blr -END_GEN_TB (BP_SYM (memset),TB_TOCLESS) +END_GEN_TB (memset,TB_TOCLESS) libc_hidden_builtin_def (memset) /* Copied from bzero.S to prevent the linker from inserting a stub between bzero and memset. */ -ENTRY (BP_SYM (__bzero)) +ENTRY (__bzero) CALL_MCOUNT 3 -#if __BOUNDED_POINTERS__ - mr r6,r4 - li r5,0 - mr r4,r3 - /* Tell memset that we don't want a return value. */ - li r3,0 - b L(_memset) -#else mr r5,r4 li r4,0 b L(_memset) -#endif -END_GEN_TB (BP_SYM (__bzero),TB_TOCLESS) +END_GEN_TB (__bzero,TB_TOCLESS) -weak_alias (BP_SYM (__bzero), BP_SYM (bzero)) +weak_alias (__bzero, bzero) diff --git a/libc/sysdeps/powerpc/powerpc64/power7/memchr.S b/libc/sysdeps/powerpc/powerpc64/power7/memchr.S index 7b71a19e6..3416897f5 100644 --- a/libc/sysdeps/powerpc/powerpc64/power7/memchr.S +++ b/libc/sysdeps/powerpc/powerpc64/power7/memchr.S @@ -18,12 +18,10 @@ <http://www.gnu.org/licenses/>. 
*/ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> /* int [r3] memchr (char *s [r3], int byte [r4], int size [r5]) */ .machine power7 -ENTRY (BP_SYM (__memchr)) +ENTRY (__memchr) CALL_MCOUNT 2 dcbt 0,r3 clrrdi r8,r3,3 @@ -202,6 +200,6 @@ L(loop_small): /* loop_small has been unrolled. */ blr -END (BP_SYM (__memchr)) -weak_alias (BP_SYM (__memchr), BP_SYM(memchr)) +END (__memchr) +weak_alias (__memchr, memchr) libc_hidden_builtin_def (memchr) diff --git a/libc/sysdeps/powerpc/powerpc64/power7/memcmp.S b/libc/sysdeps/powerpc/powerpc64/power7/memcmp.S index a7caa4894..f190c6461 100644 --- a/libc/sysdeps/powerpc/powerpc64/power7/memcmp.S +++ b/libc/sysdeps/powerpc/powerpc64/power7/memcmp.S @@ -17,15 +17,13 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> /* int [r3] memcmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */ .machine power7 -EALIGN (BP_SYM(memcmp),4,0) +EALIGN (memcmp,4,0) CALL_MCOUNT 3 #define rTMP r0 @@ -33,9 +31,6 @@ EALIGN (BP_SYM(memcmp),4,0) #define rSTR1 r3 /* first string arg */ #define rSTR2 r4 /* second string arg */ #define rN r5 /* max string length */ -/* Note: The Bounded pointer support in this code is broken. This code - was inherited from PPC32 and that support was never completed. - Current PPC gcc does not support -fbounds-check or -fbounded-pointers. */ #define rWORD1 r6 /* current word in s1 */ #define rWORD2 r7 /* current word in s2 */ #define rWORD3 r8 /* next word in s1 */ @@ -978,6 +973,6 @@ L(duzeroLength): li rRTN,0 blr -END (BP_SYM (memcmp)) +END (memcmp) libc_hidden_builtin_def (memcmp) weak_alias (memcmp,bcmp) diff --git a/libc/sysdeps/powerpc/powerpc64/power7/memcpy.S b/libc/sysdeps/powerpc/powerpc64/power7/memcpy.S index aa0db8e15..800a9f1bb 100644 --- a/libc/sysdeps/powerpc/powerpc64/power7/memcpy.S +++ b/libc/sysdeps/powerpc/powerpc64/power7/memcpy.S @@ -18,15 +18,13 @@ <http://www.gnu.org/licenses/>. 
*/ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> /* __ptr_t [r3] memcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); Returns 'dst'. */ .machine power7 -EALIGN (BP_SYM (memcpy), 5, 0) +EALIGN (memcpy, 5, 0) CALL_MCOUNT 3 cmpldi cr1,5,31 @@ -502,5 +500,5 @@ L(end_unaligned_loop): ld 3,-16(1) blr -END_GEN_TB (BP_SYM (memcpy),TB_TOCLESS) +END_GEN_TB (memcpy,TB_TOCLESS) libc_hidden_builtin_def (memcpy) diff --git a/libc/sysdeps/powerpc/powerpc64/power7/mempcpy.S b/libc/sysdeps/powerpc/powerpc64/power7/mempcpy.S index 9993040ac..f20be938d 100644 --- a/libc/sysdeps/powerpc/powerpc64/power7/mempcpy.S +++ b/libc/sysdeps/powerpc/powerpc64/power7/mempcpy.S @@ -18,15 +18,13 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> /* __ptr_t [r3] __mempcpy (__ptr_t dst [r3], __ptr_t src [r4], size_t len [r5]); Returns 'dst' + 'len'. */ .machine power7 -EALIGN (BP_SYM (__mempcpy), 5, 0) +EALIGN (__mempcpy, 5, 0) CALL_MCOUNT 3 cmpldi cr1,5,31 @@ -451,7 +449,7 @@ L(end_unaligned_loop): add 3,3,5 blr -END_GEN_TB (BP_SYM (__mempcpy),TB_TOCLESS) -libc_hidden_def (BP_SYM (__mempcpy)) -weak_alias (BP_SYM (__mempcpy), BP_SYM (mempcpy)) +END_GEN_TB (__mempcpy,TB_TOCLESS) +libc_hidden_def (__mempcpy) +weak_alias (__mempcpy, mempcpy) libc_hidden_builtin_def (mempcpy) diff --git a/libc/sysdeps/powerpc/powerpc64/power7/memrchr.S b/libc/sysdeps/powerpc/powerpc64/power7/memrchr.S index d3ffe4c08..d24fbbb1b 100644 --- a/libc/sysdeps/powerpc/powerpc64/power7/memrchr.S +++ b/libc/sysdeps/powerpc/powerpc64/power7/memrchr.S @@ -18,12 +18,10 @@ <http://www.gnu.org/licenses/>. 
*/ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> /* int [r3] memrchr (char *s [r3], int byte [r4], int size [r5]) */ .machine power7 -ENTRY (BP_SYM (__memrchr)) +ENTRY (__memrchr) CALL_MCOUNT dcbt 0,r3 mr r7,r3 @@ -174,6 +172,6 @@ L(loop_small): ble L(null) b L(loop_small) -END (BP_SYM (__memrchr)) -weak_alias (BP_SYM (__memrchr), BP_SYM(memrchr)) +END (__memrchr) +weak_alias (__memrchr, memrchr) libc_hidden_builtin_def (memrchr) diff --git a/libc/sysdeps/powerpc/powerpc64/power7/memset.S b/libc/sysdeps/powerpc/powerpc64/power7/memset.S index abb2d3528..b24cfa163 100644 --- a/libc/sysdeps/powerpc/powerpc64/power7/memset.S +++ b/libc/sysdeps/powerpc/powerpc64/power7/memset.S @@ -18,14 +18,12 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> /* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); Returns 's'. */ .machine power7 -EALIGN (BP_SYM (memset), 5, 0) +EALIGN (memset, 5, 0) CALL_MCOUNT 3 L(_memset): @@ -382,16 +380,16 @@ L(small): stw 4,4(10) blr -END_GEN_TB (BP_SYM (memset),TB_TOCLESS) +END_GEN_TB (memset,TB_TOCLESS) libc_hidden_builtin_def (memset) /* Copied from bzero.S to prevent the linker from inserting a stub between bzero and memset. */ -ENTRY (BP_SYM (__bzero)) +ENTRY (__bzero) CALL_MCOUNT 3 mr r5,r4 li r4,0 b L(_memset) -END_GEN_TB (BP_SYM (__bzero),TB_TOCLESS) +END_GEN_TB (__bzero,TB_TOCLESS) -weak_alias (BP_SYM (__bzero), BP_SYM (bzero)) +weak_alias (__bzero, bzero) diff --git a/libc/sysdeps/powerpc/powerpc64/power7/rawmemchr.S b/libc/sysdeps/powerpc/powerpc64/power7/rawmemchr.S index 5fc284de8..50a33d8fa 100644 --- a/libc/sysdeps/powerpc/powerpc64/power7/rawmemchr.S +++ b/libc/sysdeps/powerpc/powerpc64/power7/rawmemchr.S @@ -18,12 +18,10 @@ <http://www.gnu.org/licenses/>. 
*/ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> /* int [r3] rawmemchr (void *s [r3], int c [r4]) */ .machine power7 -ENTRY (BP_SYM(__rawmemchr)) +ENTRY (__rawmemchr) CALL_MCOUNT 2 dcbt 0,r3 clrrdi r8,r3,3 /* Align the address to doubleword boundary. */ @@ -97,6 +95,6 @@ L(done): srdi r0,r0,3 /* Convert leading zeroes to bytes. */ add r3,r8,r0 /* Return address of the matching char. */ blr -END (BP_SYM (__rawmemchr)) +END (__rawmemchr) weak_alias (__rawmemchr,rawmemchr) libc_hidden_builtin_def (__rawmemchr) diff --git a/libc/sysdeps/powerpc/powerpc64/power7/strcasecmp.S b/libc/sysdeps/powerpc/powerpc64/power7/strcasecmp.S index 6323154ea..9eee38469 100644 --- a/libc/sysdeps/powerpc/powerpc64/power7/strcasecmp.S +++ b/libc/sysdeps/powerpc/powerpc64/power7/strcasecmp.S @@ -17,8 +17,6 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> #include <locale-defines.h> /* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] ) @@ -33,7 +31,7 @@ # define STRCMP strcasecmp #endif -ENTRY (BP_SYM (__STRCMP)) +ENTRY (__STRCMP) CALL_MCOUNT 2 #define rRTN r3 /* Return value */ @@ -118,7 +116,7 @@ L(done): subf r0, rLWR2, rLWR1 extsw rRTN, r0 blr -END (BP_SYM (__STRCMP)) +END (__STRCMP) -weak_alias (BP_SYM (__STRCMP), BP_SYM (STRCMP)) +weak_alias (__STRCMP, STRCMP) libc_hidden_builtin_def (__STRCMP) diff --git a/libc/sysdeps/powerpc/powerpc64/power7/strchr.S b/libc/sysdeps/powerpc/powerpc64/power7/strchr.S index 04b7d4f5e..3ffe7a188 100644 --- a/libc/sysdeps/powerpc/powerpc64/power7/strchr.S +++ b/libc/sysdeps/powerpc/powerpc64/power7/strchr.S @@ -18,12 +18,10 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> /* int [r3] strchr (char *s [r3], int c [r4]) */ .machine power7 -ENTRY (BP_SYM(strchr)) +ENTRY (strchr) CALL_MCOUNT 2 dcbt 0,r3 clrrdi r8,r3,3 /* Align the address to doubleword boundary. 
*/ @@ -198,6 +196,6 @@ L(done_null): srdi r0,r0,3 /* Convert leading zeros to bytes. */ add r3,r8,r0 /* Return address of the matching null byte. */ blr -END (BP_SYM (strchr)) -weak_alias (BP_SYM (strchr), BP_SYM (index)) +END (strchr) +weak_alias (strchr, index) libc_hidden_builtin_def (strchr) diff --git a/libc/sysdeps/powerpc/powerpc64/power7/strchrnul.S b/libc/sysdeps/powerpc/powerpc64/power7/strchrnul.S index 2b1e1c002..9dbc51b0d 100644 --- a/libc/sysdeps/powerpc/powerpc64/power7/strchrnul.S +++ b/libc/sysdeps/powerpc/powerpc64/power7/strchrnul.S @@ -18,12 +18,10 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> /* int [r3] strchrnul (char *s [r3], int c [r4]) */ .machine power7 -ENTRY (BP_SYM(__strchrnul)) +ENTRY (__strchrnul) CALL_MCOUNT 2 dcbt 0,r3 clrrdi r8,r3,3 /* Align the address to doubleword boundary. */ @@ -112,6 +110,6 @@ L(done): srdi r0,r0,3 /* Convert leading zeros to bytes. */ add r3,r8,r0 /* Return address of matching c/null byte. */ blr -END (BP_SYM (__strchrnul)) +END (__strchrnul) weak_alias (__strchrnul,strchrnul) libc_hidden_builtin_def (__strchrnul) diff --git a/libc/sysdeps/powerpc/powerpc64/power7/strlen.S b/libc/sysdeps/powerpc/powerpc64/power7/strlen.S index a36aa7d97..343216952 100644 --- a/libc/sysdeps/powerpc/powerpc64/power7/strlen.S +++ b/libc/sysdeps/powerpc/powerpc64/power7/strlen.S @@ -18,12 +18,10 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> /* int [r3] strlen (char *s [r3]) */ .machine power7 -ENTRY (BP_SYM (strlen)) +ENTRY (strlen) CALL_MCOUNT 1 dcbt 0,r3 clrrdi r4,r3,3 /* Align the address to doubleword boundary. */ @@ -94,5 +92,5 @@ L(done): srdi r0,r0,3 /* Convert leading zeroes to bytes. */ add r3,r5,r0 /* Compute final length. 
*/ blr -END (BP_SYM (strlen)) +END (strlen) libc_hidden_builtin_def (strlen) diff --git a/libc/sysdeps/powerpc/powerpc64/power7/strncmp.S b/libc/sysdeps/powerpc/powerpc64/power7/strncmp.S index 25a6baf47..77ecad5ab 100644 --- a/libc/sysdeps/powerpc/powerpc64/power7/strncmp.S +++ b/libc/sysdeps/powerpc/powerpc64/power7/strncmp.S @@ -17,8 +17,6 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> /* See strlen.s for comments on how the end-of-string testing works. */ @@ -26,7 +24,7 @@ const char *s2 [r4], size_t size [r5]) */ -EALIGN (BP_SYM(strncmp),5,0) +EALIGN (strncmp,5,0) CALL_MCOUNT 3 #define rTMP r0 @@ -34,9 +32,6 @@ EALIGN (BP_SYM(strncmp),5,0) #define rSTR1 r3 /* first string arg */ #define rSTR2 r4 /* second string arg */ #define rN r5 /* max string length */ -/* Note: The Bounded pointer support in this code is broken. This code - was inherited from PPC32 and that support was never completed. - Current PPC gcc does not support -fbounds-check or -fbounded-pointers. */ #define rWORD1 r6 /* current word in s1 */ #define rWORD2 r7 /* current word in s2 */ #define rWORD3 r10 @@ -179,5 +174,5 @@ L(u4): sub rRTN,rWORD1,rWORD2 L(ux): li rRTN,0 blr -END (BP_SYM (strncmp)) +END (strncmp) libc_hidden_builtin_def (strncmp) diff --git a/libc/sysdeps/powerpc/powerpc64/power7/strnlen.S b/libc/sysdeps/powerpc/powerpc64/power7/strnlen.S index 23e0a355c..37c7dbfe8 100644 --- a/libc/sysdeps/powerpc/powerpc64/power7/strnlen.S +++ b/libc/sysdeps/powerpc/powerpc64/power7/strnlen.S @@ -18,12 +18,10 @@ <http://www.gnu.org/licenses/>. 
*/ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> /* int [r3] strnlen (char *s [r3], int size [r4]) */ .machine power7 -ENTRY (BP_SYM (__strnlen)) +ENTRY (__strnlen) CALL_MCOUNT 2 dcbt 0,r3 clrrdi r8,r3,3 @@ -167,6 +165,6 @@ L(loop_small): cmpld r9,r7 bge L(end_max) b L(loop_small) -END (BP_SYM (__strnlen)) -weak_alias (BP_SYM (__strnlen), BP_SYM(strnlen)) +END (__strnlen) +weak_alias (__strnlen, strnlen) libc_hidden_builtin_def (strnlen) diff --git a/libc/sysdeps/powerpc/powerpc64/setjmp-common.S b/libc/sysdeps/powerpc/powerpc64/setjmp-common.S index 8586c2d4e..58ec61062 100644 --- a/libc/sysdeps/powerpc/powerpc64/setjmp-common.S +++ b/libc/sysdeps/powerpc/powerpc64/setjmp-common.S @@ -23,8 +23,6 @@ #else #include <jmpbuf-offsets.h> #endif -#include <bp-sym.h> -#include <bp-asm.h> #ifndef __NO_VMX__ .section ".toc","aw" @@ -55,24 +53,23 @@ END (setjmp) that saves r2 since the call won't go via a plt call stub. See bugz #269. __GI__setjmp is used in csu/libc-start.c when HAVE_CLEANUP_JMP_BUF is defined. */ -ENTRY (BP_SYM (__GI__setjmp)) +ENTRY (__GI__setjmp) std r2,40(r1) /* Save the callers TOC in the save area. */ cfi_endproc -END_2 (BP_SYM (__GI__setjmp)) +END_2 (__GI__setjmp) /* Fall thru. */ #endif -ENTRY (BP_SYM (_setjmp)) +ENTRY (_setjmp) CALL_MCOUNT 1 li r4,0 /* Set second argument to 0. 
*/ b JUMPTARGET (GLUE(__sigsetjmp,_ent)) -END (BP_SYM (_setjmp)) +END (_setjmp) libc_hidden_def (_setjmp) -ENTRY (BP_SYM (__sigsetjmp)) +ENTRY (__sigsetjmp) CALL_MCOUNT 2 JUMPTARGET(GLUE(__sigsetjmp,_ent)): - CHECK_BOUNDS_BOTH_WIDE_LIT (r3, r8, r9, JB_SIZE) #ifdef PTR_MANGLE mr r5, r1 PTR_MANGLE (r5, r6) @@ -219,18 +216,18 @@ L(no_vmx): li r3,0 blr #elif defined SHARED - b JUMPTARGET (BP_SYM (__sigjmp_save)) + b JUMPTARGET (__sigjmp_save) #else mflr r0 std r0,16(r1) stdu r1,-112(r1) cfi_adjust_cfa_offset(112) cfi_offset(lr,16) - bl JUMPTARGET (BP_SYM (__sigjmp_save)) + bl JUMPTARGET (__sigjmp_save) nop ld r0,112+16(r1) addi r1,r1,112 mtlr r0 blr #endif -END (BP_SYM (__sigsetjmp)) +END (__sigsetjmp) diff --git a/libc/sysdeps/powerpc/powerpc64/start.S b/libc/sysdeps/powerpc/powerpc64/start.S index 210779c84..ec0fd30e7 100644 --- a/libc/sysdeps/powerpc/powerpc64/start.S +++ b/libc/sysdeps/powerpc/powerpc64/start.S @@ -34,7 +34,6 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include "bp-sym.h" /* These are the various addresses we require. */ #ifdef PIC @@ -46,7 +45,7 @@ L(start_addresses): .quad 0 /* was _SDA_BASE_ but not in 64-bit ABI*/ /* function descriptors so don't need JUMPTARGET */ - .quad BP_SYM(main) + .quad main .quad __libc_csu_init .quad __libc_csu_fini @@ -71,7 +70,7 @@ ENTRY(_start) ld r8,.L01(r2) /* and continue in libc-start, in glibc. */ - b JUMPTARGET(BP_SYM(__libc_start_main)) + b JUMPTARGET(__libc_start_main) /* The linker needs this nop to recognize that it's OK to call via a TOC adjusting stub. */ nop diff --git a/libc/sysdeps/powerpc/powerpc64/stpcpy.S b/libc/sysdeps/powerpc/powerpc64/stpcpy.S index d9cffe9ad..070cd4662 100644 --- a/libc/sysdeps/powerpc/powerpc64/stpcpy.S +++ b/libc/sysdeps/powerpc/powerpc64/stpcpy.S @@ -17,37 +17,24 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> /* See strlen.s for comments on how the end-of-string testing works. 
*/ /* char * [r3] stpcpy (char *dest [r3], const char *src [r4]) */ -EALIGN (BP_SYM (__stpcpy), 4, 0) +EALIGN (__stpcpy, 4, 0) CALL_MCOUNT 2 #define rTMP r0 #define rRTN r3 -#if __BOUNDED_POINTERS__ -# define rDEST r4 /* pointer to previous word in dest */ -# define rSRC r5 /* pointer to previous word in src */ -# define rLOW r11 -# define rHIGH r12 -#else -# define rDEST r3 /* pointer to previous word in dest */ -# define rSRC r4 /* pointer to previous word in src */ -#endif +#define rDEST r3 /* pointer to previous word in dest */ +#define rSRC r4 /* pointer to previous word in src */ #define rWORD r6 /* current word from src */ #define rFEFE r7 /* 0xfefefeff */ #define r7F7F r8 /* 0x7f7f7f7f */ #define rNEG r9 /* ~(word in src | 0x7f7f7f7f) */ #define rALT r10 /* alternate word from src */ - CHECK_BOUNDS_LOW (rSRC, rLOW, rHIGH) - CHECK_BOUNDS_LOW (rDEST, rLOW, rHIGH) - STORE_RETURN_BOUNDS (rLOW, rHIGH) - or rTMP, rSRC, rDEST clrldi. rTMP, rTMP, 62 addi rDEST, rDEST, -4 @@ -85,8 +72,6 @@ L(g1): rlwinm. rTMP, rALT, 8, 24, 31 stbu rTMP, 1(rDEST) beqlr- stbu rALT, 1(rDEST) - CHECK_BOUNDS_HIGH (rDEST, rHIGH, twlgt) - STORE_RETURN_VALUE (rDEST) blr /* Oh well. In this case, we just do a byte-by-byte copy. */ @@ -108,15 +93,11 @@ L(u0): lbzu rALT, 1(rSRC) cmpwi rWORD, 0 bne+ L(u0) L(u2): stbu rWORD, 1(rDEST) - CHECK_BOUNDS_HIGH (rDEST, rHIGH, twlgt) - STORE_RETURN_VALUE (rDEST) blr L(u1): stbu rALT, 1(rDEST) - CHECK_BOUNDS_HIGH (rDEST, rHIGH, twlgt) - STORE_RETURN_VALUE (rDEST) blr -END (BP_SYM (__stpcpy)) +END (__stpcpy) -weak_alias (BP_SYM (__stpcpy), BP_SYM (stpcpy)) +weak_alias (__stpcpy, stpcpy) libc_hidden_def (__stpcpy) libc_hidden_builtin_def (stpcpy) diff --git a/libc/sysdeps/powerpc/powerpc64/strchr.S b/libc/sysdeps/powerpc/powerpc64/strchr.S index 3bd392949..d2d8cd361 100644 --- a/libc/sysdeps/powerpc/powerpc64/strchr.S +++ b/libc/sysdeps/powerpc/powerpc64/strchr.S @@ -17,32 +17,19 @@ <http://www.gnu.org/licenses/>. 
*/ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> /* See strlen.s for comments on how this works. */ /* char * [r3] strchr (const char *s [r3] , int c [r4] ) */ -ENTRY (BP_SYM (strchr)) +ENTRY (strchr) CALL_MCOUNT 2 #define rTMP1 r0 #define rRTN r3 /* outgoing result */ -/* Note: The Bounded pointer support in this code is broken. This code - was inherited from PPC32 and that support was never completed. - Currently PPC gcc does not support -fbounds-check or -fbounded-pointers. - These artifacts are left in the code as a reminder in case we need - bounded pointer support in the future. */ -#if __BOUNDED_POINTERS__ -# define rSTR r4 -# define rCHR r5 /* byte we're looking for, spread over the whole word */ -# define rWORD r8 /* the current word */ -#else -# define rSTR r8 /* current word pointer */ -# define rCHR r4 /* byte we're looking for, spread over the whole word */ -# define rWORD r5 /* the current word */ -#endif +#define rSTR r8 /* current word pointer */ +#define rCHR r4 /* byte we're looking for, spread over the whole word */ +#define rWORD r5 /* the current word */ #define rCLZB rCHR /* leading zero byte count */ #define rFEFE r6 /* constant 0xfefefefefefefeff (-0x0101010101010101) */ #define r7F7F r7 /* constant 0x7f7f7f7f7f7f7f7f */ @@ -51,9 +38,6 @@ ENTRY (BP_SYM (strchr)) #define rMASK r11 /* mask with the bits to ignore set to 0 */ #define rTMP3 r12 - CHECK_BOUNDS_LOW (rSTR, rTMP1, rTMP2) - STORE_RETURN_BOUNDS (rTMP1, rTMP2) - dcbt 0,rRTN rlwimi rCHR, rCHR, 8, 16, 23 li rMASK, -1 @@ -101,7 +85,6 @@ L(loopentry): L(missed): and. rTMP1, rTMP1, rTMP2 li rRTN, 0 - STORE_RETURN_VALUE (rSTR) beqlr /* It did happen. Decide which one was first... 
I'm not sure if this is actually faster than a sequence of @@ -119,8 +102,6 @@ L(missed): cntlzd rCLZB, rTMP2 srdi rCLZB, rCLZB, 3 add rRTN, rSTR, rCLZB - CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, tdlge) - STORE_RETURN_VALUE (rSTR) blr L(foundit): @@ -132,10 +113,8 @@ L(foundit): subi rSTR, rSTR, 8 srdi rCLZB, rCLZB, 3 add rRTN, rSTR, rCLZB - CHECK_BOUNDS_HIGH_RTN (rSTR, rTMP2, tdlge) - STORE_RETURN_VALUE (rSTR) blr -END (BP_SYM (strchr)) +END (strchr) -weak_alias (BP_SYM (strchr), BP_SYM (index)) +weak_alias (strchr, index) libc_hidden_builtin_def (strchr) diff --git a/libc/sysdeps/powerpc/powerpc64/strcmp.S b/libc/sysdeps/powerpc/powerpc64/strcmp.S index 46600d5d0..c9d6dac12 100644 --- a/libc/sysdeps/powerpc/powerpc64/strcmp.S +++ b/libc/sysdeps/powerpc/powerpc64/strcmp.S @@ -17,29 +17,18 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> /* See strlen.s for comments on how the end-of-string testing works. */ /* int [r3] strcmp (const char *s1 [r3], const char *s2 [r4]) */ -EALIGN (BP_SYM(strcmp), 4, 0) +EALIGN (strcmp, 4, 0) CALL_MCOUNT 2 #define rTMP r0 #define rRTN r3 #define rSTR1 r3 /* first string arg */ #define rSTR2 r4 /* second string arg */ -/* Note: The Bounded pointer support in this code is broken. This code - was inherited from PPC32 and that support was never completed. - Current PPC gcc does not support -fbounds-check or -fbounded-pointers. - These artifacts are left in the code as a reminder in case we need - bounded pointer support in the future. 
*/ -#if __BOUNDED_POINTERS__ -# define rHIGH1 r11 -# define rHIGH2 r12 -#endif #define rWORD1 r5 /* current word in s1 */ #define rWORD2 r6 /* current word in s2 */ #define rFEFE r7 /* constant 0xfefefefefefefeff (-0x0101010101010101) */ @@ -47,9 +36,6 @@ EALIGN (BP_SYM(strcmp), 4, 0) #define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */ #define rBITDIF r10 /* bits that differ in s1 & s2 words */ - CHECK_BOUNDS_LOW (rSTR1, rTMP, rHIGH1) - CHECK_BOUNDS_LOW (rSTR2, rTMP, rHIGH2) - dcbt 0,rSTR1 or rTMP, rSTR2, rSTR1 dcbt 0,rSTR2 @@ -98,7 +84,6 @@ L(endstring): blr L(equal): li rRTN, 0 - /* GKM FIXME: check high bounds. */ blr L(different): @@ -113,7 +98,6 @@ L(highbit): srdi rWORD2, rWORD2, 56 srdi rWORD1, rWORD1, 56 sub rRTN, rWORD1, rWORD2 - /* GKM FIXME: check high bounds. */ blr @@ -137,11 +121,9 @@ L(u1): cmpwi cr1, rWORD1, 0 cmpd rWORD1, rWORD2 bne+ cr1, L(u0) L(u3): sub rRTN, rWORD1, rWORD2 - /* GKM FIXME: check high bounds. */ blr L(u4): lbz rWORD1, -1(rSTR1) sub rRTN, rWORD1, rWORD2 - /* GKM FIXME: check high bounds. */ blr -END (BP_SYM (strcmp)) +END (strcmp) libc_hidden_builtin_def (strcmp) diff --git a/libc/sysdeps/powerpc/powerpc64/strcpy.S b/libc/sysdeps/powerpc/powerpc64/strcpy.S index 56845cf8f..4c6fd3f9d 100644 --- a/libc/sysdeps/powerpc/powerpc64/strcpy.S +++ b/libc/sysdeps/powerpc/powerpc64/strcpy.S @@ -17,50 +17,28 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> /* See strlen.s for comments on how the end-of-string testing works. */ /* char * [r3] strcpy (char *dest [r3], const char *src [r4]) */ -EALIGN (BP_SYM (strcpy), 4, 0) +EALIGN (strcpy, 4, 0) CALL_MCOUNT 2 #define rTMP r0 #define rRTN r3 /* incoming DEST arg preserved as result */ -/* Note. The Bounded pointer support in this code is broken. This code - was inherited from PPC32 and that support was never completed. - Current PPC gcc does not support -fbounds-check or -fbounded-pointers. 
- These artifacts are left in the code as a reminder in case we need - bounded pointer support in the future. */ -#if __BOUNDED_POINTERS__ -# define rDEST r4 /* pointer to previous word in dest */ -# define rSRC r5 /* pointer to previous word in src */ -# define rLOW r11 -# define rHIGH r12 -#else -# define rSRC r4 /* pointer to previous word in src */ -# define rDEST r5 /* pointer to previous word in dest */ -#endif +#define rSRC r4 /* pointer to previous word in src */ +#define rDEST r5 /* pointer to previous word in dest */ #define rWORD r6 /* current word from src */ #define rFEFE r7 /* constant 0xfefefefefefefeff (-0x0101010101010101) */ #define r7F7F r8 /* constant 0x7f7f7f7f7f7f7f7f */ #define rNEG r9 /* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */ #define rALT r10 /* alternate word from src */ - CHECK_BOUNDS_LOW (rSRC, rLOW, rHIGH) - CHECK_BOUNDS_LOW (rDEST, rLOW, rHIGH) - STORE_RETURN_BOUNDS (rLOW, rHIGH) - dcbt 0,rSRC or rTMP, rSRC, rRTN clrldi. rTMP, rTMP, 61 -#if __BOUNDED_POINTERS__ - addi rDEST, rDEST, -8 -#else addi rDEST, rRTN, -8 -#endif dcbtst 0,rRTN bne L(unaligned) @@ -112,7 +90,6 @@ L(g1): stb rTMP, 14(rDEST) beqlr- stb rALT, 15(rDEST) - /* GKM FIXME: check high bound. */ blr /* Oh well. In this case, we just do a byte-by-byte copy. */ @@ -134,11 +111,9 @@ L(u0): lbzu rALT, 1(rSRC) cmpwi rWORD, 0 bne+ L(u0) L(u2): stb rWORD, 1(rDEST) - /* GKM FIXME: check high bound. */ blr L(u1): stb rALT, 1(rDEST) - /* GKM FIXME: check high bound. */ blr -END (BP_SYM (strcpy)) +END (strcpy) libc_hidden_builtin_def (strcpy) diff --git a/libc/sysdeps/powerpc/powerpc64/strlen.S b/libc/sysdeps/powerpc/powerpc64/strlen.S index 3ef4cc88e..dafd03387 100644 --- a/libc/sysdeps/powerpc/powerpc64/strlen.S +++ b/libc/sysdeps/powerpc/powerpc64/strlen.S @@ -17,8 +17,6 @@ <http://www.gnu.org/licenses/>. 
*/ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> /* The algorithm here uses the following techniques: @@ -77,7 +75,7 @@ /* int [r3] strlen (char *s [r3]) */ -ENTRY (BP_SYM (strlen)) +ENTRY (strlen) CALL_MCOUNT 1 #define rTMP1 r0 @@ -94,13 +92,6 @@ ENTRY (BP_SYM (strlen)) #define rTMP3 r11 #define rTMP4 r12 -/* Note: The Bounded pointer support in this code is broken. This code - was inherited from PPC32 and that support was never completed. - Current PPC gcc does not support -fbounds-check or -fbounded-pointers. - These artifacts are left in the code as a reminder in case we need - bounded pointer support in the future. */ - CHECK_BOUNDS_LOW (rRTN, rTMP1, rTMP2) - dcbt 0,rRTN clrrdi rSTR, rRTN, 3 lis r7F7F, 0x7f7f @@ -168,7 +159,6 @@ L(done0): subf rTMP1, rRTN, rSTR srdi rTMP3, rTMP3, 3 add rRTN, rTMP1, rTMP3 - /* GKM FIXME: check high bound. */ blr -END (BP_SYM (strlen)) +END (strlen) libc_hidden_builtin_def (strlen) diff --git a/libc/sysdeps/powerpc/powerpc64/strncmp.S b/libc/sysdeps/powerpc/powerpc64/strncmp.S index 89a3246fd..e2726883f 100644 --- a/libc/sysdeps/powerpc/powerpc64/strncmp.S +++ b/libc/sysdeps/powerpc/powerpc64/strncmp.S @@ -17,14 +17,12 @@ <http://www.gnu.org/licenses/>. */ #include <sysdep.h> -#include <bp-sym.h> -#include <bp-asm.h> /* See strlen.s for comments on how the end-of-string testing works. */ /* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5]) */ -EALIGN (BP_SYM(strncmp), 4, 0) +EALIGN (strncmp, 4, 0) CALL_MCOUNT 3 #define rTMP r0 @@ -32,9 +30,6 @@ EALIGN (BP_SYM(strncmp), 4, 0) #define rSTR1 r3 /* first string arg */ #define rSTR2 r4 /* second string arg */ #define rN r5 /* max string length */ -/* Note: The Bounded pointer support in this code is broken. This code - was inherited from PPC32 and that support was never completed. - Current PPC gcc does not support -fbounds-check or -fbounded-pointers. 
*/ #define rWORD1 r6 /* current word in s1 */ #define rWORD2 r7 /* current word in s2 */ #define rFEFE r8 /* constant 0xfefefefefefefeff (-0x0101010101010101) */ @@ -160,5 +155,5 @@ L(u1): L(u2): lbzu rWORD1, -1(rSTR1) L(u3): sub rRTN, rWORD1, rWORD2 blr -END (BP_SYM (strncmp)) +END (strncmp) libc_hidden_builtin_def (strncmp) diff --git a/libc/sysdeps/s390/s390-32/multiarch/ifunc-resolve.c b/libc/sysdeps/s390/s390-32/multiarch/ifunc-resolve.c index fcbd15e26..d57a907df 100644 --- a/libc/sysdeps/s390/s390-32/multiarch/ifunc-resolve.c +++ b/libc/sysdeps/s390/s390-32/multiarch/ifunc-resolve.c @@ -34,9 +34,9 @@ \ /* Make the declarations of the optimized functions hidden in order to prevent GOT slots being generated for them. */ \ - extern __attribute__((visibility("hidden"))) void *FUNC##_z196; \ - extern __attribute__((visibility("hidden"))) void *FUNC##_z10; \ - extern __attribute__((visibility("hidden"))) void *FUNC##_g5; \ + extern void *FUNC##_z196 attribute_hidden; \ + extern void *FUNC##_z10 attribute_hidden; \ + extern void *FUNC##_g5 attribute_hidden; \ \ void *resolve_##FUNC (unsigned long int dl_hwcap) \ { \ diff --git a/libc/sysdeps/s390/s390-64/multiarch/ifunc-resolve.c b/libc/sysdeps/s390/s390-64/multiarch/ifunc-resolve.c index 256179b26..14d9c13eb 100644 --- a/libc/sysdeps/s390/s390-64/multiarch/ifunc-resolve.c +++ b/libc/sysdeps/s390/s390-64/multiarch/ifunc-resolve.c @@ -34,9 +34,9 @@ \ /* Make the declarations of the optimized functions hidden in order to prevent GOT slots being generated for them. 
*/ \ - extern __attribute__((visibility("hidden"))) void *FUNC##_z196; \ - extern __attribute__((visibility("hidden"))) void *FUNC##_z10; \ - extern __attribute__((visibility("hidden"))) void *FUNC##_z900; \ + extern void *FUNC##_z196 attribute_hidden; \ + extern void *FUNC##_z10 attribute_hidden; \ + extern void *FUNC##_z900 attribute_hidden; \ \ void *resolve_##FUNC (unsigned long int dl_hwcap) \ { \ diff --git a/libc/sysdeps/sparc/fpu/libm-test-ulps b/libc/sysdeps/sparc/fpu/libm-test-ulps index 6eee78843..bacac6e00 100644 --- a/libc/sysdeps/sparc/fpu/libm-test-ulps +++ b/libc/sysdeps/sparc/fpu/libm-test-ulps @@ -2637,6 +2637,9 @@ float: 2 ifloat: 2 ildouble: 1 ldouble: 1 +Test "j0 (0x1p16383) == 9.5859502826270374691362975419147645151233e-2467": +ildouble: 2 +ldouble: 2 Test "j0 (10.0) == -0.245935764451348335197760862485328754": double: 2 float: 1 @@ -2677,6 +2680,9 @@ double: 1 idouble: 1 ildouble: 1 ldouble: 1 +Test "j1 (0x1p16382) == 8.0839224448726336195866026476176740513439e-2467": +ildouble: 1 +ldouble: 1 Test "j1 (1.0) == 0.440050585744933515959682203718914913": ildouble: 1 ldouble: 1 @@ -3277,6 +3283,9 @@ double: 1 float: 1 idouble: 1 ifloat: 1 +Test "y0 (0x1p16382) == 8.0839224448726336195866026476176740513439e-2467": +ildouble: 1 +ldouble: 1 Test "y0 (1.0) == 0.0882569642156769579829267660235151628": double: 2 float: 1 @@ -3321,6 +3330,9 @@ ldouble: 1 Test "y1 (0x1p-30) == -6.8356527557643159612937462812258975438856e+08": ildouble: 1 ldouble: 1 +Test "y1 (0x1p16383) == -9.5859502826270374691362975419147645151233e-2467": +ildouble: 2 +ldouble: 2 Test "y1 (1.5) == -0.412308626973911295952829820633445323": float: 1 ifloat: 1 @@ -4133,8 +4145,8 @@ double: 3 float: 2 idouble: 3 ifloat: 2 -ildouble: 1 -ldouble: 1 +ildouble: 2 +ldouble: 2 Function: "yn": double: 3 diff --git a/libc/sysdeps/unix/sysv/linux/Makefile b/libc/sysdeps/unix/sysv/linux/Makefile index ecd9c2c97..f82c94982 100644 --- a/libc/sysdeps/unix/sysv/linux/Makefile +++ 
b/libc/sysdeps/unix/sysv/linux/Makefile @@ -35,7 +35,8 @@ sysdep_headers += sys/mount.h sys/acct.h sys/sysctl.h \ bits/a.out.h sys/inotify.h sys/signalfd.h sys/eventfd.h \ sys/timerfd.h sys/fanotify.h bits/eventfd.h bits/inotify.h \ bits/signalfd.h bits/timerfd.h bits/epoll.h \ - bits/socket_type.h bits/syscall.h bits/sysctl.h + bits/socket_type.h bits/syscall.h bits/sysctl.h \ + bits/mman-linux.h tests += tst-clone diff --git a/libc/sysdeps/unix/sysv/linux/bits/mman-linux.h b/libc/sysdeps/unix/sysv/linux/bits/mman-linux.h new file mode 100644 index 000000000..05d2d9237 --- /dev/null +++ b/libc/sysdeps/unix/sysv/linux/bits/mman-linux.h @@ -0,0 +1,108 @@ +/* Definitions for POSIX memory map interface. Linux generic version. + Copyright (C) 2001-2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _SYS_MMAN_H +# error "Never use <bits/mman-linux.h> directly; include <sys/mman.h> instead." +#endif + +/* The following definitions basically come from the kernel headers. + But the kernel header is not namespace clean. */ + + +/* Protections are chosen from these bits, OR'd together. The + implementation does not necessarily support PROT_EXEC or PROT_WRITE + without PROT_READ. 
The only guarantees are that no writing will be + allowed without PROT_WRITE and no access will be allowed for PROT_NONE. */ + +#define PROT_READ 0x1 /* Page can be read. */ +#define PROT_WRITE 0x2 /* Page can be written. */ +#define PROT_EXEC 0x4 /* Page can be executed. */ +#define PROT_NONE 0x0 /* Page can not be accessed. */ +#define PROT_GROWSDOWN 0x01000000 /* Extend change to start of + growsdown vma (mprotect only). */ +#define PROT_GROWSUP 0x02000000 /* Extend change to start of + growsup vma (mprotect only). */ + +/* Sharing types (must choose one and only one of these). */ +#define MAP_SHARED 0x01 /* Share changes. */ +#define MAP_PRIVATE 0x02 /* Changes are private. */ +#ifdef __USE_MISC +# define MAP_TYPE 0x0f /* Mask for type of mapping. */ +#endif + +/* Other flags. */ +#define MAP_FIXED 0x10 /* Interpret addr exactly. */ +#ifdef __USE_MISC +# define MAP_FILE 0 +# ifdef __MAP_ANONYMOUS +# define MAP_ANONYMOUS __MAP_ANONYMOUS /* Don't use a file. */ +# else +# define MAP_ANONYMOUS 0x20 /* Don't use a file. */ +# endif +# define MAP_ANON MAP_ANONYMOUS +/* When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size. */ +# define MAP_HUGE_SHIFT 26 +# define MAP_HUGE_MASK 0x3f +#endif + +/* Flags to `msync'. */ +#define MS_ASYNC 1 /* Sync memory asynchronously. */ +#define MS_SYNC 4 /* Synchronous memory sync. */ +#define MS_INVALIDATE 2 /* Invalidate the caches. */ + +/* Flags for `mremap'. */ +#ifdef __USE_GNU +# define MREMAP_MAYMOVE 1 +# define MREMAP_FIXED 2 +#endif + +/* Advice to `madvise'. */ +#ifdef __USE_BSD +# define MADV_NORMAL 0 /* No further special treatment. */ +# define MADV_RANDOM 1 /* Expect random page references. */ +# define MADV_SEQUENTIAL 2 /* Expect sequential page references. */ +# define MADV_WILLNEED 3 /* Will need these pages. */ +# define MADV_DONTNEED 4 /* Don't need these pages. */ +# define MADV_REMOVE 9 /* Remove these pages and resources. */ +# define MADV_DONTFORK 10 /* Do not inherit across fork. 
*/ +# define MADV_DOFORK 11 /* Do inherit across fork. */ +# define MADV_MERGEABLE 12 /* KSM may merge identical pages. */ +# define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages. */ +# define MADV_HUGEPAGE 14 /* Worth backing with hugepages. */ +# define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages. */ +# define MADV_DONTDUMP 16 /* Explicitly exclude from the core dump, + overrides the coredump filter bits. */ +# define MADV_DODUMP 17 /* Clear the MADV_DONTDUMP flag. */ +# define MADV_HWPOISON 100 /* Poison a page for testing. */ +#endif + +/* The POSIX people had to invent similar names for the same things. */ +#ifdef __USE_XOPEN2K +# define POSIX_MADV_NORMAL 0 /* No further special treatment. */ +# define POSIX_MADV_RANDOM 1 /* Expect random page references. */ +# define POSIX_MADV_SEQUENTIAL 2 /* Expect sequential page references. */ +# define POSIX_MADV_WILLNEED 3 /* Will need these pages. */ +# define POSIX_MADV_DONTNEED 4 /* Don't need these pages. */ +#endif + +/* Flags for `mlockall'. */ +#ifndef MCL_CURRENT +# define MCL_CURRENT 1 /* Lock all currently mapped pages. */ +# define MCL_FUTURE 2 /* Lock all additions to address + space. */ +#endif diff --git a/libc/sysdeps/unix/sysv/linux/bits/msq.h b/libc/sysdeps/unix/sysv/linux/bits/msq.h index bd005fb10..8f6eb8a7d 100644 --- a/libc/sysdeps/unix/sysv/linux/bits/msq.h +++ b/libc/sysdeps/unix/sysv/linux/bits/msq.h @@ -25,6 +25,7 @@ #define MSG_NOERROR 010000 /* no error if message is too big */ #ifdef __USE_GNU # define MSG_EXCEPT 020000 /* recv any msg except of specified type */ +# define MSG_COPY 040000 /* copy (not remove) all queue messages */ #endif /* Types used in the structure definition.
*/ diff --git a/libc/sysdeps/unix/sysv/linux/fpathconf.c b/libc/sysdeps/unix/sysv/linux/fpathconf.c index c97164468..e8c4dc972 100644 --- a/libc/sysdeps/unix/sysv/linux/fpathconf.c +++ b/libc/sysdeps/unix/sysv/linux/fpathconf.c @@ -33,7 +33,6 @@ __fpathconf (fd, name) int name; { struct statfs fsbuf; - int r; switch (name) { @@ -49,12 +48,6 @@ __fpathconf (fd, name) case _PC_CHOWN_RESTRICTED: return __statfs_chown_restricted (__fstatfs (fd, &fsbuf), &fsbuf); - case _PC_PIPE_BUF: - r = __fcntl (fd, F_GETPIPE_SZ); - if (r > 0) - return r; - /* FALLTHROUGH */ - default: return posix_fpathconf (fd, name); } diff --git a/libc/sysdeps/unix/sysv/linux/internal_statvfs.c b/libc/sysdeps/unix/sysv/linux/internal_statvfs.c index 4cd4f042c..45a66b83d 100644 --- a/libc/sysdeps/unix/sysv/linux/internal_statvfs.c +++ b/libc/sysdeps/unix/sysv/linux/internal_statvfs.c @@ -117,6 +117,12 @@ __statvfs_getflags (const char *name, int fstype, struct stat64 *st) case LUSTRE_SUPER_MAGIC: fsname = "lustre"; break; + case F2FS_SUPER_MAGIC: + fsname = "f2fs"; + break; + case EFIVARFS_MAGIC: + fsname = "efivarfs"; + break; } FILE *mtab = __setmntent ("/proc/mounts", "r"); diff --git a/libc/sysdeps/unix/sysv/linux/ldsodefs.h b/libc/sysdeps/unix/sysv/linux/ldsodefs.h index 081fa01f8..18ff8528c 100644 --- a/libc/sysdeps/unix/sysv/linux/ldsodefs.h +++ b/libc/sysdeps/unix/sysv/linux/ldsodefs.h @@ -29,12 +29,6 @@ /* We have the auxiliary vector. */ #define HAVE_AUX_VECTOR -/* Used by static binaries to check the auxiliary vector. */ -extern void _dl_aux_init (ElfW(auxv_t) *av) internal_function; - -/* Initialization which is normally done by the dynamic linker. */ -extern void _dl_non_dynamic_init (void) internal_function; - /* We can assume that the kernel always provides the AT_UID, AT_EUID, AT_GID, and AT_EGID values in the auxiliary vector from 2.4.0 or so on. 
*/ #define HAVE_AUX_XID diff --git a/libc/sysdeps/unix/sysv/linux/linux_fsinfo.h b/libc/sysdeps/unix/sysv/linux/linux_fsinfo.h index 1bcd9e2b2..2312b4702 100644 --- a/libc/sysdeps/unix/sysv/linux/linux_fsinfo.h +++ b/libc/sysdeps/unix/sysv/linux/linux_fsinfo.h @@ -61,9 +61,15 @@ #define EFS_SUPER_MAGIC 0x414a53 #define EFS_MAGIC 0x072959 +/* Constant that identifies the `efivarfs' filesystem. */ +#define EFIVARFS_MAGIC 0xde5e81e4 + /* Constant that identifies the `ext2' and `ext3' filesystems. */ #define EXT2_SUPER_MAGIC 0xef53 +/* Constant that identifies the `f2fs' filesystem. */ +#define F2FS_SUPER_MAGIC 0xf2f52010 + /* Constant that identifies the `hpfs' filesystem. */ #define HPFS_SUPER_MAGIC 0xf995e849 @@ -153,6 +159,7 @@ #define COH_LINK_MAX 10000 #define EXT2_LINK_MAX 32000 #define EXT4_LINK_MAX 65000 +#define F2FS_LINK_MAX 32000 #define LUSTRE_LINK_MAX EXT4_LINK_MAX #define MINIX2_LINK_MAX 65530 #define MINIX_LINK_MAX 250 diff --git a/libc/sysdeps/unix/sysv/linux/pathconf.c b/libc/sysdeps/unix/sysv/linux/pathconf.c index e86925f7d..de91a4541 100644 --- a/libc/sysdeps/unix/sysv/linux/pathconf.c +++ b/libc/sysdeps/unix/sysv/linux/pathconf.c @@ -39,8 +39,6 @@ long int __pathconf (const char *file, int name) { struct statfs fsbuf; - int fd; - int flags; switch (name) { @@ -56,21 +54,6 @@ __pathconf (const char *file, int name) case _PC_CHOWN_RESTRICTED: return __statfs_chown_restricted (__statfs (file, &fsbuf), &fsbuf); - case _PC_PIPE_BUF: - flags = O_RDONLY|O_NONBLOCK|O_NOCTTY; -#ifdef O_CLOEXEC - flags |= O_CLOEXEC; -#endif - fd = open_not_cancel_2 (file, flags); - if (fd >= 0) - { - long int r = __fcntl (fd, F_GETPIPE_SZ); - close_not_cancel_no_status (fd); - if (r > 0) - return r; - } - /* FALLTHROUGH */ - default: return posix_pathconf (file, name); } @@ -168,6 +151,9 @@ __statfs_link_max (int result, const struct statfs *fsbuf, const char *file, the hard way.
*/ return distinguish_extX (fsbuf, file, fd); + case F2FS_SUPER_MAGIC: + return F2FS_LINK_MAX; + case MINIX_SUPER_MAGIC: case MINIX_SUPER_MAGIC2: return MINIX_LINK_MAX; @@ -221,6 +207,9 @@ __statfs_filesize_max (int result, const struct statfs *fsbuf) switch (fsbuf->f_type) { + case F2FS_SUPER_MAGIC: + return 256; + case BTRFS_SUPER_MAGIC: return 255; diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/Implies b/libc/sysdeps/unix/sysv/linux/powerpc/Implies deleted file mode 100644 index ff27cdb56..000000000 --- a/libc/sysdeps/unix/sysv/linux/powerpc/Implies +++ /dev/null @@ -1,4 +0,0 @@ -# Make sure these routines come before ldbl-opt. -ieee754/ldbl-128ibm -# These supply the ABI compatibility for when long double was double. -ieee754/ldbl-opt diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/bits/libc-vdso.h b/libc/sysdeps/unix/sysv/linux/powerpc/bits/libc-vdso.h index 545fda462..5f5fc1eb3 100644 --- a/libc/sysdeps/unix/sysv/linux/powerpc/bits/libc-vdso.h +++ b/libc/sysdeps/unix/sysv/linux/powerpc/bits/libc-vdso.h @@ -32,6 +32,16 @@ extern void *__vdso_get_tbfreq; extern void *__vdso_getcpu; +/* This macro is needed for PPC64 to return a skeleton OPD entry of a vDSO + symbol. This works because _dl_vdso_vsym always return the function + address, and no vDSO symbols use the TOC or chain pointers from the OPD + so we can allow them to be garbage. */ +#if defined(__PPC64__) || defined(__powerpc64__) +#define VDSO_IFUNC_RET(value) &value +#else +#define VDSO_IFUNC_RET(value) value +#endif + #endif #endif /* _LIBC_VDSO_H */ diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/bits/mman.h b/libc/sysdeps/unix/sysv/linux/powerpc/bits/mman.h index a27018965..3f72c0335 100644 --- a/libc/sysdeps/unix/sysv/linux/powerpc/bits/mman.h +++ b/libc/sysdeps/unix/sysv/linux/powerpc/bits/mman.h @@ -17,42 +17,13 @@ <http://www.gnu.org/licenses/>. */ #ifndef _SYS_MMAN_H -# error "Never use <bits/mman.h> directly; iclude <sys/mman.h> instead." 
+# error "Never use <bits/mman.h> directly; include <sys/mman.h> instead." #endif /* The following definitions basically come from the kernel headers. But the kernel header is not namespace clean. */ - -/* Protections are chosen from these bits, OR'd together. The - implementation does not necessarily support PROT_EXEC or PROT_WRITE - without PROT_READ. The only guarantees are that no writing will be - allowed without PROT_WRITE and no access will be allowed for PROT_NONE. */ - -#define PROT_READ 0x1 /* Page can be read. */ -#define PROT_WRITE 0x2 /* Page can be written. */ -#define PROT_EXEC 0x4 /* Page can be executed. */ -#define PROT_NONE 0x0 /* Page can not be accessed. */ #define PROT_SAO 0x10 /* Strong Access Ordering. */ -#define PROT_GROWSDOWN 0x01000000 /* Extend change to start of - growsdown vma (mprotect only). */ -#define PROT_GROWSUP 0x02000000 /* Extend change to start of - growsup vma (mprotect only). */ - -/* Sharing types (must choose one and only one of these). */ -#define MAP_SHARED 0x001 /* Share changes. */ -#define MAP_PRIVATE 0x002 /* Changes are private. */ -#ifdef __USE_MISC -# define MAP_TYPE 0x00f /* Mask for type of mapping. */ -#endif - -/* Other flags. */ -#define MAP_FIXED 0x010 /* Interpret addr exactly. */ -#ifdef __USE_MISC -# define MAP_FILE 0x000 -# define MAP_ANONYMOUS 0x020 /* Don't use a file. */ -# define MAP_ANON MAP_ANONYMOUS -#endif /* These are Linux-specific. */ #ifdef __USE_MISC @@ -67,48 +38,10 @@ # define MAP_HUGETLB 0x40000 /* Create huge page mapping. */ #endif -/* Flags to `msync'. */ -#define MS_ASYNC 1 /* Sync memory asynchronously. */ -#define MS_SYNC 4 /* Synchronous memory sync. */ -#define MS_INVALIDATE 2 /* Invalidate the caches. */ - /* Flags for `mlockall'. */ #define MCL_CURRENT 0x2000 /* Lock all currently mapped pages. */ #define MCL_FUTURE 0x4000 /* Lock all additions to address space. */ - -/* Flags for `mremap'. 
*/ -#ifdef __USE_GNU -# define MREMAP_MAYMOVE 1 -# define MREMAP_FIXED 2 -#endif - -/* Advice to `madvise'. */ -#ifdef __USE_BSD -# define MADV_NORMAL 0 /* No further special treatment. */ -# define MADV_RANDOM 1 /* Expect random page references. */ -# define MADV_SEQUENTIAL 2 /* Expect sequential page references. */ -# define MADV_WILLNEED 3 /* Will need these pages. */ -# define MADV_DONTNEED 4 /* Don't need these pages. */ -# define MADV_REMOVE 9 /* Remove these pages and resources. */ -# define MADV_DONTFORK 10 /* Do not inherit across fork. */ -# define MADV_DOFORK 11 /* Do inherit across fork. */ -# define MADV_MERGEABLE 12 /* KSM may merge identical pages. */ -# define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages. */ -# define MADV_HUGEPAGE 14 /* Worth backing with hugepages. */ -# define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages. */ -# define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, - overrides the coredump filter bits. */ -# define MADV_DODUMP 17 /* Clear the MADV_DONTDUMP flag. */ -# define MADV_HWPOISON 100 /* Poison a page for testing. */ -#endif - -/* The POSIX people had to invent similar names for the same things. */ -#ifdef __USE_XOPEN2K -# define POSIX_MADV_NORMAL 0 /* No further special treatment. */ -# define POSIX_MADV_RANDOM 1 /* Expect random page references. */ -# define POSIX_MADV_SEQUENTIAL 2 /* Expect sequential page references. */ -# define POSIX_MADV_WILLNEED 3 /* Will need these pages. */ -# define POSIX_MADV_DONTNEED 4 /* Don't need these pages. */ -#endif +/* Include generic Linux declarations. 
*/ +#include <bits/mman-linux.h> diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/bits/msq.h b/libc/sysdeps/unix/sysv/linux/powerpc/bits/msq.h index b9811c656..59147c268 100644 --- a/libc/sysdeps/unix/sysv/linux/powerpc/bits/msq.h +++ b/libc/sysdeps/unix/sysv/linux/powerpc/bits/msq.h @@ -25,6 +25,7 @@ #define MSG_NOERROR 010000 /* no error if message is too big */ #ifdef __USE_GNU # define MSG_EXCEPT 020000 /* recv any msg except of specified type */ +# define MSG_COPY 040000 /* copy (not remove) all queue messages */ #endif /* Types used in the structure definition. */ diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/gettimeofday.c b/libc/sysdeps/unix/sysv/linux/powerpc/gettimeofday.c index f60748507..6506d75e6 100644 --- a/libc/sysdeps/unix/sysv/linux/powerpc/gettimeofday.c +++ b/libc/sysdeps/unix/sysv/linux/powerpc/gettimeofday.c @@ -15,25 +15,49 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#include <sysdep.h> -#include <stddef.h> + #include <sys/time.h> -#include <time.h> -#include <hp-timing.h> -#include <bits/libc-vdso.h> +#ifdef SHARED + +# include <dl-vdso.h> +# include <bits/libc-vdso.h> + +void *gettimeofday_ifunc (void) __asm__ ("__gettimeofday"); + +static int +__gettimeofday_syscall (struct timeval *tv, struct timezone *tz) +{ + return INLINE_SYSCALL (gettimeofday, 2, tv, tz); +} + +void * +gettimeofday_ifunc (void) +{ + /* If the vDSO is not available we fall back to the syscall. */ + return (__vdso_gettimeofday ? VDSO_IFUNC_RET (__vdso_gettimeofday) + : __gettimeofday_syscall); +} +asm (".type __gettimeofday, %gnu_indirect_function"); + +/* This is doing "libc_hidden_def (__gettimeofday)" but the compiler won't + let us do it in C because it doesn't know we're defining __gettimeofday + here in this file. */ +asm (".globl __GI___gettimeofday\n" + "__GI___gettimeofday = __gettimeofday"); + +#else -/* Get the current time of day and timezone information, - putting it into *TV and *TZ.
If TZ is NULL, *TZ is not filled. - Returns 0 on success, -1 on errors. */ +# include <sysdep.h> +# include <errno.h> int -__gettimeofday (tv, tz) - struct timeval *tv; - struct timezone *tz; +__gettimeofday (struct timeval *tv, struct timezone *tz) { - return INLINE_VSYSCALL (gettimeofday, 2, tv, tz); + return INLINE_SYSCALL (gettimeofday, 2, tv, tz); } libc_hidden_def (__gettimeofday) + +#endif weak_alias (__gettimeofday, gettimeofday) libc_hidden_weak (gettimeofday) diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/brk.S b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/brk.S index 06596ce58..348aeb5ba 100644 --- a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/brk.S +++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/brk.S @@ -19,17 +19,14 @@ #include <sysdep.h> #define _ERRNO_H 1 #include <bits/errno.h> -#include <bp-sym.h> -#include <bp-asm.h> .comm __curbrk,8,8 .section ".toc","aw" .LC__curbrk: .tc __curbrk[TC],__curbrk .section ".text" -ENTRY (BP_SYM (__brk)) +ENTRY (__brk) CALL_MCOUNT 1 - DISCARD_BOUNDS (r3) /* the bounds are meaningless, so toss 'em. 
*/ std r3,48(r1) DO_CALL(SYS_ify(brk)) @@ -41,6 +38,6 @@ ENTRY (BP_SYM (__brk)) blelr+ li r3,ENOMEM TAIL_CALL_SYSCALL_ERROR -END (BP_SYM (__brk)) +END (__brk) -weak_alias (BP_SYM (__brk), BP_SYM (brk)) +weak_alias (__brk, brk) diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S index f74dcae90..cf46856e1 100644 --- a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S +++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S @@ -19,8 +19,6 @@ #include <sysdep.h> #define _ERRNO_H 1 #include <bits/errno.h> -#include <bp-sym.h> -#include <bp-asm.h> #define CLONE_VM 0x00000100 #define CLONE_THREAD 0x00010000 @@ -33,11 +31,8 @@ int flags [r5], void *arg [r6], void *parent_tid [r7], void *tls [r8], void *child_tid [r9]); */ -ENTRY (BP_SYM (__clone)) +ENTRY (__clone) CALL_MCOUNT 7 - /* GKM FIXME: add bounds checks, where sensible. */ - DISCARD_BOUNDS (r4) - DISCARD_BOUNDS (r6) /* Check for child_stack == NULL || fn == NULL. */ cmpdi cr0,r4,0 @@ -144,6 +139,6 @@ L(parent): cfi_restore(r31) PSEUDO_RET -END (BP_SYM (__clone)) +END (__clone) -weak_alias (BP_SYM (__clone), BP_SYM (clone)) +weak_alias (__clone, clone) diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/sys/ptrace.h b/libc/sysdeps/unix/sysv/linux/powerpc/sys/ptrace.h index dfda1c889..e6e916b0f 100644 --- a/libc/sysdeps/unix/sysv/linux/powerpc/sys/ptrace.h +++ b/libc/sysdeps/unix/sysv/linux/powerpc/sys/ptrace.h @@ -136,7 +136,8 @@ enum __ptrace_setoptions PTRACE_O_TRACEVFORKDONE = 0x00000020, PTRACE_O_TRACEEXIT = 0x00000040, PTRACE_O_TRACESECCOMP = 0x00000080, - PTRACE_O_MASK = 0x000000ff + PTRACE_O_EXITKILL = 0x00100000, + PTRACE_O_MASK = 0x001000ff }; /* Wait extended result codes for the above trace options. 
*/ diff --git a/libc/sysdeps/unix/sysv/linux/s390/bits/mman.h b/libc/sysdeps/unix/sysv/linux/s390/bits/mman.h index 3e7bf92f4..b788fa50d 100644 --- a/libc/sysdeps/unix/sysv/linux/s390/bits/mman.h +++ b/libc/sysdeps/unix/sysv/linux/s390/bits/mman.h @@ -24,39 +24,9 @@ But the kernel header is not namespace clean. */ -/* Protections are chosen from these bits, OR'd together. The - implementation does not necessarily support PROT_EXEC or PROT_WRITE - without PROT_READ. The only guarantees are that no writing will be - allowed without PROT_WRITE and no access will be allowed for PROT_NONE. */ - -#define PROT_READ 0x1 /* Page can be read. */ -#define PROT_WRITE 0x2 /* Page can be written. */ -#define PROT_EXEC 0x4 /* Page can be executed. */ -#define PROT_NONE 0x0 /* Page can not be accessed. */ -#define PROT_GROWSDOWN 0x01000000 /* Extend change to start of - growsdown vma (mprotect only). */ -#define PROT_GROWSUP 0x02000000 /* Extend change to start of - growsup vma (mprotect only). */ - -/* Sharing types (must choose one and only one of these). */ -#define MAP_SHARED 0x01 /* Share changes. */ -#define MAP_PRIVATE 0x02 /* Changes are private. */ -#ifdef __USE_MISC -# define MAP_TYPE 0x0f /* Mask for type of mapping. */ -#endif - -/* Other flags. */ -#define MAP_FIXED 0x10 /* Interpret addr exactly. */ -#ifdef __USE_MISC -# define MAP_FILE 0 -# define MAP_ANONYMOUS 0x20 /* Don't use a file. */ -# define MAP_ANON MAP_ANONYMOUS -#endif - /* These are Linux-specific. */ #ifdef __USE_MISC # define MAP_GROWSDOWN 0x00100 /* Stack-like segment. */ -# define MAP_GROWSUP 0x00200 /* Register stack-like segment */ # define MAP_DENYWRITE 0x00800 /* ETXTBSY */ # define MAP_EXECUTABLE 0x01000 /* Mark it as an executable. */ # define MAP_LOCKED 0x02000 /* Lock the mapping. */ @@ -67,47 +37,5 @@ # define MAP_HUGETLB 0x40000 /* Create huge page mapping. */ #endif -/* Flags to `msync'. */ -#define MS_ASYNC 1 /* Sync memory asynchronously. */ -#define MS_SYNC 4 /* Synchronous memory sync. 
*/ -#define MS_INVALIDATE 2 /* Invalidate the caches. */ - -/* Flags for `mlockall'. */ -#define MCL_CURRENT 1 /* Lock all currently mapped pages. */ -#define MCL_FUTURE 2 /* Lock all additions to address - space. */ - -/* Flags for `mremap'. */ -#ifdef __USE_GNU -# define MREMAP_MAYMOVE 1 -# define MREMAP_FIXED 2 -#endif - -/* Advice to `madvise'. */ -#ifdef __USE_BSD -# define MADV_NORMAL 0 /* No further special treatment. */ -# define MADV_RANDOM 1 /* Expect random page references. */ -# define MADV_SEQUENTIAL 2 /* Expect sequential page references. */ -# define MADV_WILLNEED 3 /* Will need these pages. */ -# define MADV_DONTNEED 4 /* Don't need these pages. */ -# define MADV_REMOVE 9 /* Remove these pages and resources. */ -# define MADV_DONTFORK 10 /* Do not inherit across fork. */ -# define MADV_DOFORK 11 /* Do inherit across fork. */ -# define MADV_MERGEABLE 12 /* KSM may merge identical pages. */ -# define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages. */ -# define MADV_HUGEPAGE 14 /* Worth backing with hugepages. */ -# define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages. */ -# define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, - overrides the coredump filter bits. */ -# define MADV_DODUMP 17 /* Clear the MADV_DONTDUMP flag. */ -# define MADV_HWPOISON 100 /* Poison a page for testing. */ -#endif - -/* The POSIX people had to invent similar names for the same things. */ -#ifdef __USE_XOPEN2K -# define POSIX_MADV_NORMAL 0 /* No further special treatment. */ -# define POSIX_MADV_RANDOM 1 /* Expect random page references. */ -# define POSIX_MADV_SEQUENTIAL 2 /* Expect sequential page references. */ -# define POSIX_MADV_WILLNEED 3 /* Will need these pages. */ -# define POSIX_MADV_DONTNEED 4 /* Don't need these pages. */ -#endif +/* Include generic Linux declarations. 
*/ +#include <bits/mman-linux.h> diff --git a/libc/sysdeps/unix/sysv/linux/s390/bits/msq.h b/libc/sysdeps/unix/sysv/linux/s390/bits/msq.h index 5a1f6b29e..a5eaf89dd 100644 --- a/libc/sysdeps/unix/sysv/linux/s390/bits/msq.h +++ b/libc/sysdeps/unix/sysv/linux/s390/bits/msq.h @@ -26,6 +26,7 @@ #define MSG_NOERROR 010000 /* no error if message is too big */ #ifdef __USE_GNU # define MSG_EXCEPT 020000 /* recv any msg except of specified type */ +# define MSG_COPY 040000 /* copy (not remove) all queue messages */ #endif /* Types used in the structure definition. */ diff --git a/libc/sysdeps/unix/sysv/linux/s390/s390-32/getcontext.S b/libc/sysdeps/unix/sysv/linux/s390/s390-32/getcontext.S index 1a3712d8c..0a2e63e78 100644 --- a/libc/sysdeps/unix/sysv/linux/s390/s390-32/getcontext.S +++ b/libc/sysdeps/unix/sysv/linux/s390/s390-32/getcontext.S @@ -31,41 +31,42 @@ other than the PRESERVED state. */ ENTRY(__getcontext) - lr %r5,%r2 + lr %r1,%r2 /* sigprocmask (SIG_BLOCK, NULL, &sc->sc_mask). */ la %r2,SIG_BLOCK slr %r3,%r3 - la %r4,SC_MASK(%r5) + la %r4,SC_MASK(%r1) + lhi %r5,_NSIG8 svc SYS_ify(rt_sigprocmask) /* Store fpu context. 
*/ - stfpc SC_FPC(%r5) - std %f0,SC_FPRS(%r5) - std %f1,SC_FPRS+8(%r5) - std %f2,SC_FPRS+16(%r5) - std %f3,SC_FPRS+24(%r5) - std %f4,SC_FPRS+32(%r5) - std %f5,SC_FPRS+40(%r5) - std %f6,SC_FPRS+48(%r5) - std %f7,SC_FPRS+56(%r5) - std %f8,SC_FPRS+64(%r5) - std %f9,SC_FPRS+72(%r5) - std %f10,SC_FPRS+80(%r5) - std %f11,SC_FPRS+88(%r5) - std %f12,SC_FPRS+96(%r5) - std %f13,SC_FPRS+104(%r5) - std %f14,SC_FPRS+112(%r5) - std %f15,SC_FPRS+120(%r5) + stfpc SC_FPC(%r1) + std %f0,SC_FPRS(%r1) + std %f1,SC_FPRS+8(%r1) + std %f2,SC_FPRS+16(%r1) + std %f3,SC_FPRS+24(%r1) + std %f4,SC_FPRS+32(%r1) + std %f5,SC_FPRS+40(%r1) + std %f6,SC_FPRS+48(%r1) + std %f7,SC_FPRS+56(%r1) + std %f8,SC_FPRS+64(%r1) + std %f9,SC_FPRS+72(%r1) + std %f10,SC_FPRS+80(%r1) + std %f11,SC_FPRS+88(%r1) + std %f12,SC_FPRS+96(%r1) + std %f13,SC_FPRS+104(%r1) + std %f14,SC_FPRS+112(%r1) + std %f15,SC_FPRS+120(%r1) /* Set __getcontext return value to 0. */ slr %r2,%r2 /* Store access registers. */ - stam %a0,%a15,SC_ACRS(%r5) + stam %a0,%a15,SC_ACRS(%r1) /* Store general purpose registers. */ - stm %r0,%r15,SC_GPRS(%r5) + stm %r0,%r15,SC_GPRS(%r1) /* Return. */ br %r14 diff --git a/libc/sysdeps/unix/sysv/linux/s390/s390-32/setcontext.S b/libc/sysdeps/unix/sysv/linux/s390/s390-32/setcontext.S index fe56c24aa..ac25bea50 100644 --- a/libc/sysdeps/unix/sysv/linux/s390/s390-32/setcontext.S +++ b/libc/sysdeps/unix/sysv/linux/s390/s390-32/setcontext.S @@ -31,38 +31,39 @@ other than the PRESERVED state. */ ENTRY(__setcontext) - lr %r5,%r2 + lr %r1,%r2 /* sigprocmask (SIG_SETMASK, &sc->sc_mask, NULL). */ la %r2,SIG_BLOCK - la %r3,SC_MASK(%r5) + la %r3,SC_MASK(%r1) slr %r4,%r4 + lhi %r5,_NSIG8 svc SYS_ify(rt_sigprocmask) /* Load fpu context. 
*/ - lfpc SC_FPC(%r5) - ld %f0,SC_FPRS(%r5) - ld %f1,SC_FPRS+8(%r5) - ld %f2,SC_FPRS+16(%r5) - ld %f3,SC_FPRS+24(%r5) - ld %f4,SC_FPRS+32(%r5) - ld %f5,SC_FPRS+40(%r5) - ld %f6,SC_FPRS+48(%r5) - ld %f7,SC_FPRS+56(%r5) - ld %f8,SC_FPRS+64(%r5) - ld %f9,SC_FPRS+72(%r5) - ld %f10,SC_FPRS+80(%r5) - ld %f11,SC_FPRS+88(%r5) - ld %f12,SC_FPRS+96(%r5) - ld %f13,SC_FPRS+104(%r5) - ld %f14,SC_FPRS+112(%r5) - ld %f15,SC_FPRS+120(%r5) + lfpc SC_FPC(%r1) + ld %f0,SC_FPRS(%r1) + ld %f1,SC_FPRS+8(%r1) + ld %f2,SC_FPRS+16(%r1) + ld %f3,SC_FPRS+24(%r1) + ld %f4,SC_FPRS+32(%r1) + ld %f5,SC_FPRS+40(%r1) + ld %f6,SC_FPRS+48(%r1) + ld %f7,SC_FPRS+56(%r1) + ld %f8,SC_FPRS+64(%r1) + ld %f9,SC_FPRS+72(%r1) + ld %f10,SC_FPRS+80(%r1) + ld %f11,SC_FPRS+88(%r1) + ld %f12,SC_FPRS+96(%r1) + ld %f13,SC_FPRS+104(%r1) + ld %f14,SC_FPRS+112(%r1) + ld %f15,SC_FPRS+120(%r1) /* Don't touch %a0, used for thread purposes. */ - lam %a1,%a15,SC_ACRS+4(%r5) + lam %a1,%a15,SC_ACRS+4(%r1) /* Load general purpose registers. */ - lm %r0,%r15,SC_GPRS(%r5) + lm %r0,%r15,SC_GPRS(%r1) /* Return. */ br %r14 diff --git a/libc/sysdeps/unix/sysv/linux/s390/s390-32/swapcontext.S b/libc/sysdeps/unix/sysv/linux/s390/s390-32/swapcontext.S index 9a4b2b987..ecb0b3f80 100644 --- a/libc/sysdeps/unix/sysv/linux/s390/s390-32/swapcontext.S +++ b/libc/sysdeps/unix/sysv/linux/s390/s390-32/swapcontext.S @@ -34,12 +34,13 @@ ENTRY(__swapcontext) lr %r1,%r2 - lr %r5,%r3 + lr %r0,%r3 /* sigprocmask (SIG_BLOCK, NULL, &sc->sc_mask). */ la %r2,SIG_BLOCK slr %r3,%r3 la %r4,SC_MASK(%r1) + lhi %r5,_NSIG8 svc SYS_ify(rt_sigprocmask) /* Store fpu context. */ @@ -72,11 +73,14 @@ ENTRY(__swapcontext) /* sigprocmask (SIG_SETMASK, &sc->sc_mask, NULL). */ la %r2,SIG_BLOCK + lr %r5,%r0 la %r3,SC_MASK(%r5) slr %r4,%r4 + lhi %r5,_NSIG8 svc SYS_ify(rt_sigprocmask) /* Load fpu context. 
*/ + lr %r5,%r0 lfpc SC_FPC(%r5) ld %f0,SC_FPRS(%r5) ld %f1,SC_FPRS+8(%r5) diff --git a/libc/sysdeps/unix/sysv/linux/s390/s390-64/getcontext.S b/libc/sysdeps/unix/sysv/linux/s390/s390-64/getcontext.S index 68e89102a..7c406cb23 100644 --- a/libc/sysdeps/unix/sysv/linux/s390/s390-64/getcontext.S +++ b/libc/sysdeps/unix/sysv/linux/s390/s390-64/getcontext.S @@ -31,41 +31,42 @@ other than the PRESERVED state. */ ENTRY(__getcontext) - lgr %r5,%r2 + lgr %r1,%r2 /* sigprocmask (SIG_BLOCK, NULL, &sc->sc_mask). */ la %r2,SIG_BLOCK slgr %r3,%r3 - la %r4,SC_MASK(%r5) + la %r4,SC_MASK(%r1) + lghi %r5,_NSIG8 svc SYS_ify(rt_sigprocmask) /* Store fpu context. */ - stfpc SC_FPC(%r5) - std %f0,SC_FPRS(%r5) - std %f1,SC_FPRS+8(%r5) - std %f2,SC_FPRS+16(%r5) - std %f3,SC_FPRS+24(%r5) - std %f4,SC_FPRS+32(%r5) - std %f5,SC_FPRS+40(%r5) - std %f6,SC_FPRS+48(%r5) - std %f7,SC_FPRS+56(%r5) - std %f8,SC_FPRS+64(%r5) - std %f9,SC_FPRS+72(%r5) - std %f10,SC_FPRS+80(%r5) - std %f11,SC_FPRS+88(%r5) - std %f12,SC_FPRS+96(%r5) - std %f13,SC_FPRS+104(%r5) - std %f14,SC_FPRS+112(%r5) - std %f15,SC_FPRS+120(%r5) + stfpc SC_FPC(%r1) + std %f0,SC_FPRS(%r1) + std %f1,SC_FPRS+8(%r1) + std %f2,SC_FPRS+16(%r1) + std %f3,SC_FPRS+24(%r1) + std %f4,SC_FPRS+32(%r1) + std %f5,SC_FPRS+40(%r1) + std %f6,SC_FPRS+48(%r1) + std %f7,SC_FPRS+56(%r1) + std %f8,SC_FPRS+64(%r1) + std %f9,SC_FPRS+72(%r1) + std %f10,SC_FPRS+80(%r1) + std %f11,SC_FPRS+88(%r1) + std %f12,SC_FPRS+96(%r1) + std %f13,SC_FPRS+104(%r1) + std %f14,SC_FPRS+112(%r1) + std %f15,SC_FPRS+120(%r1) /* Set __getcontext return value to 0. */ slgr %r2,%r2 /* Store access registers. */ - stam %a0,%a15,SC_ACRS(%r5) + stam %a0,%a15,SC_ACRS(%r1) /* Store general purpose registers. */ - stmg %r0,%r15,SC_GPRS(%r5) + stmg %r0,%r15,SC_GPRS(%r1) /* Return. 
*/ br %r14 diff --git a/libc/sysdeps/unix/sysv/linux/s390/s390-64/setcontext.S b/libc/sysdeps/unix/sysv/linux/s390/s390-64/setcontext.S index 7415bd938..8157327bf 100644 --- a/libc/sysdeps/unix/sysv/linux/s390/s390-64/setcontext.S +++ b/libc/sysdeps/unix/sysv/linux/s390/s390-64/setcontext.S @@ -31,38 +31,39 @@ other than the PRESERVED state. */ ENTRY(__setcontext) - lgr %r5,%r2 + lgr %r1,%r2 /* sigprocmask (SIG_SETMASK, &sc->sc_mask, NULL). */ la %r2,SIG_BLOCK - la %r3,SC_MASK(%r5) + la %r3,SC_MASK(%r1) slgr %r4,%r4 + lghi %r5,_NSIG8 svc SYS_ify(rt_sigprocmask) /* Load fpu context. */ - lfpc SC_FPC(%r5) - ld %f0,SC_FPRS(%r5) - ld %f1,SC_FPRS+8(%r5) - ld %f2,SC_FPRS+16(%r5) - ld %f3,SC_FPRS+24(%r5) - ld %f4,SC_FPRS+32(%r5) - ld %f5,SC_FPRS+40(%r5) - ld %f6,SC_FPRS+48(%r5) - ld %f7,SC_FPRS+56(%r5) - ld %f8,SC_FPRS+64(%r5) - ld %f9,SC_FPRS+72(%r5) - ld %f10,SC_FPRS+80(%r5) - ld %f11,SC_FPRS+88(%r5) - ld %f12,SC_FPRS+96(%r5) - ld %f13,SC_FPRS+104(%r5) - ld %f14,SC_FPRS+112(%r5) - ld %f15,SC_FPRS+120(%r5) + lfpc SC_FPC(%r1) + ld %f0,SC_FPRS(%r1) + ld %f1,SC_FPRS+8(%r1) + ld %f2,SC_FPRS+16(%r1) + ld %f3,SC_FPRS+24(%r1) + ld %f4,SC_FPRS+32(%r1) + ld %f5,SC_FPRS+40(%r1) + ld %f6,SC_FPRS+48(%r1) + ld %f7,SC_FPRS+56(%r1) + ld %f8,SC_FPRS+64(%r1) + ld %f9,SC_FPRS+72(%r1) + ld %f10,SC_FPRS+80(%r1) + ld %f11,SC_FPRS+88(%r1) + ld %f12,SC_FPRS+96(%r1) + ld %f13,SC_FPRS+104(%r1) + ld %f14,SC_FPRS+112(%r1) + ld %f15,SC_FPRS+120(%r1) /* Don't touch %a0 and %a1, used for thread purposes. */ - lam %a2,%a15,SC_ACRS+8(%r5) + lam %a2,%a15,SC_ACRS+8(%r1) /* Load general purpose registers. */ - lmg %r0,%r15,SC_GPRS(%r5) + lmg %r0,%r15,SC_GPRS(%r1) /* Return. 
*/ br %r14 diff --git a/libc/sysdeps/unix/sysv/linux/s390/s390-64/swapcontext.S b/libc/sysdeps/unix/sysv/linux/s390/s390-64/swapcontext.S index 2d8f0d50e..a08e68cdd 100644 --- a/libc/sysdeps/unix/sysv/linux/s390/s390-64/swapcontext.S +++ b/libc/sysdeps/unix/sysv/linux/s390/s390-64/swapcontext.S @@ -34,12 +34,13 @@ ENTRY(__swapcontext) lgr %r1,%r2 - lgr %r5,%r3 + lgr %r0,%r3 /* sigprocmask (SIG_BLOCK, NULL, &sc->sc_mask). */ la %r2,SIG_BLOCK slgr %r3,%r3 la %r4,SC_MASK(%r1) + lghi %r5,_NSIG8 svc SYS_ify(rt_sigprocmask) /* Store fpu context. */ @@ -72,11 +73,14 @@ ENTRY(__swapcontext) /* sigprocmask (SIG_SETMASK, &sc->sc_mask, NULL). */ la %r2,SIG_BLOCK + lgr %r5,%r0 la %r3,SC_MASK(%r5) + lghi %r5,_NSIG8 slgr %r4,%r4 svc SYS_ify(rt_sigprocmask) /* Load fpu context. */ + lgr %r5,%r0 lfpc SC_FPC(%r5) ld %f0,SC_FPRS(%r5) ld %f1,SC_FPRS+8(%r5) diff --git a/libc/sysdeps/unix/sysv/linux/s390/sys/ptrace.h b/libc/sysdeps/unix/sysv/linux/s390/sys/ptrace.h index b9062dc1a..ca2ebb959 100644 --- a/libc/sysdeps/unix/sysv/linux/s390/sys/ptrace.h +++ b/libc/sysdeps/unix/sysv/linux/s390/sys/ptrace.h @@ -175,7 +175,8 @@ enum __ptrace_setoptions PTRACE_O_TRACEVFORKDONE = 0x00000020, PTRACE_O_TRACEEXIT = 0x00000040, PTRACE_O_TRACESECCOMP = 0x00000080, - PTRACE_O_MASK = 0x000000ff + PTRACE_O_EXITKILL = 0x00100000, + PTRACE_O_MASK = 0x001000ff }; /* Wait extended result codes for the above trace options. 
*/ diff --git a/libc/sysdeps/unix/sysv/linux/s390/ucontext_i.sym b/libc/sysdeps/unix/sysv/linux/s390/ucontext_i.sym index 525b54300..6cc9f1962 100644 --- a/libc/sysdeps/unix/sysv/linux/s390/ucontext_i.sym +++ b/libc/sysdeps/unix/sysv/linux/s390/ucontext_i.sym @@ -8,6 +8,8 @@ SIG_BLOCK SIG_UNBLOCK SIG_SETMASK +_NSIG8 (_NSIG / 8) + #define ucontext(member) offsetof (ucontext_t, member) #define mcontext(member) ucontext (uc_mcontext.member) diff --git a/libc/sysdeps/unix/sysv/linux/sh/bits/mman.h b/libc/sysdeps/unix/sysv/linux/sh/bits/mman.h index 40da97e2f..396a9b918 100644 --- a/libc/sysdeps/unix/sysv/linux/sh/bits/mman.h +++ b/libc/sysdeps/unix/sysv/linux/sh/bits/mman.h @@ -23,36 +23,6 @@ /* The following definitions basically come from the kernel headers. But the kernel header is not namespace clean. */ - -/* Protections are chosen from these bits, OR'd together. The - implementation does not necessarily support PROT_EXEC or PROT_WRITE - without PROT_READ. The only guarantees are that no writing will be - allowed without PROT_WRITE and no access will be allowed for PROT_NONE. */ - -#define PROT_READ 0x1 /* Page can be read. */ -#define PROT_WRITE 0x2 /* Page can be written. */ -#define PROT_EXEC 0x4 /* Page can be executed. */ -#define PROT_NONE 0x0 /* Page can not be accessed. */ -#define PROT_GROWSDOWN 0x01000000 /* Extend change to start of - growsdown vma (mprotect only). */ -#define PROT_GROWSUP 0x02000000 /* Extend change to start of - growsup vma (mprotect only). */ - -/* Sharing types (must choose one and only one of these). */ -#define MAP_SHARED 0x01 /* Share changes. */ -#define MAP_PRIVATE 0x02 /* Changes are private. */ -#ifdef __USE_MISC -# define MAP_TYPE 0x0f /* Mask for type of mapping. */ -#endif - -/* Other flags. */ -#define MAP_FIXED 0x10 /* Interpret addr exactly. */ -#ifdef __USE_MISC -# define MAP_FILE 0 -# define MAP_ANONYMOUS 0x20 /* Don't use a file. */ -# define MAP_ANON MAP_ANONYMOUS -#endif - /* These are Linux-specific. 
*/ #ifdef __USE_MISC # define MAP_GROWSDOWN 0x0100 /* Stack-like segment. */ @@ -66,47 +36,5 @@ # define MAP_HUGETLB 0x40000 /* Create huge page mapping. */ #endif -/* Flags to `msync'. */ -#define MS_ASYNC 1 /* Sync memory asynchronously. */ -#define MS_SYNC 4 /* Synchronous memory sync. */ -#define MS_INVALIDATE 2 /* Invalidate the caches. */ - -/* Flags for `mlockall'. */ -#define MCL_CURRENT 1 /* Lock all currently mapped pages. */ -#define MCL_FUTURE 2 /* Lock all additions to address - space. */ - -/* Flags for `mremap'. */ -#ifdef __USE_GNU -# define MREMAP_MAYMOVE 1 -# define MREMAP_FIXED 2 -#endif - -/* Advice to `madvise'. */ -#ifdef __USE_BSD -# define MADV_NORMAL 0 /* No further special treatment. */ -# define MADV_RANDOM 1 /* Expect random page references. */ -# define MADV_SEQUENTIAL 2 /* Expect sequential page references. */ -# define MADV_WILLNEED 3 /* Will need these pages. */ -# define MADV_DONTNEED 4 /* Don't need these pages. */ -# define MADV_REMOVE 9 /* Remove these pages and resources. */ -# define MADV_DONTFORK 10 /* Do not inherit across fork. */ -# define MADV_DOFORK 11 /* Do inherit across fork. */ -# define MADV_MERGEABLE 12 /* KSM may merge identical pages. */ -# define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages. */ -# define MADV_HUGEPAGE 14 /* Worth backing with hugepages. */ -# define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages. */ -# define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, - overrides the coredump filter bits. */ -# define MADV_DODUMP 17 /* Clear the MADV_DONTDUMP flag. */ -# define MADV_HWPOISON 100 /* Poison a page for testing. */ -#endif - -/* The POSIX people had to invent similar names for the same things. */ -#ifdef __USE_XOPEN2K -# define POSIX_MADV_NORMAL 0 /* No further special treatment. */ -# define POSIX_MADV_RANDOM 1 /* Expect random page references. */ -# define POSIX_MADV_SEQUENTIAL 2 /* Expect sequential page references. 
*/ -# define POSIX_MADV_WILLNEED 3 /* Will need these pages. */ -# define POSIX_MADV_DONTNEED 4 /* Don't need these pages. */ -#endif +/* Include generic Linux declarations. */ +#include <bits/mman-linux.h> diff --git a/libc/sysdeps/unix/sysv/linux/sparc/bits/mman.h b/libc/sysdeps/unix/sysv/linux/sparc/bits/mman.h index 616e24333..ad0389ca3 100644 --- a/libc/sysdeps/unix/sysv/linux/sparc/bits/mman.h +++ b/libc/sysdeps/unix/sysv/linux/sparc/bits/mman.h @@ -24,36 +24,6 @@ But the kernel header is not namespace clean. */ -/* Protections are chosen from these bits, OR'd together. The - implementation does not necessarily support PROT_EXEC or PROT_WRITE - without PROT_READ. The only guarantees are that no writing will be - allowed without PROT_WRITE and no access will be allowed for PROT_NONE. */ - -#define PROT_READ 0x1 /* Page can be read. */ -#define PROT_WRITE 0x2 /* Page can be written. */ -#define PROT_EXEC 0x4 /* Page can be executed. */ -#define PROT_NONE 0x0 /* Page can not be accessed. */ -#define PROT_GROWSDOWN 0x01000000 /* Extend change to start of - growsdown vma (mprotect only). */ -#define PROT_GROWSUP 0x02000000 /* Extend change to start of - growsup vma (mprotect only). */ - -/* Sharing types (must choose one and only one of these). */ -#define MAP_SHARED 0x01 /* Share changes. */ -#define MAP_PRIVATE 0x02 /* Changes are private. */ -#ifdef __USE_MISC -# define MAP_TYPE 0x0f /* Mask for type of mapping. */ -#endif - -/* Other flags. */ -#define MAP_FIXED 0x10 /* Interpret addr exactly. */ -#ifdef __USE_MISC -# define MAP_FILE 0x00 -# define MAP_ANONYMOUS 0x20 /* Don't use a file. */ -# define MAP_ANON MAP_ANONYMOUS -# define MAP_RENAME MAP_ANONYMOUS -#endif - /* These are Linux-specific. */ #ifdef __USE_MISC # define MAP_GROWSDOWN 0x0200 /* Stack-like segment. */ @@ -68,48 +38,14 @@ # define MAP_HUGETLB 0x40000 /* Create huge page mapping. */ #endif -/* Flags to `msync'. */ -#define MS_ASYNC 1 /* Sync memory asynchronously. 
*/ -#define MS_SYNC 4 /* Synchronous memory sync. */ -#define MS_INVALIDATE 2 /* Invalidate the caches. */ - /* Flags for `mlockall'. */ #define MCL_CURRENT 0x2000 /* Lock all currently mapped pages. */ #define MCL_FUTURE 0x4000 /* Lock all additions to address space. */ +/* Include generic Linux declarations. */ +#include <bits/mman-linux.h> -/* Flags for `mremap'. */ -#ifdef __USE_GNU -# define MREMAP_MAYMOVE 1 -# define MREMAP_FIXED 2 -#endif - -/* Advice to `madvise'. */ -#ifdef __USE_BSD -# define MADV_NORMAL 0 /* No further special treatment. */ -# define MADV_RANDOM 1 /* Expect random page references. */ -# define MADV_SEQUENTIAL 2 /* Expect sequential page references. */ -# define MADV_WILLNEED 3 /* Will need these pages. */ -# define MADV_DONTNEED 4 /* Don't need these pages. */ -# define MADV_FREE 5 /* Content can be freed (Solaris). */ -# define MADV_REMOVE 9 /* Remove these pages and resources. */ -# define MADV_DONTFORK 10 /* Do not inherit across fork. */ -# define MADV_DOFORK 11 /* Do inherit across fork. */ -# define MADV_MERGEABLE 12 /* KSM may merge identical pages. */ -# define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages. */ -# define MADV_HUGEPAGE 14 /* Worth backing with hugepages. */ -# define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages. */ -# define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, - overrides the coredump filter bits. */ -# define MADV_DODUMP 17 /* Clear the MADV_DONTDUMP flag. */ -# define MADV_HWPOISON 100 /* Poison a page for testing. */ -#endif - -/* The POSIX people had to invent similar names for the same things. */ -#ifdef __USE_XOPEN2K -# define POSIX_MADV_NORMAL 0 /* No further special treatment. */ -# define POSIX_MADV_RANDOM 1 /* Expect random page references. */ -# define POSIX_MADV_SEQUENTIAL 2 /* Expect sequential page references. */ -# define POSIX_MADV_WILLNEED 3 /* Will need these pages. */ -# define POSIX_MADV_DONTNEED 4 /* Don't need these pages. */ +/* Other flags. 
*/ +#ifdef __USE_MISC +# define MAP_RENAME MAP_ANONYMOUS #endif diff --git a/libc/sysdeps/unix/sysv/linux/sparc/bits/msq.h b/libc/sysdeps/unix/sysv/linux/sparc/bits/msq.h index 84c4b858b..0a0192732 100644 --- a/libc/sysdeps/unix/sysv/linux/sparc/bits/msq.h +++ b/libc/sysdeps/unix/sysv/linux/sparc/bits/msq.h @@ -26,6 +26,7 @@ #define MSG_NOERROR 010000 /* no error if message is too big */ #ifdef __USE_GNU # define MSG_EXCEPT 020000 /* recv any msg except of specified type */ +# define MSG_COPY 040000 /* copy (not remove) all queue messages */ #endif /* Types used in the structure definition. */ diff --git a/libc/sysdeps/unix/sysv/linux/sparc/sys/ptrace.h b/libc/sysdeps/unix/sysv/linux/sparc/sys/ptrace.h index bd6fd536d..7ba8f5f25 100644 --- a/libc/sysdeps/unix/sysv/linux/sparc/sys/ptrace.h +++ b/libc/sysdeps/unix/sysv/linux/sparc/sys/ptrace.h @@ -219,7 +219,8 @@ enum __ptrace_setoptions PTRACE_O_TRACEVFORKDONE = 0x00000020, PTRACE_O_TRACEEXIT = 0x00000040, PTRACE_O_TRACESECCOMP = 0x00000080, - PTRACE_O_MASK = 0x000000ff + PTRACE_O_EXITKILL = 0x00100000, + PTRACE_O_MASK = 0x001000ff }; /* Wait extended result codes for the above trace options. */ diff --git a/libc/sysdeps/unix/sysv/linux/sys/ptrace.h b/libc/sysdeps/unix/sysv/linux/sys/ptrace.h index d04fab599..08709bf64 100644 --- a/libc/sysdeps/unix/sysv/linux/sys/ptrace.h +++ b/libc/sysdeps/unix/sysv/linux/sys/ptrace.h @@ -166,7 +166,8 @@ enum __ptrace_setoptions PTRACE_O_TRACEVFORKDONE = 0x00000020, PTRACE_O_TRACEEXIT = 0x00000040, PTRACE_O_TRACESECCOMP = 0x00000080, - PTRACE_O_MASK = 0x000000ff + PTRACE_O_EXITKILL = 0x00100000, + PTRACE_O_MASK = 0x001000ff }; /* Wait extended result codes for the above trace options. 
*/ diff --git a/libc/sysdeps/unix/sysv/linux/times.c b/libc/sysdeps/unix/sysv/linux/times.c index f3b5f014e..2a5caf2cd 100644 --- a/libc/sysdeps/unix/sysv/linux/times.c +++ b/libc/sysdeps/unix/sysv/linux/times.c @@ -26,13 +26,14 @@ __times (struct tms *buf) INTERNAL_SYSCALL_DECL (err); clock_t ret = INTERNAL_SYSCALL (times, err, 1, buf); if (INTERNAL_SYSCALL_ERROR_P (ret, err) - && __builtin_expect (INTERNAL_SYSCALL_ERRNO (ret, err) == EFAULT, 0)) + && __builtin_expect (INTERNAL_SYSCALL_ERRNO (ret, err) == EFAULT, 0) + && buf) { /* This might be an error or not. For architectures which have no separate return value and error indicators we cannot distinguish a return value of -1 from an error. Do it the - hard way. We crash applications which pass in an invalid BUF - pointer. */ + hard way. We crash applications which pass in an invalid + non-NULL BUF pointer. Linux allows BUF to be NULL. */ #define touch(v) \ do { \ clock_t temp = v; \ @@ -44,7 +45,8 @@ __times (struct tms *buf) touch (buf->tms_cutime); touch (buf->tms_cstime); - /* If we come here the memory is valid and the kernel did not + /* If we come here the memory is valid (or BUF is NULL, which is + a valid condition for the kernel syscall) and the kernel did not return an EFAULT error. Return the value given by the kernel. */ } diff --git a/libc/sysdeps/unix/sysv/linux/x86/bits/mman.h b/libc/sysdeps/unix/sysv/linux/x86/bits/mman.h index 591df139d..a2fa80879 100644 --- a/libc/sysdeps/unix/sysv/linux/x86/bits/mman.h +++ b/libc/sysdeps/unix/sysv/linux/x86/bits/mman.h @@ -23,34 +23,8 @@ /* The following definitions basically come from the kernel headers. But the kernel header is not namespace clean. */ - -/* Protections are chosen from these bits, OR'd together. The - implementation does not necessarily support PROT_EXEC or PROT_WRITE - without PROT_READ. The only guarantees are that no writing will be - allowed without PROT_WRITE and no access will be allowed for PROT_NONE. 
*/ - -#define PROT_READ 0x1 /* Page can be read. */ -#define PROT_WRITE 0x2 /* Page can be written. */ -#define PROT_EXEC 0x4 /* Page can be executed. */ -#define PROT_NONE 0x0 /* Page can not be accessed. */ -#define PROT_GROWSDOWN 0x01000000 /* Extend change to start of - growsdown vma (mprotect only). */ -#define PROT_GROWSUP 0x02000000 /* Extend change to start of - growsup vma (mprotect only). */ - -/* Sharing types (must choose one and only one of these). */ -#define MAP_SHARED 0x01 /* Share changes. */ -#define MAP_PRIVATE 0x02 /* Changes are private. */ -#ifdef __USE_MISC -# define MAP_TYPE 0x0f /* Mask for type of mapping. */ -#endif - /* Other flags. */ -#define MAP_FIXED 0x10 /* Interpret addr exactly. */ #ifdef __USE_MISC -# define MAP_FILE 0 -# define MAP_ANONYMOUS 0x20 /* Don't use a file. */ -# define MAP_ANON MAP_ANONYMOUS # define MAP_32BIT 0x40 /* Only give out 32-bit addresses. */ #endif @@ -67,47 +41,5 @@ # define MAP_HUGETLB 0x40000 /* Create huge page mapping. */ #endif -/* Flags to `msync'. */ -#define MS_ASYNC 1 /* Sync memory asynchronously. */ -#define MS_SYNC 4 /* Synchronous memory sync. */ -#define MS_INVALIDATE 2 /* Invalidate the caches. */ - -/* Flags for `mlockall'. */ -#define MCL_CURRENT 1 /* Lock all currently mapped pages. */ -#define MCL_FUTURE 2 /* Lock all additions to address - space. */ - -/* Flags for `mremap'. */ -#ifdef __USE_GNU -# define MREMAP_MAYMOVE 1 -# define MREMAP_FIXED 2 -#endif - -/* Advice to `madvise'. */ -#ifdef __USE_BSD -# define MADV_NORMAL 0 /* No further special treatment. */ -# define MADV_RANDOM 1 /* Expect random page references. */ -# define MADV_SEQUENTIAL 2 /* Expect sequential page references. */ -# define MADV_WILLNEED 3 /* Will need these pages. */ -# define MADV_DONTNEED 4 /* Don't need these pages. */ -# define MADV_REMOVE 9 /* Remove these pages and resources. */ -# define MADV_DONTFORK 10 /* Do not inherit across fork. */ -# define MADV_DOFORK 11 /* Do inherit across fork. 
*/ -# define MADV_MERGEABLE 12 /* KSM may merge identical pages. */ -# define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages. */ -# define MADV_HUGEPAGE 14 /* Worth backing with hugepages. */ -# define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages. */ -# define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, - overrides the coredump filter bits. */ -# define MADV_DODUMP 17 /* Clear the MADV_DONTDUMP flag. */ -# define MADV_HWPOISON 100 /* Poison a page for testing. */ -#endif - -/* The POSIX people had to invent similar names for the same things. */ -#ifdef __USE_XOPEN2K -# define POSIX_MADV_NORMAL 0 /* No further special treatment. */ -# define POSIX_MADV_RANDOM 1 /* Expect random page references. */ -# define POSIX_MADV_SEQUENTIAL 2 /* Expect sequential page references. */ -# define POSIX_MADV_WILLNEED 3 /* Will need these pages. */ -# define POSIX_MADV_DONTNEED 4 /* Don't need these pages. */ -#endif +/* Include generic Linux declarations. */ +#include <bits/mman-linux.h> diff --git a/libc/sysdeps/unix/sysv/linux/x86/bits/msq.h b/libc/sysdeps/unix/sysv/linux/x86/bits/msq.h index ef5cc3868..9355e465d 100644 --- a/libc/sysdeps/unix/sysv/linux/x86/bits/msq.h +++ b/libc/sysdeps/unix/sysv/linux/x86/bits/msq.h @@ -25,6 +25,7 @@ #define MSG_NOERROR 010000 /* no error if message is too big */ #ifdef __USE_GNU # define MSG_EXCEPT 020000 /* recv any msg except of specified type */ +# define MSG_COPY 040000 /* copy (not remove) all queue messages */ #endif /* Types used in the structure definition. 
*/ diff --git a/libc/sysdeps/unix/sysv/linux/x86_64/x32/nptl/libpthread.abilist b/libc/sysdeps/unix/sysv/linux/x86_64/x32/nptl/libpthread.abilist index ee6993291..b07d16f78 100644 --- a/libc/sysdeps/unix/sysv/linux/x86_64/x32/nptl/libpthread.abilist +++ b/libc/sysdeps/unix/sysv/linux/x86_64/x32/nptl/libpthread.abilist @@ -73,7 +73,6 @@ GLIBC_2.16 pause F pread F pread64 F - pthread_atfork F pthread_attr_destroy F pthread_attr_getaffinity_np F pthread_attr_getdetachstate F diff --git a/libc/sysdeps/x86_64/fpu/libm-test-ulps b/libc/sysdeps/x86_64/fpu/libm-test-ulps index b828774c7..f190ed881 100644 --- a/libc/sysdeps/x86_64/fpu/libm-test-ulps +++ b/libc/sysdeps/x86_64/fpu/libm-test-ulps @@ -2390,6 +2390,9 @@ ifloat: 1 Test "j0 (0x1.d7ce3ap+107) == 2.775523647291230802651040996274861694514e-17": float: 2 ifloat: 2 +Test "j0 (0x1p16382) == -1.2193782500509000574176799046642541129387e-2466": +ildouble: 1 +ldouble: 1 Test "j0 (10.0) == -0.245935764451348335197760862485328754": double: 2 float: 1 @@ -2420,6 +2423,9 @@ ldouble: 1 Test "j1 (0x1.ff00000000002p+840) == 1.846591691699331493194965158699937660696e-127": double: 1 idouble: 1 +Test "j1 (0x1p16382) == 8.0839224448726336195866026476176740513439e-2467": +ildouble: 1 +ldouble: 1 Test "j1 (10.0) == 0.0434727461688614366697487680258592883": float: 2 ifloat: 2 @@ -3073,6 +3079,9 @@ double: 1 float: 1 idouble: 1 ifloat: 1 +Test "y0 (0x1p16382) == 8.0839224448726336195866026476176740513439e-2467": +ildouble: 1 +ldouble: 1 Test "y0 (1.0) == 0.0882569642156769579829267660235151628": double: 2 float: 1 @@ -3117,6 +3126,9 @@ ldouble: 1 Test "y1 (0x1p-10) == -6.5190099301063115047395187618929589514382e+02": double: 1 idouble: 1 +Test "y1 (0x1p16382) == 1.2193782500509000574176799046642541129387e-2466": +ildouble: 1 +ldouble: 1 Test "y1 (1.5) == -0.412308626973911295952829820633445323": float: 1 ifloat: 1 diff --git a/libc/sysdeps/x86_64/memset.S b/libc/sysdeps/x86_64/memset.S index f3a4d448d..b393efe44 100644 --- 
a/libc/sysdeps/x86_64/memset.S +++ b/libc/sysdeps/x86_64/memset.S @@ -23,7 +23,7 @@ #define __STOS_UPPER_BOUNDARY $65536 .text -#if !defined NOT_IN_libc && !defined USE_MULTIARCH +#if !defined NOT_IN_libc ENTRY(__bzero) mov %rsi,%rdx /* Adjust parameter. */ xorl %esi,%esi /* Fill with 0s. */ diff --git a/libc/sysdeps/x86_64/multiarch/Makefile b/libc/sysdeps/x86_64/multiarch/Makefile index dd6c27d0b..86787ee6e 100644 --- a/libc/sysdeps/x86_64/multiarch/Makefile +++ b/libc/sysdeps/x86_64/multiarch/Makefile @@ -10,14 +10,12 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \ strend-sse4 memcmp-sse4 memcpy-ssse3 mempcpy-ssse3 \ memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \ memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \ - strncase_l-ssse3 strlen-sse4 strlen-sse2-no-bsf memset-x86-64 \ + strncase_l-ssse3 strcat-ssse3 strncat-ssse3\ strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \ strcpy-sse2-unaligned strncpy-sse2-unaligned \ stpcpy-sse2-unaligned stpncpy-sse2-unaligned \ strcat-sse2-unaligned strncat-sse2-unaligned \ - strcat-ssse3 strncat-ssse3 strlen-sse2-pminub \ - strnlen-sse2-no-bsf strrchr-sse2-no-bsf strchr-sse2-no-bsf \ - memcmp-ssse3 + strrchr-sse2-no-bsf strchr-sse2-no-bsf memcmp-ssse3 ifeq (yes,$(config-cflags-sse4)) sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c varshift CFLAGS-varshift.c += -msse4 diff --git a/libc/sysdeps/x86_64/multiarch/bzero.S b/libc/sysdeps/x86_64/multiarch/bzero.S deleted file mode 100644 index 88e96ea8e..000000000 --- a/libc/sysdeps/x86_64/multiarch/bzero.S +++ /dev/null @@ -1,28 +0,0 @@ -/* bzero. x86-64 version. - Copyright (C) 2010-2013 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - - .text -ENTRY(__bzero) - mov %rsi,%rdx /* Adjust parameter. */ - xorl %esi,%esi /* Fill with 0s. */ - jmp __libc_memset /* Branch to IFUNC memset. */ -END(__bzero) -weak_alias (__bzero, bzero) diff --git a/libc/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/libc/sysdeps/x86_64/multiarch/ifunc-impl-list.c index 643cb2dd0..05315fdd7 100644 --- a/libc/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/libc/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -61,17 +61,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __memmove_ssse3) IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_sse2)) - /* Support sysdeps/x86_64/multiarch/memset_chk.S. */ - IFUNC_IMPL (i, name, __memset_chk, - IFUNC_IMPL_ADD (array, i, __memset_chk, 1, __memset_chk_sse2) - IFUNC_IMPL_ADD (array, i, __memset_chk, 1, - __memset_chk_x86_64)) - - /* Support sysdeps/x86_64/multiarch/memset.S. */ - IFUNC_IMPL (i, name, memset, - IFUNC_IMPL_ADD (array, i, memset, 1, __memset_sse2) - IFUNC_IMPL_ADD (array, i, memset, 1, __memset_x86_64)) - /* Support sysdeps/x86_64/multiarch/rawmemchr.S. 
*/ IFUNC_IMPL (i, name, rawmemchr, IFUNC_IMPL_ADD (array, i, rawmemchr, HAS_SSE4_2, @@ -187,11 +176,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __strncpy_sse2_unaligned) IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2)) - /* Support sysdeps/x86_64/multiarch/strnlen.S. */ - IFUNC_IMPL (i, name, strnlen, - IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_sse2_no_bsf) - IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_sse2)) - /* Support sysdeps/x86_64/multiarch/strpbrk.S. */ IFUNC_IMPL (i, name, strpbrk, IFUNC_IMPL_ADD (array, i, strpbrk, HAS_SSE4_2, @@ -262,14 +246,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __mempcpy_ssse3) IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_sse2)) - /* Support sysdeps/x86_64/multiarch/strlen.S. */ - IFUNC_IMPL (i, name, strlen, - IFUNC_IMPL_ADD (array, i, strlen, HAS_SSE4_2, __strlen_sse42) - IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2_pminub) - IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2_no_bsf) - IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2) - IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2)) - /* Support sysdeps/x86_64/multiarch/strncmp.S. */ IFUNC_IMPL (i, name, strncmp, IFUNC_IMPL_ADD (array, i, strncmp, HAS_SSE4_2, diff --git a/libc/sysdeps/x86_64/multiarch/init-arch.c b/libc/sysdeps/x86_64/multiarch/init-arch.c index 992cbfb75..7daaf4609 100644 --- a/libc/sysdeps/x86_64/multiarch/init-arch.c +++ b/libc/sysdeps/x86_64/multiarch/init-arch.c @@ -58,11 +58,6 @@ __init_cpu_features (void) get_common_indeces (&family, &model); - /* Intel processors prefer SSE instruction for memory/string - routines if they are available. 
*/ - __cpu_features.feature[index_Prefer_SSE_for_memop] - |= bit_Prefer_SSE_for_memop; - unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax; unsigned int extended_family = (eax >> 20) & 0xff; unsigned int extended_model = (eax >> 12) & 0xf0; @@ -125,12 +120,6 @@ __init_cpu_features (void) ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx; - /* AMD processors prefer SSE instructions for memory/string routines - if they are available, otherwise they prefer integer instructions. */ - if ((ecx & 0x200)) - __cpu_features.feature[index_Prefer_SSE_for_memop] - |= bit_Prefer_SSE_for_memop; - unsigned int eax; __cpuid (0x80000000, eax, ebx, ecx, edx); if (eax >= 0x80000001) diff --git a/libc/sysdeps/x86_64/multiarch/init-arch.h b/libc/sysdeps/x86_64/multiarch/init-arch.h index 0aece18de..28edbf7d0 100644 --- a/libc/sysdeps/x86_64/multiarch/init-arch.h +++ b/libc/sysdeps/x86_64/multiarch/init-arch.h @@ -18,7 +18,6 @@ #define bit_Fast_Rep_String (1 << 0) #define bit_Fast_Copy_Backward (1 << 1) #define bit_Slow_BSF (1 << 2) -#define bit_Prefer_SSE_for_memop (1 << 3) #define bit_Fast_Unaligned_Load (1 << 4) #define bit_Prefer_PMINUB_for_stringop (1 << 5) #define bit_AVX_Usable (1 << 6) @@ -58,7 +57,6 @@ # define index_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE # define index_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE # define index_Slow_BSF FEATURE_INDEX_1*FEATURE_SIZE -# define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE # define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE # define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE # define index_AVX_Usable FEATURE_INDEX_1*FEATURE_SIZE @@ -157,7 +155,6 @@ extern const struct cpu_features *__get_cpu_features (void) # define index_Fast_Rep_String FEATURE_INDEX_1 # define index_Fast_Copy_Backward FEATURE_INDEX_1 # define index_Slow_BSF FEATURE_INDEX_1 -# define index_Prefer_SSE_for_memop FEATURE_INDEX_1 # define index_Fast_Unaligned_Load FEATURE_INDEX_1 # define 
index_AVX_Usable FEATURE_INDEX_1 # define index_FMA_Usable FEATURE_INDEX_1 @@ -169,7 +166,6 @@ extern const struct cpu_features *__get_cpu_features (void) # define HAS_FAST_REP_STRING HAS_ARCH_FEATURE (Fast_Rep_String) # define HAS_FAST_COPY_BACKWARD HAS_ARCH_FEATURE (Fast_Copy_Backward) # define HAS_SLOW_BSF HAS_ARCH_FEATURE (Slow_BSF) -# define HAS_PREFER_SSE_FOR_MEMOP HAS_ARCH_FEATURE (Prefer_SSE_for_memop) # define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load) # define HAS_AVX HAS_ARCH_FEATURE (AVX_Usable) # define HAS_FMA HAS_ARCH_FEATURE (FMA_Usable) diff --git a/libc/sysdeps/x86_64/multiarch/memset-x86-64.S b/libc/sysdeps/x86_64/multiarch/memset-x86-64.S deleted file mode 100644 index 551d105d2..000000000 --- a/libc/sysdeps/x86_64/multiarch/memset-x86-64.S +++ /dev/null @@ -1,19 +0,0 @@ -#include <sysdep.h> - -#ifndef NOT_IN_libc -# undef ENTRY_CHK -# define ENTRY_CHK(name) \ - .type __memset_chk_x86_64, @function; \ - .globl __memset_chk_x86_64; \ - .p2align 4; \ - __memset_chk_x86_64: cfi_startproc; \ - CALL_MCOUNT -# undef END_CHK -# define END_CHK(name) \ - cfi_endproc; .size __memset_chk_x86_64, .-__memset_chk_x86_64 - -# undef libc_hidden_builtin_def -# define libc_hidden_builtin_def(name) -# define memset __memset_x86_64 -# include "../memset.S" -#endif diff --git a/libc/sysdeps/x86_64/multiarch/memset.S b/libc/sysdeps/x86_64/multiarch/memset.S deleted file mode 100644 index 7f673faa7..000000000 --- a/libc/sysdeps/x86_64/multiarch/memset.S +++ /dev/null @@ -1,79 +0,0 @@ -/* Multiple versions of memset - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2013 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib. */ -#ifndef NOT_IN_libc -ENTRY(memset) - .type memset, @gnu_indirect_function - cmpl $0, __cpu_features+KIND_OFFSET(%rip) - jne 1f - call __init_cpu_features -1: leaq __memset_x86_64(%rip), %rax - testl $bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip) - jz 2f - leaq __memset_sse2(%rip), %rax -2: ret -END(memset) - -/* Define internal IFUNC memset for bzero. */ - .globl __libc_memset - .hidden __libc_memset - __libc_memset = memset - -# define USE_SSE2 1 - -# undef ENTRY -# define ENTRY(name) \ - .type __memset_sse2, @function; \ - .globl __memset_sse2; \ - .p2align 4; \ - __memset_sse2: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __memset_sse2, .-__memset_sse2 - -# undef ENTRY_CHK -# define ENTRY_CHK(name) \ - .type __memset_chk_sse2, @function; \ - .globl __memset_chk_sse2; \ - .p2align 4; \ - __memset_chk_sse2: cfi_startproc; \ - CALL_MCOUNT -# undef END_CHK -# define END_CHK(name) \ - cfi_endproc; .size __memset_chk_sse2, .-__memset_chk_sse2 - -# ifdef SHARED -# undef libc_hidden_builtin_def -/* It doesn't make sense to send libc-internal memset calls through a PLT. - The speedup we get from using GPR instruction is likely eaten away - by the indirect call in the PLT. 
*/ -# define libc_hidden_builtin_def(name) \ - .globl __GI_memset; __GI_memset = __memset_sse2 -# endif - -# undef strong_alias -# define strong_alias(original, alias) -#endif - -#include "../memset.S" diff --git a/libc/sysdeps/x86_64/multiarch/memset_chk.S b/libc/sysdeps/x86_64/multiarch/memset_chk.S deleted file mode 100644 index 55e263542..000000000 --- a/libc/sysdeps/x86_64/multiarch/memset_chk.S +++ /dev/null @@ -1,44 +0,0 @@ -/* Multiple versions of __memset_chk - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2013 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - -/* Define multiple versions only for the definition in lib. 
*/ -#ifndef NOT_IN_libc -# ifdef SHARED -ENTRY(__memset_chk) - .type __memset_chk, @gnu_indirect_function - cmpl $0, __cpu_features+KIND_OFFSET(%rip) - jne 1f - call __init_cpu_features -1: leaq __memset_chk_x86_64(%rip), %rax - testl $bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip) - jz 2f - leaq __memset_chk_sse2(%rip), %rax -2: ret -END(__memset_chk) - -strong_alias (__memset_chk, __memset_zero_constant_len_parameter) - .section .gnu.warning.__memset_zero_constant_len_parameter - .string "memset used with constant zero length parameter; this could be due to transposed parameters" -# else -# include "../memset_chk.S" -# endif -#endif diff --git a/libc/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S b/libc/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S index 72bb60994..028c6d3d7 100644 --- a/libc/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S +++ b/libc/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S @@ -34,10 +34,236 @@ ENTRY (STRCAT) mov %rdx, %r8 # endif -# define RETURN jmp L(StartStrcpyPart) -# include "strlen-sse2-pminub.S" -# undef RETURN +/* Inline corresponding strlen file, temporary until new strcpy + implementation gets merged. 
*/ + xor %rax, %rax + mov %edi, %ecx + and $0x3f, %ecx + pxor %xmm0, %xmm0 + cmp $0x30, %ecx + ja L(next) + movdqu (%rdi), %xmm1 + pcmpeqb %xmm1, %xmm0 + pmovmskb %xmm0, %edx + test %edx, %edx + jnz L(exit_less16) + mov %rdi, %rax + and $-16, %rax + jmp L(align16_start) +L(next): + mov %rdi, %rax + and $-16, %rax + pcmpeqb (%rax), %xmm0 + mov $-1, %r10d + sub %rax, %rcx + shl %cl, %r10d + pmovmskb %xmm0, %edx + and %r10d, %edx + jnz L(exit) + +L(align16_start): + pxor %xmm0, %xmm0 + pxor %xmm1, %xmm1 + pxor %xmm2, %xmm2 + pxor %xmm3, %xmm3 + pcmpeqb 16(%rax), %xmm0 + pmovmskb %xmm0, %edx + test %edx, %edx + jnz L(exit16) + + pcmpeqb 32(%rax), %xmm1 + pmovmskb %xmm1, %edx + test %edx, %edx + jnz L(exit32) + + pcmpeqb 48(%rax), %xmm2 + pmovmskb %xmm2, %edx + test %edx, %edx + jnz L(exit48) + + pcmpeqb 64(%rax), %xmm3 + pmovmskb %xmm3, %edx + test %edx, %edx + jnz L(exit64) + + pcmpeqb 80(%rax), %xmm0 + add $64, %rax + pmovmskb %xmm0, %edx + test %edx, %edx + jnz L(exit16) + + pcmpeqb 32(%rax), %xmm1 + pmovmskb %xmm1, %edx + test %edx, %edx + jnz L(exit32) + + pcmpeqb 48(%rax), %xmm2 + pmovmskb %xmm2, %edx + test %edx, %edx + jnz L(exit48) + + pcmpeqb 64(%rax), %xmm3 + pmovmskb %xmm3, %edx + test %edx, %edx + jnz L(exit64) + + pcmpeqb 80(%rax), %xmm0 + add $64, %rax + pmovmskb %xmm0, %edx + test %edx, %edx + jnz L(exit16) + + pcmpeqb 32(%rax), %xmm1 + pmovmskb %xmm1, %edx + test %edx, %edx + jnz L(exit32) + + pcmpeqb 48(%rax), %xmm2 + pmovmskb %xmm2, %edx + test %edx, %edx + jnz L(exit48) + + pcmpeqb 64(%rax), %xmm3 + pmovmskb %xmm3, %edx + test %edx, %edx + jnz L(exit64) + + pcmpeqb 80(%rax), %xmm0 + add $64, %rax + pmovmskb %xmm0, %edx + test %edx, %edx + jnz L(exit16) + + pcmpeqb 32(%rax), %xmm1 + pmovmskb %xmm1, %edx + test %edx, %edx + jnz L(exit32) + + pcmpeqb 48(%rax), %xmm2 + pmovmskb %xmm2, %edx + test %edx, %edx + jnz L(exit48) + + pcmpeqb 64(%rax), %xmm3 + pmovmskb %xmm3, %edx + test %edx, %edx + jnz L(exit64) + + test $0x3f, %rax + jz L(align64_loop) + + 
pcmpeqb 80(%rax), %xmm0 + add $80, %rax + pmovmskb %xmm0, %edx + test %edx, %edx + jnz L(exit) + + test $0x3f, %rax + jz L(align64_loop) + + pcmpeqb 16(%rax), %xmm1 + add $16, %rax + pmovmskb %xmm1, %edx + test %edx, %edx + jnz L(exit) + + test $0x3f, %rax + jz L(align64_loop) + + pcmpeqb 16(%rax), %xmm2 + add $16, %rax + pmovmskb %xmm2, %edx + test %edx, %edx + jnz L(exit) + + test $0x3f, %rax + jz L(align64_loop) + + pcmpeqb 16(%rax), %xmm3 + add $16, %rax + pmovmskb %xmm3, %edx + test %edx, %edx + jnz L(exit) + + add $16, %rax + .p2align 4 + L(align64_loop): + movaps (%rax), %xmm4 + pminub 16(%rax), %xmm4 + movaps 32(%rax), %xmm5 + pminub 48(%rax), %xmm5 + add $64, %rax + pminub %xmm4, %xmm5 + pcmpeqb %xmm0, %xmm5 + pmovmskb %xmm5, %edx + test %edx, %edx + jz L(align64_loop) + + pcmpeqb -64(%rax), %xmm0 + sub $80, %rax + pmovmskb %xmm0, %edx + test %edx, %edx + jnz L(exit16) + + pcmpeqb 32(%rax), %xmm1 + pmovmskb %xmm1, %edx + test %edx, %edx + jnz L(exit32) + + pcmpeqb 48(%rax), %xmm2 + pmovmskb %xmm2, %edx + test %edx, %edx + jnz L(exit48) + + pcmpeqb 64(%rax), %xmm3 + pmovmskb %xmm3, %edx + sub %rdi, %rax + bsf %rdx, %rdx + add %rdx, %rax + add $64, %rax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit): + sub %rdi, %rax +L(exit_less16): + bsf %rdx, %rdx + add %rdx, %rax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit16): + sub %rdi, %rax + bsf %rdx, %rdx + add %rdx, %rax + add $16, %rax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit32): + sub %rdi, %rax + bsf %rdx, %rdx + add %rdx, %rax + add $32, %rax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit48): + sub %rdi, %rax + bsf %rdx, %rdx + add %rdx, %rax + add $48, %rax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit64): + sub %rdi, %rax + bsf %rdx, %rdx + add %rdx, %rax + add $64, %rax + + .p2align 4 L(StartStrcpyPart): lea (%r9, %rax), %rdi mov %rsi, %rcx diff --git a/libc/sysdeps/x86_64/multiarch/strcat-ssse3.S b/libc/sysdeps/x86_64/multiarch/strcat-ssse3.S index fea9d11b4..8101b91e5 100644 --- 
a/libc/sysdeps/x86_64/multiarch/strcat-ssse3.S +++ b/libc/sysdeps/x86_64/multiarch/strcat-ssse3.S @@ -33,11 +33,321 @@ ENTRY (STRCAT) mov %rdx, %r8 # endif -# define RETURN jmp L(StartStrcpyPart) -# include "strlen-sse2-no-bsf.S" -# undef RETURN +/* Inline corresponding strlen file, temporary until new strcpy + implementation gets merged. */ + + xor %eax, %eax + cmpb $0, (%rdi) + jz L(exit_tail0) + cmpb $0, 1(%rdi) + jz L(exit_tail1) + cmpb $0, 2(%rdi) + jz L(exit_tail2) + cmpb $0, 3(%rdi) + jz L(exit_tail3) + + cmpb $0, 4(%rdi) + jz L(exit_tail4) + cmpb $0, 5(%rdi) + jz L(exit_tail5) + cmpb $0, 6(%rdi) + jz L(exit_tail6) + cmpb $0, 7(%rdi) + jz L(exit_tail7) + + cmpb $0, 8(%rdi) + jz L(exit_tail8) + cmpb $0, 9(%rdi) + jz L(exit_tail9) + cmpb $0, 10(%rdi) + jz L(exit_tail10) + cmpb $0, 11(%rdi) + jz L(exit_tail11) + + cmpb $0, 12(%rdi) + jz L(exit_tail12) + cmpb $0, 13(%rdi) + jz L(exit_tail13) + cmpb $0, 14(%rdi) + jz L(exit_tail14) + cmpb $0, 15(%rdi) + jz L(exit_tail15) + pxor %xmm0, %xmm0 + lea 16(%rdi), %rcx + lea 16(%rdi), %rax + and $-16, %rax + + pcmpeqb (%rax), %xmm0 + pmovmskb %xmm0, %edx + pxor %xmm1, %xmm1 + test %edx, %edx + lea 16(%rax), %rax + jnz L(exit) + + pcmpeqb (%rax), %xmm1 + pmovmskb %xmm1, %edx + pxor %xmm2, %xmm2 + test %edx, %edx + lea 16(%rax), %rax + jnz L(exit) + + pcmpeqb (%rax), %xmm2 + pmovmskb %xmm2, %edx + pxor %xmm3, %xmm3 + test %edx, %edx + lea 16(%rax), %rax + jnz L(exit) + + pcmpeqb (%rax), %xmm3 + pmovmskb %xmm3, %edx + test %edx, %edx + lea 16(%rax), %rax + jnz L(exit) + + pcmpeqb (%rax), %xmm0 + pmovmskb %xmm0, %edx + test %edx, %edx + lea 16(%rax), %rax + jnz L(exit) + + pcmpeqb (%rax), %xmm1 + pmovmskb %xmm1, %edx + test %edx, %edx + lea 16(%rax), %rax + jnz L(exit) + + pcmpeqb (%rax), %xmm2 + pmovmskb %xmm2, %edx + test %edx, %edx + lea 16(%rax), %rax + jnz L(exit) + + pcmpeqb (%rax), %xmm3 + pmovmskb %xmm3, %edx + test %edx, %edx + lea 16(%rax), %rax + jnz L(exit) + + pcmpeqb (%rax), %xmm0 + pmovmskb %xmm0, %edx + test 
%edx, %edx + lea 16(%rax), %rax + jnz L(exit) + + pcmpeqb (%rax), %xmm1 + pmovmskb %xmm1, %edx + test %edx, %edx + lea 16(%rax), %rax + jnz L(exit) + + pcmpeqb (%rax), %xmm2 + pmovmskb %xmm2, %edx + test %edx, %edx + lea 16(%rax), %rax + jnz L(exit) + + pcmpeqb (%rax), %xmm3 + pmovmskb %xmm3, %edx + test %edx, %edx + lea 16(%rax), %rax + jnz L(exit) + + pcmpeqb (%rax), %xmm0 + pmovmskb %xmm0, %edx + test %edx, %edx + lea 16(%rax), %rax + jnz L(exit) + + pcmpeqb (%rax), %xmm1 + pmovmskb %xmm1, %edx + test %edx, %edx + lea 16(%rax), %rax + jnz L(exit) + + pcmpeqb (%rax), %xmm2 + pmovmskb %xmm2, %edx + test %edx, %edx + lea 16(%rax), %rax + jnz L(exit) + + pcmpeqb (%rax), %xmm3 + pmovmskb %xmm3, %edx + test %edx, %edx + lea 16(%rax), %rax + jnz L(exit) + + and $-0x40, %rax + .p2align 4 +L(aligned_64): + pcmpeqb (%rax), %xmm0 + pcmpeqb 16(%rax), %xmm1 + pcmpeqb 32(%rax), %xmm2 + pcmpeqb 48(%rax), %xmm3 + pmovmskb %xmm0, %edx + pmovmskb %xmm1, %r11d + pmovmskb %xmm2, %r10d + pmovmskb %xmm3, %r9d + or %edx, %r9d + or %r11d, %r9d + or %r10d, %r9d + lea 64(%rax), %rax + jz L(aligned_64) + + test %edx, %edx + jnz L(aligned_64_exit_16) + test %r11d, %r11d + jnz L(aligned_64_exit_32) + test %r10d, %r10d + jnz L(aligned_64_exit_48) + +L(aligned_64_exit_64): + pmovmskb %xmm3, %edx + jmp L(exit) + +L(aligned_64_exit_48): + lea -16(%rax), %rax + mov %r10d, %edx + jmp L(exit) + +L(aligned_64_exit_32): + lea -32(%rax), %rax + mov %r11d, %edx + jmp L(exit) + +L(aligned_64_exit_16): + lea -48(%rax), %rax + +L(exit): + sub %rcx, %rax + test %dl, %dl + jz L(exit_high) + test $0x01, %dl + jnz L(exit_tail0) + + test $0x02, %dl + jnz L(exit_tail1) + + test $0x04, %dl + jnz L(exit_tail2) + + test $0x08, %dl + jnz L(exit_tail3) + + test $0x10, %dl + jnz L(exit_tail4) + + test $0x20, %dl + jnz L(exit_tail5) + + test $0x40, %dl + jnz L(exit_tail6) + add $7, %eax +L(exit_tail0): + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_high): + add $8, %eax + test $0x01, %dh + jnz L(exit_tail0) + + test 
$0x02, %dh + jnz L(exit_tail1) + + test $0x04, %dh + jnz L(exit_tail2) + + test $0x08, %dh + jnz L(exit_tail3) + + test $0x10, %dh + jnz L(exit_tail4) + + test $0x20, %dh + jnz L(exit_tail5) + + test $0x40, %dh + jnz L(exit_tail6) + add $7, %eax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_tail1): + add $1, %eax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_tail2): + add $2, %eax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_tail3): + add $3, %eax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_tail4): + add $4, %eax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_tail5): + add $5, %eax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_tail6): + add $6, %eax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_tail7): + add $7, %eax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_tail8): + add $8, %eax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_tail9): + add $9, %eax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_tail10): + add $10, %eax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_tail11): + add $11, %eax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_tail12): + add $12, %eax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_tail13): + add $13, %eax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_tail14): + add $14, %eax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_tail15): + add $15, %eax + + .p2align 4 L(StartStrcpyPart): mov %rsi, %rcx lea (%rdi, %rax), %rdx diff --git a/libc/sysdeps/x86_64/multiarch/strlen-sse2-no-bsf.S b/libc/sysdeps/x86_64/multiarch/strlen-sse2-no-bsf.S deleted file mode 100644 index ff2ab7004..000000000 --- a/libc/sysdeps/x86_64/multiarch/strlen-sse2-no-bsf.S +++ /dev/null @@ -1,685 +0,0 @@ -/* strlen SSE2 without bsf - Copyright (C) 2010-2013 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -/* only for strlen case we don't use optimized version for STATIC build just for SHARED */ - -#if (defined SHARED || defined USE_AS_STRCAT || defined USE_AS_STRNLEN) && !defined NOT_IN_libc - -# ifndef USE_AS_STRCAT - -# include <sysdep.h> - -# define RETURN ret - -# ifndef STRLEN -# define STRLEN __strlen_sse2_no_bsf -# endif - - atom_text_section -ENTRY (STRLEN) -# endif - xor %eax, %eax -# ifdef USE_AS_STRNLEN - mov %rsi, %r8 - sub $4, %rsi - jbe L(len_less4_prolog) -# endif - cmpb $0, (%rdi) - jz L(exit_tail0) - cmpb $0, 1(%rdi) - jz L(exit_tail1) - cmpb $0, 2(%rdi) - jz L(exit_tail2) - cmpb $0, 3(%rdi) - jz L(exit_tail3) - -# ifdef USE_AS_STRNLEN - sub $4, %rsi - jbe L(len_less8_prolog) -# endif - - cmpb $0, 4(%rdi) - jz L(exit_tail4) - cmpb $0, 5(%rdi) - jz L(exit_tail5) - cmpb $0, 6(%rdi) - jz L(exit_tail6) - cmpb $0, 7(%rdi) - jz L(exit_tail7) - -# ifdef USE_AS_STRNLEN - sub $4, %rsi - jbe L(len_less12_prolog) -# endif - - cmpb $0, 8(%rdi) - jz L(exit_tail8) - cmpb $0, 9(%rdi) - jz L(exit_tail9) - cmpb $0, 10(%rdi) - jz L(exit_tail10) - cmpb $0, 11(%rdi) - jz L(exit_tail11) - -# ifdef USE_AS_STRNLEN - sub $4, %rsi - jbe L(len_less16_prolog) -# endif - - cmpb $0, 12(%rdi) - jz L(exit_tail12) - cmpb $0, 13(%rdi) - jz L(exit_tail13) - cmpb $0, 14(%rdi) - jz L(exit_tail14) - cmpb $0, 15(%rdi) - 
jz L(exit_tail15) - pxor %xmm0, %xmm0 - lea 16(%rdi), %rcx - lea 16(%rdi), %rax - and $-16, %rax - -# ifdef USE_AS_STRNLEN - and $15, %rdi - add %rdi, %rsi - sub $64, %rsi - jbe L(len_less64) -# endif - - pcmpeqb (%rax), %xmm0 - pmovmskb %xmm0, %edx - pxor %xmm1, %xmm1 - test %edx, %edx - lea 16(%rax), %rax - jnz L(exit) - - pcmpeqb (%rax), %xmm1 - pmovmskb %xmm1, %edx - pxor %xmm2, %xmm2 - test %edx, %edx - lea 16(%rax), %rax - jnz L(exit) - - pcmpeqb (%rax), %xmm2 - pmovmskb %xmm2, %edx - pxor %xmm3, %xmm3 - test %edx, %edx - lea 16(%rax), %rax - jnz L(exit) - - pcmpeqb (%rax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea 16(%rax), %rax - jnz L(exit) - -# ifdef USE_AS_STRNLEN - sub $64, %rsi - jbe L(len_less64) -# endif - - pcmpeqb (%rax), %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - lea 16(%rax), %rax - jnz L(exit) - - pcmpeqb (%rax), %xmm1 - pmovmskb %xmm1, %edx - test %edx, %edx - lea 16(%rax), %rax - jnz L(exit) - - pcmpeqb (%rax), %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - lea 16(%rax), %rax - jnz L(exit) - - pcmpeqb (%rax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea 16(%rax), %rax - jnz L(exit) - -# ifdef USE_AS_STRNLEN - sub $64, %rsi - jbe L(len_less64) -# endif - - pcmpeqb (%rax), %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - lea 16(%rax), %rax - jnz L(exit) - - pcmpeqb (%rax), %xmm1 - pmovmskb %xmm1, %edx - test %edx, %edx - lea 16(%rax), %rax - jnz L(exit) - - pcmpeqb (%rax), %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - lea 16(%rax), %rax - jnz L(exit) - - pcmpeqb (%rax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea 16(%rax), %rax - jnz L(exit) - -# ifdef USE_AS_STRNLEN - sub $64, %rsi - jbe L(len_less64) -# endif - - pcmpeqb (%rax), %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - lea 16(%rax), %rax - jnz L(exit) - - pcmpeqb (%rax), %xmm1 - pmovmskb %xmm1, %edx - test %edx, %edx - lea 16(%rax), %rax - jnz L(exit) - - pcmpeqb (%rax), %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - lea 16(%rax), %rax - jnz 
L(exit) - - pcmpeqb (%rax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - lea 16(%rax), %rax - jnz L(exit) - -# ifdef USE_AS_STRNLEN - mov %rax, %rdx - and $63, %rdx - add %rdx, %rsi -# endif - - and $-0x40, %rax - - .p2align 4 -L(aligned_64): -# ifdef USE_AS_STRNLEN - sub $64, %rsi - jbe L(len_less64) -# endif - pcmpeqb (%rax), %xmm0 - pcmpeqb 16(%rax), %xmm1 - pcmpeqb 32(%rax), %xmm2 - pcmpeqb 48(%rax), %xmm3 - pmovmskb %xmm0, %edx - pmovmskb %xmm1, %r11d - pmovmskb %xmm2, %r10d - pmovmskb %xmm3, %r9d - or %edx, %r9d - or %r11d, %r9d - or %r10d, %r9d - lea 64(%rax), %rax - jz L(aligned_64) - - test %edx, %edx - jnz L(aligned_64_exit_16) - test %r11d, %r11d - jnz L(aligned_64_exit_32) - test %r10d, %r10d - jnz L(aligned_64_exit_48) -L(aligned_64_exit_64): - pmovmskb %xmm3, %edx - jmp L(aligned_64_exit) -L(aligned_64_exit_48): - lea -16(%rax), %rax - mov %r10d, %edx - jmp L(aligned_64_exit) -L(aligned_64_exit_32): - lea -32(%rax), %rax - mov %r11d, %edx - jmp L(aligned_64_exit) -L(aligned_64_exit_16): - lea -48(%rax), %rax -L(aligned_64_exit): -L(exit): - sub %rcx, %rax - test %dl, %dl - jz L(exit_high) - test $0x01, %dl - jnz L(exit_tail0) - - test $0x02, %dl - jnz L(exit_tail1) - - test $0x04, %dl - jnz L(exit_tail2) - - test $0x08, %dl - jnz L(exit_tail3) - - test $0x10, %dl - jnz L(exit_tail4) - - test $0x20, %dl - jnz L(exit_tail5) - - test $0x40, %dl - jnz L(exit_tail6) - add $7, %eax -L(exit_tail0): - RETURN - -L(exit_high): - add $8, %eax - test $0x01, %dh - jnz L(exit_tail0) - - test $0x02, %dh - jnz L(exit_tail1) - - test $0x04, %dh - jnz L(exit_tail2) - - test $0x08, %dh - jnz L(exit_tail3) - - test $0x10, %dh - jnz L(exit_tail4) - - test $0x20, %dh - jnz L(exit_tail5) - - test $0x40, %dh - jnz L(exit_tail6) - add $7, %eax - RETURN - -# ifdef USE_AS_STRNLEN - - .p2align 4 -L(len_less64): - pxor %xmm0, %xmm0 - add $64, %rsi - - pcmpeqb (%rax), %xmm0 - pmovmskb %xmm0, %edx - pxor %xmm1, %xmm1 - lea 16(%rax), %rax - test %edx, %edx - jnz L(strnlen_exit) 
- - sub $16, %rsi - jbe L(return_start_len) - - pcmpeqb (%rax), %xmm1 - pmovmskb %xmm1, %edx - lea 16(%rax), %rax - test %edx, %edx - jnz L(strnlen_exit) - - sub $16, %rsi - jbe L(return_start_len) - - pcmpeqb (%rax), %xmm0 - pmovmskb %xmm0, %edx - lea 16(%rax), %rax - test %edx, %edx - jnz L(strnlen_exit) - - sub $16, %rsi - jbe L(return_start_len) - - pcmpeqb (%rax), %xmm1 - pmovmskb %xmm1, %edx - lea 16(%rax), %rax - test %edx, %edx - jnz L(strnlen_exit) - - mov %r8, %rax - ret - - .p2align 4 -L(strnlen_exit): - sub %rcx, %rax - - test %dl, %dl - jz L(strnlen_exit_high) - mov %dl, %cl - and $15, %cl - jz L(strnlen_exit_8) - test $0x01, %dl - jnz L(exit_tail0) - test $0x02, %dl - jnz L(strnlen_exit_tail1) - test $0x04, %dl - jnz L(strnlen_exit_tail2) - sub $4, %rsi - jb L(return_start_len) - lea 3(%eax), %eax - ret - - .p2align 4 -L(strnlen_exit_8): - test $0x10, %dl - jnz L(strnlen_exit_tail4) - test $0x20, %dl - jnz L(strnlen_exit_tail5) - test $0x40, %dl - jnz L(strnlen_exit_tail6) - sub $8, %rsi - jb L(return_start_len) - lea 7(%eax), %eax - ret - - .p2align 4 -L(strnlen_exit_high): - mov %dh, %ch - and $15, %ch - jz L(strnlen_exit_high_8) - test $0x01, %dh - jnz L(strnlen_exit_tail8) - test $0x02, %dh - jnz L(strnlen_exit_tail9) - test $0x04, %dh - jnz L(strnlen_exit_tail10) - sub $12, %rsi - jb L(return_start_len) - lea 11(%eax), %eax - ret - - .p2align 4 -L(strnlen_exit_high_8): - test $0x10, %dh - jnz L(strnlen_exit_tail12) - test $0x20, %dh - jnz L(strnlen_exit_tail13) - test $0x40, %dh - jnz L(strnlen_exit_tail14) - sub $16, %rsi - jb L(return_start_len) - lea 15(%eax), %eax - ret - - .p2align 4 -L(strnlen_exit_tail1): - sub $2, %rsi - jb L(return_start_len) - lea 1(%eax), %eax - ret - - .p2align 4 -L(strnlen_exit_tail2): - sub $3, %rsi - jb L(return_start_len) - lea 2(%eax), %eax - ret - - .p2align 4 -L(strnlen_exit_tail4): - sub $5, %rsi - jb L(return_start_len) - lea 4(%eax), %eax - ret - - .p2align 4 -L(strnlen_exit_tail5): - sub $6, %rsi - jb 
L(return_start_len) - lea 5(%eax), %eax - ret - - .p2align 4 -L(strnlen_exit_tail6): - sub $7, %rsi - jb L(return_start_len) - lea 6(%eax), %eax - ret - - .p2align 4 -L(strnlen_exit_tail8): - sub $9, %rsi - jb L(return_start_len) - lea 8(%eax), %eax - ret - - .p2align 4 -L(strnlen_exit_tail9): - sub $10, %rsi - jb L(return_start_len) - lea 9(%eax), %eax - ret - - .p2align 4 -L(strnlen_exit_tail10): - sub $11, %rsi - jb L(return_start_len) - lea 10(%eax), %eax - ret - - .p2align 4 -L(strnlen_exit_tail12): - sub $13, %rsi - jb L(return_start_len) - lea 12(%eax), %eax - ret - - .p2align 4 -L(strnlen_exit_tail13): - sub $14, %rsi - jb L(return_start_len) - lea 13(%eax), %eax - ret - - .p2align 4 -L(strnlen_exit_tail14): - sub $15, %rsi - jb L(return_start_len) - lea 14(%eax), %eax - ret - - .p2align 4 -L(return_start_len): - mov %r8, %rax - ret - -/* for prolog only */ - - .p2align 4 -L(len_less4_prolog): - add $4, %rsi - jz L(exit_tail0) - - cmpb $0, (%rdi) - jz L(exit_tail0) - cmp $1, %esi - je L(exit_tail1) - - cmpb $0, 1(%rdi) - jz L(exit_tail1) - cmp $2, %esi - je L(exit_tail2) - - cmpb $0, 2(%rdi) - jz L(exit_tail2) - cmp $3, %esi - je L(exit_tail3) - - cmpb $0, 3(%rdi) - jz L(exit_tail3) - mov $4, %eax - ret - - .p2align 4 -L(len_less8_prolog): - add $4, %rsi - - cmpb $0, 4(%rdi) - jz L(exit_tail4) - cmp $1, %esi - je L(exit_tail5) - - cmpb $0, 5(%rdi) - jz L(exit_tail5) - cmp $2, %esi - je L(exit_tail6) - - cmpb $0, 6(%rdi) - jz L(exit_tail6) - cmp $3, %esi - je L(exit_tail7) - - cmpb $0, 7(%rdi) - jz L(exit_tail7) - mov $8, %eax - ret - - .p2align 4 -L(len_less12_prolog): - add $4, %rsi - - cmpb $0, 8(%rdi) - jz L(exit_tail8) - cmp $1, %esi - je L(exit_tail9) - - cmpb $0, 9(%rdi) - jz L(exit_tail9) - cmp $2, %esi - je L(exit_tail10) - - cmpb $0, 10(%rdi) - jz L(exit_tail10) - cmp $3, %esi - je L(exit_tail11) - - cmpb $0, 11(%rdi) - jz L(exit_tail11) - mov $12, %eax - ret - - .p2align 4 -L(len_less16_prolog): - add $4, %rsi - - cmpb $0, 12(%rdi) - jz 
L(exit_tail12) - cmp $1, %esi - je L(exit_tail13) - - cmpb $0, 13(%rdi) - jz L(exit_tail13) - cmp $2, %esi - je L(exit_tail14) - - cmpb $0, 14(%rdi) - jz L(exit_tail14) - cmp $3, %esi - je L(exit_tail15) - - cmpb $0, 15(%rdi) - jz L(exit_tail15) - mov $16, %eax - ret -# endif - - .p2align 4 -L(exit_tail1): - add $1, %eax - RETURN - - .p2align 4 -L(exit_tail2): - add $2, %eax - RETURN - - .p2align 4 -L(exit_tail3): - add $3, %eax - RETURN - - .p2align 4 -L(exit_tail4): - add $4, %eax - RETURN - - .p2align 4 -L(exit_tail5): - add $5, %eax - RETURN - - .p2align 4 -L(exit_tail6): - add $6, %eax - RETURN - - .p2align 4 -L(exit_tail7): - add $7, %eax - RETURN - - .p2align 4 -L(exit_tail8): - add $8, %eax - RETURN - - .p2align 4 -L(exit_tail9): - add $9, %eax - RETURN - - .p2align 4 -L(exit_tail10): - add $10, %eax - RETURN - - .p2align 4 -L(exit_tail11): - add $11, %eax - RETURN - - .p2align 4 -L(exit_tail12): - add $12, %eax - RETURN - - .p2align 4 -L(exit_tail13): - add $13, %eax - RETURN - - .p2align 4 -L(exit_tail14): - add $14, %eax - RETURN - - .p2align 4 -L(exit_tail15): - add $15, %eax -# ifndef USE_AS_STRCAT - RETURN -END (STRLEN) -# endif -#endif diff --git a/libc/sysdeps/x86_64/multiarch/strlen-sse2-pminub.S b/libc/sysdeps/x86_64/multiarch/strlen-sse2-pminub.S deleted file mode 100644 index cc4bb57e9..000000000 --- a/libc/sysdeps/x86_64/multiarch/strlen-sse2-pminub.S +++ /dev/null @@ -1,259 +0,0 @@ -/* strlen SSE2 - Copyright (C) 2011-2013 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if !defined NOT_IN_libc && (defined SHARED || defined USE_AS_STRCAT) - -# ifndef USE_AS_STRCAT - -# include <sysdep.h> - -# define RETURN ret - - .section .text.sse2,"ax",@progbits -ENTRY (__strlen_sse2_pminub) - -# endif - xor %rax, %rax - mov %edi, %ecx - and $0x3f, %ecx - pxor %xmm0, %xmm0 - cmp $0x30, %ecx - ja L(next) - movdqu (%rdi), %xmm1 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - jnz L(exit_less16) - mov %rdi, %rax - and $-16, %rax - jmp L(align16_start) -L(next): - mov %rdi, %rax - and $-16, %rax - pcmpeqb (%rax), %xmm0 - mov $-1, %r10d - sub %rax, %rcx - shl %cl, %r10d - pmovmskb %xmm0, %edx - and %r10d, %edx - jnz L(exit) -L(align16_start): - pxor %xmm0, %xmm0 - pxor %xmm1, %xmm1 - pxor %xmm2, %xmm2 - pxor %xmm3, %xmm3 - pcmpeqb 16(%rax), %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - jnz L(exit16) - - pcmpeqb 32(%rax), %xmm1 - pmovmskb %xmm1, %edx - test %edx, %edx - jnz L(exit32) - - pcmpeqb 48(%rax), %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - jnz L(exit48) - - pcmpeqb 64(%rax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - jnz L(exit64) - - pcmpeqb 80(%rax), %xmm0 - add $64, %rax - pmovmskb %xmm0, %edx - test %edx, %edx - jnz L(exit16) - - pcmpeqb 32(%rax), %xmm1 - pmovmskb %xmm1, %edx - test %edx, %edx - jnz L(exit32) - - pcmpeqb 48(%rax), %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - jnz L(exit48) - - pcmpeqb 64(%rax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - jnz L(exit64) - - pcmpeqb 80(%rax), %xmm0 - add $64, %rax - pmovmskb %xmm0, %edx - test %edx, %edx - jnz L(exit16) - - pcmpeqb 32(%rax), 
%xmm1 - pmovmskb %xmm1, %edx - test %edx, %edx - jnz L(exit32) - - pcmpeqb 48(%rax), %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - jnz L(exit48) - - pcmpeqb 64(%rax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - jnz L(exit64) - - pcmpeqb 80(%rax), %xmm0 - add $64, %rax - pmovmskb %xmm0, %edx - test %edx, %edx - jnz L(exit16) - - pcmpeqb 32(%rax), %xmm1 - pmovmskb %xmm1, %edx - test %edx, %edx - jnz L(exit32) - - pcmpeqb 48(%rax), %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - jnz L(exit48) - - pcmpeqb 64(%rax), %xmm3 - pmovmskb %xmm3, %edx - test %edx, %edx - jnz L(exit64) - - - test $0x3f, %rax - jz L(align64_loop) - - pcmpeqb 80(%rax), %xmm0 - add $80, %rax - pmovmskb %xmm0, %edx - test %edx, %edx - jnz L(exit) - - test $0x3f, %rax - jz L(align64_loop) - - pcmpeqb 16(%rax), %xmm1 - add $16, %rax - pmovmskb %xmm1, %edx - test %edx, %edx - jnz L(exit) - - test $0x3f, %rax - jz L(align64_loop) - - pcmpeqb 16(%rax), %xmm2 - add $16, %rax - pmovmskb %xmm2, %edx - test %edx, %edx - jnz L(exit) - - test $0x3f, %rax - jz L(align64_loop) - - pcmpeqb 16(%rax), %xmm3 - add $16, %rax - pmovmskb %xmm3, %edx - test %edx, %edx - jnz L(exit) - - add $16, %rax - .p2align 4 - L(align64_loop): - movaps (%rax), %xmm4 - pminub 16(%rax), %xmm4 - movaps 32(%rax), %xmm5 - pminub 48(%rax), %xmm5 - add $64, %rax - pminub %xmm4, %xmm5 - pcmpeqb %xmm0, %xmm5 - pmovmskb %xmm5, %edx - test %edx, %edx - jz L(align64_loop) - - - pcmpeqb -64(%rax), %xmm0 - sub $80, %rax - pmovmskb %xmm0, %edx - test %edx, %edx - jnz L(exit16) - - pcmpeqb 32(%rax), %xmm1 - pmovmskb %xmm1, %edx - test %edx, %edx - jnz L(exit32) - - pcmpeqb 48(%rax), %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - jnz L(exit48) - - pcmpeqb 64(%rax), %xmm3 - pmovmskb %xmm3, %edx - sub %rdi, %rax - bsf %rdx, %rdx - add %rdx, %rax - add $64, %rax - RETURN - - .p2align 4 -L(exit): - sub %rdi, %rax -L(exit_less16): - bsf %rdx, %rdx - add %rdx, %rax - RETURN - .p2align 4 -L(exit16): - sub %rdi, %rax - bsf %rdx, %rdx - add %rdx, 
%rax - add $16, %rax - RETURN - .p2align 4 -L(exit32): - sub %rdi, %rax - bsf %rdx, %rdx - add %rdx, %rax - add $32, %rax - RETURN - .p2align 4 -L(exit48): - sub %rdi, %rax - bsf %rdx, %rdx - add %rdx, %rax - add $48, %rax - RETURN - .p2align 4 -L(exit64): - sub %rdi, %rax - bsf %rdx, %rdx - add %rdx, %rax - add $64, %rax -# ifndef USE_AS_STRCAT - RETURN - -END (__strlen_sse2_pminub) -# endif -#endif diff --git a/libc/sysdeps/x86_64/multiarch/strlen-sse4.S b/libc/sysdeps/x86_64/multiarch/strlen-sse4.S deleted file mode 100644 index 8d685df0c..000000000 --- a/libc/sysdeps/x86_64/multiarch/strlen-sse4.S +++ /dev/null @@ -1,84 +0,0 @@ -/* strlen with SSE4 - Copyright (C) 2009-2013 Free Software Foundation, Inc. - Contributed by Ulrich Drepper <drepper@redhat.com>. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#if defined SHARED && !defined NOT_IN_libc - -#include <sysdep.h> - - .section .text.sse4.2,"ax",@progbits -ENTRY (__strlen_sse42) - pxor %xmm1, %xmm1 - movl %edi, %ecx - movq %rdi, %r8 - andq $~15, %rdi - xor %edi, %ecx - pcmpeqb (%rdi), %xmm1 - pmovmskb %xmm1, %edx - shrl %cl, %edx - shll %cl, %edx - andl %edx, %edx - jnz L(less16bytes) - pxor %xmm1, %xmm1 - - .p2align 4 -L(more64bytes_loop): - pcmpistri $0x08, 16(%rdi), %xmm1 - jz L(more32bytes) - - pcmpistri $0x08, 32(%rdi), %xmm1 - jz L(more48bytes) - - pcmpistri $0x08, 48(%rdi), %xmm1 - jz L(more64bytes) - - add $64, %rdi - pcmpistri $0x08, (%rdi), %xmm1 - jnz L(more64bytes_loop) - leaq (%rdi,%rcx), %rax - subq %r8, %rax - ret - - .p2align 4 -L(more32bytes): - leaq 16(%rdi,%rcx, 1), %rax - subq %r8, %rax - ret - - .p2align 4 -L(more48bytes): - leaq 32(%rdi,%rcx, 1), %rax - subq %r8, %rax - ret - - .p2align 4 -L(more64bytes): - leaq 48(%rdi,%rcx, 1), %rax - subq %r8, %rax - ret - - .p2align 4 -L(less16bytes): - subq %r8, %rdi - bsfl %edx, %eax - addq %rdi, %rax - ret - -END (__strlen_sse42) - -#endif diff --git a/libc/sysdeps/x86_64/multiarch/strlen.S b/libc/sysdeps/x86_64/multiarch/strlen.S deleted file mode 100644 index ab29ceff2..000000000 --- a/libc/sysdeps/x86_64/multiarch/strlen.S +++ /dev/null @@ -1,68 +0,0 @@ -/* Multiple versions of strlen(str) -- determine the length of the string STR. - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2009-2013 Free Software Foundation, Inc. - Contributed by Ulrich Drepper <drepper@redhat.com>. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - - -/* Define multiple versions only for the definition in libc and for - the DSO. In static binaries we need strlen before the initialization - happened. */ -#if defined SHARED && !defined NOT_IN_libc - .text -ENTRY(strlen) - .type strlen, @gnu_indirect_function - cmpl $0, __cpu_features+KIND_OFFSET(%rip) - jne 1f - call __init_cpu_features -1: leaq __strlen_sse2_pminub(%rip), %rax - testl $bit_Prefer_PMINUB_for_stringop, __cpu_features+FEATURE_OFFSET+index_Prefer_PMINUB_for_stringop(%rip) - jnz 2f - leaq __strlen_sse2(%rip), %rax - testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) - jz 2f - leaq __strlen_sse42(%rip), %rax - ret -2: testl $bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip) - jz 3f - leaq __strlen_sse2_no_bsf(%rip), %rax -3: ret -END(strlen) - -# undef ENTRY -# define ENTRY(name) \ - .type __strlen_sse2, @function; \ - .align 16; \ - .globl __strlen_sse2; \ - .hidden __strlen_sse2; \ - __strlen_sse2: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __strlen_sse2, .-__strlen_sse2 -# undef libc_hidden_builtin_def -/* It doesn't make sense to send libc-internal strlen calls through a PLT. - The speedup we get from using SSE4.2 instruction is likely eaten away - by the indirect call in the PLT. 
*/ -# define libc_hidden_builtin_def(name) \ - .globl __GI_strlen; __GI_strlen = __strlen_sse2 -#endif - -#include "../strlen.S" diff --git a/libc/sysdeps/x86_64/multiarch/strnlen-sse2-no-bsf.S b/libc/sysdeps/x86_64/multiarch/strnlen-sse2-no-bsf.S deleted file mode 100644 index 248328d99..000000000 --- a/libc/sysdeps/x86_64/multiarch/strnlen-sse2-no-bsf.S +++ /dev/null @@ -1,3 +0,0 @@ -#define USE_AS_STRNLEN -#define STRLEN __strnlen_sse2_no_bsf -#include "strlen-sse2-no-bsf.S" diff --git a/libc/sysdeps/x86_64/multiarch/strnlen.S b/libc/sysdeps/x86_64/multiarch/strnlen.S deleted file mode 100644 index 124f8458a..000000000 --- a/libc/sysdeps/x86_64/multiarch/strnlen.S +++ /dev/null @@ -1,57 +0,0 @@ -/* multiple version of strnlen - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2011-2013 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <sysdep.h> -#include <init-arch.h> - - -/* Define multiple versions only for the definition in libc. 
*/ -#ifndef NOT_IN_libc - - .text -ENTRY(__strnlen) - .type __strnlen, @gnu_indirect_function - cmpl $0, __cpu_features+KIND_OFFSET(%rip) - jne 1f - call __init_cpu_features -1: leaq __strnlen_sse2(%rip), %rax - testl $bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip) - jz 2f - leaq __strnlen_sse2_no_bsf(%rip), %rax -2: ret -END(__strnlen) - -# undef ENTRY -# define ENTRY(name) \ - .type __strnlen_sse2, @function; \ - .align 16; \ - .globl __strnlen_sse2; \ - .hidden __strnlen_sse2; \ - __strnlen_sse2: cfi_startproc; \ - CALL_MCOUNT -# undef END -# define END(name) \ - cfi_endproc; .size __strnlen_sse2, .-__strnlen_sse2 - -# undef libc_hidden_def -# define libc_hidden_def(name) \ - .globl __GI_strnlen; __GI_strnlen = __strnlen_sse2 -#endif - -#include "../strnlen.S" diff --git a/libc/sysdeps/x86_64/preconfigure b/libc/sysdeps/x86_64/preconfigure index ca9de7584..d5abba882 100644 --- a/libc/sysdeps/x86_64/preconfigure +++ b/libc/sysdeps/x86_64/preconfigure @@ -1,123 +1,3 @@ - -# as_fn_set_status STATUS -# ----------------------- -# Set $? to STATUS, without forking. -as_fn_set_status () -{ - return $1 -} # as_fn_set_status - -# as_fn_exit STATUS -# ----------------- -# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. -as_fn_exit () -{ - set +e - as_fn_set_status $1 - exit $1 -} # as_fn_exit -if expr a : '\(a\)' >/dev/null 2>&1 && - test "X`expr 00001 : '.*\(...\)'`" = X001; then - as_expr=expr -else - as_expr=false -fi - -if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then - as_basename=basename -else - as_basename=false -fi - -as_me=`$as_basename -- "$0" || -$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ - X"$0" : 'X\(//\)$' \| \ - X"$0" : 'X\(/\)' \| . 
2>/dev/null || -$as_echo X/"$0" | - sed '/^.*\/\([^/][^/]*\)\/*$/{ - s//\1/ - q - } - /^X\/\(\/\/\)$/{ - s//\1/ - q - } - /^X\/\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q'` - - - as_lineno_1=$LINENO as_lineno_1a=$LINENO - as_lineno_2=$LINENO as_lineno_2a=$LINENO - eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && - test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { - # Blame Lee E. McMahon (1931-1989) for sed's syntax. :-) - sed -n ' - p - /[$]LINENO/= - ' <$as_myself | - sed ' - s/[$]LINENO.*/&-/ - t lineno - b - :lineno - N - :loop - s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ - t loop - s/-\n.*// - ' >$as_me.lineno && - chmod +x "$as_me.lineno" || - { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } - - # Don't try to exec as it changes $[0], causing all sort of problems - # (the dirname of $[0] is not the place where we might find the - # original and so on. Autoconf is especially sensitive to this). - . "./$as_me.lineno" - # Exit status is that of the last command. - exit -} - - -# ac_fn_c_try_compile LINENO -# -------------------------- -# Try to compile conftest.$ac_ext, and return whether this succeeded. -ac_fn_c_try_compile () -{ - as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - rm -f conftest.$ac_objext - if { { ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_compile") 2>conftest.err - ac_status=$? - if test -s conftest.err; then - grep -v '^ *+' conftest.err >conftest.er1 - cat conftest.er1 >&5 - mv -f conftest.er1 conftest.err - fi - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; } && { - test -z "$ac_c_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest.$ac_objext; then : - ac_retval=0 -else - $as_echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_retval=1 -fi - eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno - as_fn_set_status $ac_retval - -} # ac_fn_c_try_compile # This file is generated from configure.in by Autoconf. DO NOT EDIT! # Local preconfigure fragment for sysdeps/x86_64 diff --git a/libc/sysdeps/x86_64/strcat.S b/libc/sysdeps/x86_64/strcat.S index 287ffd24c..8bea6fb5d 100644 --- a/libc/sysdeps/x86_64/strcat.S +++ b/libc/sysdeps/x86_64/strcat.S @@ -21,6 +21,7 @@ #include <sysdep.h> #include "asm-syntax.h" +/* Will be removed when new strcpy implementation gets merged. */ .text ENTRY (strcat) diff --git a/libc/sysdeps/x86_64/strlen.S b/libc/sysdeps/x86_64/strlen.S index 4bdca0a45..eeb109221 100644 --- a/libc/sysdeps/x86_64/strlen.S +++ b/libc/sysdeps/x86_64/strlen.S @@ -1,6 +1,5 @@ -/* strlen(str) -- determine the length of the string STR. - Copyright (C) 2009-2013 Free Software Foundation, Inc. - Contributed by Ulrich Drepper <drepper@redhat.com>. +/* SSE2 version of strlen. + Copyright (C) 2012-2013 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -19,83 +18,222 @@ #include <sysdep.h> +/* Long lived register in strlen(s), strnlen(s, n) are: - .text + %xmm11 - zero + %rdi - s + %r10 (s+n) & (~(64-1)) + %r11 s+n +*/ + + +.text ENTRY(strlen) + +/* Test 64 bytes from %rax for zero. Save result as bitmask in %rdx. */ +#define FIND_ZERO \ + pcmpeqb (%rax), %xmm8; \ + pcmpeqb 16(%rax), %xmm9; \ + pcmpeqb 32(%rax), %xmm10; \ + pcmpeqb 48(%rax), %xmm11; \ + pmovmskb %xmm8, %esi; \ + pmovmskb %xmm9, %edx; \ + pmovmskb %xmm10, %r8d; \ + pmovmskb %xmm11, %ecx; \ + salq $16, %rdx; \ + salq $16, %rcx; \ + orq %rsi, %rdx; \ + orq %r8, %rcx; \ + salq $32, %rcx; \ + orq %rcx, %rdx; + +#ifdef AS_STRNLEN +/* Do not read anything when n==0. 
*/ + test %rsi, %rsi + jne L(n_nonzero) xor %rax, %rax - mov %edi, %ecx - and $0x3f, %ecx - pxor %xmm0, %xmm0 - cmp $0x30, %ecx - ja L(next) - movdqu (%rdi), %xmm1 - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - jnz L(exit_less16) - mov %rdi, %rax - and $-16, %rax - jmp L(align16_start) -L(next): - mov %rdi, %rax - and $-16, %rax - pcmpeqb (%rax), %xmm0 - mov $-1, %esi - sub %rax, %rcx - shl %cl, %esi - pmovmskb %xmm0, %edx - and %esi, %edx - jnz L(exit) -L(align16_start): - pxor %xmm0, %xmm0 - pxor %xmm1, %xmm1 - pxor %xmm2, %xmm2 - pxor %xmm3, %xmm3 - .p2align 4 -L(align16_loop): - pcmpeqb 16(%rax), %xmm0 - pmovmskb %xmm0, %edx - test %edx, %edx - jnz L(exit16) + ret +L(n_nonzero): - pcmpeqb 32(%rax), %xmm1 - pmovmskb %xmm1, %edx - test %edx, %edx - jnz L(exit32) +/* Initialize long lived registers. */ - pcmpeqb 48(%rax), %xmm2 - pmovmskb %xmm2, %edx - test %edx, %edx - jnz L(exit48) + add %rdi, %rsi + mov %rsi, %r10 + and $-64, %r10 + mov %rsi, %r11 +#endif - pcmpeqb 64(%rax), %xmm3 - pmovmskb %xmm3, %edx - lea 64(%rax), %rax + pxor %xmm8, %xmm8 + pxor %xmm9, %xmm9 + pxor %xmm10, %xmm10 + pxor %xmm11, %xmm11 + movq %rdi, %rax + movq %rdi, %rcx + andq $4095, %rcx +/* Offsets 4032-4047 will be aligned into 4032 thus fit into page. */ + cmpq $4047, %rcx +/* We cannot unify this branching as it would be ~6 cycles slower. */ + ja L(cross_page) + +#ifdef AS_STRNLEN +/* Test if end is among first 64 bytes. */ +# define STRNLEN_PROLOG \ + mov %r11, %rsi; \ + subq %rax, %rsi; \ + andq $-64, %rax; \ + testq $-64, %rsi; \ + je L(strnlen_ret) +#else +# define STRNLEN_PROLOG andq $-64, %rax; +#endif + +/* Ignore bits in mask that come before start of string. */ +#define PROLOG(lab) \ + movq %rdi, %rcx; \ + xorq %rax, %rcx; \ + STRNLEN_PROLOG; \ + sarq %cl, %rdx; \ + test %rdx, %rdx; \ + je L(lab); \ + bsfq %rdx, %rax; \ + ret + +#ifdef AS_STRNLEN + andq $-16, %rax + FIND_ZERO +#else + /* Test first 16 bytes unaligned. 
*/ + movdqu (%rax), %xmm12 + pcmpeqb %xmm8, %xmm12 + pmovmskb %xmm12, %edx test %edx, %edx - jz L(align16_loop) -L(exit): - sub %rdi, %rax -L(exit_less16): - bsf %rdx, %rdx - add %rdx, %rax + je L(next48_bytes) + bsf %edx, %eax /* If eax is zeroed 16bit bsf can be used. */ + ret + +L(next48_bytes): +/* Same as FIND_ZERO except we do not check first 16 bytes. */ + andq $-16, %rax + pcmpeqb 16(%rax), %xmm9 + pcmpeqb 32(%rax), %xmm10 + pcmpeqb 48(%rax), %xmm11 + pmovmskb %xmm9, %edx + pmovmskb %xmm10, %r8d + pmovmskb %xmm11, %ecx + salq $16, %rdx + salq $16, %rcx + orq %r8, %rcx + salq $32, %rcx + orq %rcx, %rdx +#endif + + /* When no zero byte is found xmm9-11 are zero so we do not have to + zero them. */ + PROLOG(loop) + + .p2align 4 +L(cross_page): + andq $-64, %rax + FIND_ZERO + PROLOG(loop_init) + +#ifdef AS_STRNLEN +/* We must do this check to correctly handle strnlen (s, -1). */ +L(strnlen_ret): + bts %rsi, %rdx + sarq %cl, %rdx + test %rdx, %rdx + je L(loop_init) + bsfq %rdx, %rax ret +#endif + .p2align 4 +L(loop_init): + pxor %xmm9, %xmm9 + pxor %xmm10, %xmm10 + pxor %xmm11, %xmm11 +#ifdef AS_STRNLEN + .p2align 4 +L(loop): + + addq $64, %rax + cmpq %rax, %r10 + je L(exit_end) + + movdqa (%rax), %xmm8 + pminub 16(%rax), %xmm8 + pminub 32(%rax), %xmm8 + pminub 48(%rax), %xmm8 + pcmpeqb %xmm11, %xmm8 + pmovmskb %xmm8, %edx + testl %edx, %edx + jne L(exit) + jmp L(loop) + .p2align 4 -L(exit16): - sub %rdi, %rax - bsf %rdx, %rdx - lea 16(%rdx,%rax), %rax +L(exit_end): + cmp %rax, %r11 + je L(first) /* Do not read when end is at page boundary. */ + pxor %xmm8, %xmm8 + FIND_ZERO + +L(first): + bts %r11, %rdx + bsfq %rdx, %rdx + addq %rdx, %rax + subq %rdi, %rax ret + .p2align 4 -L(exit32): - sub %rdi, %rax - bsf %rdx, %rdx - lea 32(%rdx,%rax), %rax +L(exit): + pxor %xmm8, %xmm8 + FIND_ZERO + + bsfq %rdx, %rdx + addq %rdx, %rax + subq %rdi, %rax ret + +#else + + /* Main loop. Unrolled twice to improve L2 cache performance on core2. 
*/ + .p2align 4 +L(loop): + + movdqa 64(%rax), %xmm8 + pminub 80(%rax), %xmm8 + pminub 96(%rax), %xmm8 + pminub 112(%rax), %xmm8 + pcmpeqb %xmm11, %xmm8 + pmovmskb %xmm8, %edx + testl %edx, %edx + jne L(exit64) + + subq $-128, %rax + + movdqa (%rax), %xmm8 + pminub 16(%rax), %xmm8 + pminub 32(%rax), %xmm8 + pminub 48(%rax), %xmm8 + pcmpeqb %xmm11, %xmm8 + pmovmskb %xmm8, %edx + testl %edx, %edx + jne L(exit0) + jmp L(loop) + .p2align 4 -L(exit48): - sub %rdi, %rax - bsf %rdx, %rdx - lea 48(%rdx,%rax), %rax +L(exit64): + addq $64, %rax +L(exit0): + pxor %xmm8, %xmm8 + FIND_ZERO + + bsfq %rdx, %rdx + addq %rdx, %rax + subq %rdi, %rax ret + +#endif + END(strlen) libc_hidden_builtin_def (strlen) diff --git a/libc/sysdeps/x86_64/strnlen.S b/libc/sysdeps/x86_64/strnlen.S index 6e5350306..d3c43ac48 100644 --- a/libc/sysdeps/x86_64/strnlen.S +++ b/libc/sysdeps/x86_64/strnlen.S @@ -1,63 +1,6 @@ -/* strnlen(str,maxlen) -- determine the length of the string STR up to MAXLEN. - Copyright (C) 2010-2013 Free Software Foundation, Inc. - Contributed by Ulrich Drepper <drepper@redhat.com>. - This file is part of the GNU C Library. +#define AS_STRNLEN +#define strlen __strnlen +#include "strlen.S" - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#include <sysdep.h> - - - .text -ENTRY(__strnlen) - movq %rsi, %rax - testq %rsi, %rsi - jz 3f - pxor %xmm2, %xmm2 - movq %rdi, %rcx - movq %rdi, %r8 - movq $16, %r9 - andq $~15, %rdi - movdqa %xmm2, %xmm1 - pcmpeqb (%rdi), %xmm2 - orl $0xffffffff, %r10d - subq %rdi, %rcx - shll %cl, %r10d - subq %rcx, %r9 - pmovmskb %xmm2, %edx - andl %r10d, %edx - jnz 1f - subq %r9, %rsi - jbe 3f - -2: movdqa 16(%rdi), %xmm0 - leaq 16(%rdi), %rdi - pcmpeqb %xmm1, %xmm0 - pmovmskb %xmm0, %edx - testl %edx, %edx - jnz 1f - subq $16, %rsi - jnbe 2b -3: ret - -1: subq %r8, %rdi - bsfl %edx, %edx - addq %rdi, %rdx - cmpq %rdx, %rax - cmovnbq %rdx, %rax - ret -END(__strnlen) -weak_alias (__strnlen, strnlen) -libc_hidden_def (strnlen) +weak_alias (__strnlen, strnlen); +libc_hidden_builtin_def (strnlen) |