diff options
Diffstat (limited to 'libc/sysdeps')
125 files changed, 5068 insertions, 1386 deletions
diff --git a/libc/sysdeps/generic/inttypes.h b/libc/sysdeps/generic/inttypes.h index 47fe751e2..dc9751905 100644 --- a/libc/sysdeps/generic/inttypes.h +++ b/libc/sysdeps/generic/inttypes.h @@ -286,8 +286,8 @@ typedef struct /* We have to define the `uintmax_t' type using `lldiv_t'. */ typedef struct { - long long int quot; /* Quotient. */ - long long int rem; /* Remainder. */ + __extension__ long long int quot; /* Quotient. */ + __extension__ long long int rem; /* Remainder. */ } imaxdiv_t; #endif diff --git a/libc/sysdeps/gnu/netinet/tcp.h b/libc/sysdeps/gnu/netinet/tcp.h index 278fc9d8a..b62a696e5 100644 --- a/libc/sysdeps/gnu/netinet/tcp.h +++ b/libc/sysdeps/gnu/netinet/tcp.h @@ -37,20 +37,29 @@ /* * User-settable options (used with setsockopt). */ -#define TCP_NODELAY 1 /* Don't delay send to coalesce packets */ -#define TCP_MAXSEG 2 /* Set maximum segment size */ -#define TCP_CORK 3 /* Control sending of partial frames */ -#define TCP_KEEPIDLE 4 /* Start keeplives after this period */ -#define TCP_KEEPINTVL 5 /* Interval between keepalives */ -#define TCP_KEEPCNT 6 /* Number of keepalives before death */ -#define TCP_SYNCNT 7 /* Number of SYN retransmits */ -#define TCP_LINGER2 8 /* Life time of orphaned FIN-WAIT-2 state */ -#define TCP_DEFER_ACCEPT 9 /* Wake up listener only when data arrive */ -#define TCP_WINDOW_CLAMP 10 /* Bound advertised window */ -#define TCP_INFO 11 /* Information about this connection. */ -#define TCP_QUICKACK 12 /* Bock/reenable quick ACKs. */ -#define TCP_CONGESTION 13 /* Congestion control algorithm. 
*/ -#define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */ +#define TCP_NODELAY 1 /* Don't delay send to coalesce packets */ +#define TCP_MAXSEG 2 /* Set maximum segment size */ +#define TCP_CORK 3 /* Control sending of partial frames */ +#define TCP_KEEPIDLE 4 /* Start keepalives after this period */ +#define TCP_KEEPINTVL 5 /* Interval between keepalives */ +#define TCP_KEEPCNT 6 /* Number of keepalives before death */ +#define TCP_SYNCNT 7 /* Number of SYN retransmits */ +#define TCP_LINGER2 8 /* Life time of orphaned FIN-WAIT-2 state */ +#define TCP_DEFER_ACCEPT 9 /* Wake up listener only when data arrive */ +#define TCP_WINDOW_CLAMP 10 /* Bound advertised window */ +#define TCP_INFO 11 /* Information about this connection. */ +#define TCP_QUICKACK 12 /* Block/reenable quick ACKs. */ +#define TCP_CONGESTION 13 /* Congestion control algorithm. */ +#define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */ +#define TCP_COOKIE_TRANSACTIONS 15 /* TCP Cookie Transactions */ +#define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams */ +#define TCP_THIN_DUPACK 17 /* Fast retrans. after 1 dupack */ +#define TCP_USER_TIMEOUT 18 /* How long for loss retry before timeout */ +#define TCP_REPAIR 19 /* TCP sock is under repair right now */ +#define TCP_REPAIR_QUEUE 20 /* Set TCP queue to repair */ +#define TCP_QUEUE_SEQ 21 /* Set sequence number of repaired queue. */ +#define TCP_REPAIR_OPTIONS 22 /* Repair TCP connection options */ +#define TCP_FASTOPEN 23 /* Enable FastOpen on listeners */ #ifdef __USE_MISC # include <sys/types.h> @@ -243,6 +252,49 @@ struct tcp_md5sig u_int8_t tcpm_key[TCP_MD5SIG_MAXKEYLEN]; /* Key (binary). */ }; +/* For socket repair options. */ +struct tcp_repair_opt +{ + u_int32_t opt_code; + u_int32_t opt_val; +}; + +/* Queue to repair, for TCP_REPAIR_QUEUE. */ +enum +{ + TCP_NO_QUEUE, + TCP_RECV_QUEUE, + TCP_SEND_QUEUE, + TCP_QUEUES_NR, +}; + +/* For cookie transactions socket options. 
*/ +#define TCP_COOKIE_MIN 8 /* 64-bits */ +#define TCP_COOKIE_MAX 16 /* 128-bits */ +#define TCP_COOKIE_PAIR_SIZE (2*TCP_COOKIE_MAX) + +/* Flags for both getsockopt and setsockopt */ +#define TCP_COOKIE_IN_ALWAYS (1 << 0) /* Discard SYN without cookie */ +#define TCP_COOKIE_OUT_NEVER (1 << 1) /* Prohibit outgoing cookies, + * supersedes everything. */ + +/* Flags for getsockopt */ +#define TCP_S_DATA_IN (1 << 2) /* Was data received? */ +#define TCP_S_DATA_OUT (1 << 3) /* Was data sent? */ + +#define TCP_MSS_DEFAULT 536U /* IPv4 (RFC1122, RFC2581) */ +#define TCP_MSS_DESIRED 1220U /* IPv6 (tunneled), EDNS0 (RFC3226) */ + +struct tcp_cookie_transactions +{ + u_int16_t tcpct_flags; + u_int8_t __tcpct_pad1; + u_int8_t tcpct_cookie_desired; + u_int16_t tcpct_s_data_desired; + u_int16_t tcpct_used; + u_int8_t tcpct_value[TCP_MSS_DEFAULT]; +}; + #endif /* Misc. */ #endif /* netinet/tcp.h */ diff --git a/libc/sysdeps/i386/dl-machine.h b/libc/sysdeps/i386/dl-machine.h index 9e36687ef..a1e40d8eb 100644 --- a/libc/sysdeps/i386/dl-machine.h +++ b/libc/sysdeps/i386/dl-machine.h @@ -348,6 +348,12 @@ elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc, switch (r_type) { +# ifndef RTLD_BOOTSTRAP + case R_386_SIZE32: + /* Set to symbol size plus addend. */ + *reloc_addr += sym->st_size; + break; +# endif case R_386_GLOB_DAT: case R_386_JMP_SLOT: *reloc_addr = value; @@ -507,6 +513,9 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc, switch (ELF32_R_TYPE (reloc->r_info)) { + case R_386_SIZE32: + /* Set to symbol size plus addend. */ + value = sym->st_size; case R_386_GLOB_DAT: case R_386_JMP_SLOT: case R_386_32: diff --git a/libc/sysdeps/i386/fpu/fenv_private.h b/libc/sysdeps/i386/fpu/fenv_private.h index 03f4c97a9..1f8336cf9 100644 --- a/libc/sysdeps/i386/fpu/fenv_private.h +++ b/libc/sysdeps/i386/fpu/fenv_private.h @@ -176,7 +176,7 @@ libc_feupdateenv_test_sse (fenv_t *e, int ex) /* Raise SIGFPE for any new exceptions since the hold. 
Expect that the normal environment has all exceptions masked. */ - if (__builtin_expect ((old_mxcsr >> 7) & cur_ex, 0)) + if (__glibc_unlikely (~(old_mxcsr >> 7) & cur_ex)) __feraiseexcept (cur_ex); /* Test for exceptions raised since the hold. */ diff --git a/libc/sysdeps/i386/fpu/libm-test-ulps b/libc/sysdeps/i386/fpu/libm-test-ulps index 3fc30de46..1525b16f3 100644 --- a/libc/sysdeps/i386/fpu/libm-test-ulps +++ b/libc/sysdeps/i386/fpu/libm-test-ulps @@ -303,6 +303,12 @@ float: 1 ifloat: 1 ildouble: 2 ldouble: 2 +Test "Imaginary part of: cacos (0x1.fp1023 + 0x1.fp1023 i) == 7.853981633974483096156608458198757210493e-1 - 7.107906849659093345062145442726115449315e2 i": +double: 1 +idouble: 1 +Test "Imaginary part of: cacos (0x1.fp127 + 0x1.fp127 i) == 7.853981633974483096156608458198757210493e-1 - 8.973081118419833726837456344608533993585e1 i": +double: 1 +idouble: 1 Test "Imaginary part of: cacos (1.5 + +0 i) == +0 - 0.9624236501192068949955178268487368462704 i": double: 1 float: 1 diff --git a/libc/sysdeps/ieee754/dbl-64/atnat.h b/libc/sysdeps/ieee754/dbl-64/atnat.h index 2beb33b05..a1a3572f7 100644 --- a/libc/sysdeps/ieee754/dbl-64/atnat.h +++ b/libc/sysdeps/ieee754/dbl-64/atnat.h @@ -130,8 +130,6 @@ #endif #endif -#define ZERO 0.0 -#define ONE 1.0 #define A a.d #define B b.d #define C c.d @@ -152,7 +150,5 @@ #define U6 u6.d #define U7 u7.d #define U8 u8.d -#define TWO8 0x1.0p8 /* 2^8 */ -#define TWO52 0x1.0p52 /* 2^52 */ #endif diff --git a/libc/sysdeps/ieee754/dbl-64/atnat2.h b/libc/sysdeps/ieee754/dbl-64/atnat2.h index 10b32535b..f12498bf3 100644 --- a/libc/sysdeps/ieee754/dbl-64/atnat2.h +++ b/libc/sysdeps/ieee754/dbl-64/atnat2.h @@ -162,11 +162,4 @@ #endif #endif -#define ZERO 0.0 /* 0 */ -#define MZERO -0.0 /* 0 with the sign bit set */ -#define ONE 1.0 /* 1 */ -#define TWO8 0x1.0p8 /* 2^8 */ -#define TWO52 0x1.0p52 /* 2^52 */ -#define TWOM1022 0x1.0p-1022 /* 2^-1022 */ - #endif diff --git a/libc/sysdeps/ieee754/dbl-64/mpa.c 
b/libc/sysdeps/ieee754/dbl-64/mpa.c index 7abad6782..ede8ed198 100644 --- a/libc/sysdeps/ieee754/dbl-64/mpa.c +++ b/libc/sysdeps/ieee754/dbl-64/mpa.c @@ -22,9 +22,7 @@ /* FUNCTIONS: */ /* mcr */ /* acr */ -/* cr */ /* cpy */ -/* cpymn */ /* norm */ /* denorm */ /* mp_dbl */ @@ -44,7 +42,6 @@ #include "endian.h" #include "mpa.h" -#include "mpa2.h" #include <sys/param.h> #ifndef SECTION @@ -60,30 +57,45 @@ const mp_no mptwo = {1, {1.0, 2.0}}; /* Compare mantissa of two multiple precision numbers regardless of the sign and exponent of the numbers. */ static int -mcr(const mp_no *x, const mp_no *y, int p) { +mcr (const mp_no *x, const mp_no *y, int p) +{ int i; - for (i=1; i<=p; i++) { - if (X[i] == Y[i]) continue; - else if (X[i] > Y[i]) return 1; - else return -1; } + for (i = 1; i <= p; i++) + { + if (X[i] == Y[i]) + continue; + else if (X[i] > Y[i]) + return 1; + else + return -1; + } return 0; } /* Compare the absolute values of two multiple precision numbers. */ int -__acr(const mp_no *x, const mp_no *y, int p) { +__acr (const mp_no *x, const mp_no *y, int p) +{ int i; - if (X[0] == ZERO) { - if (Y[0] == ZERO) i= 0; - else i=-1; - } - else if (Y[0] == ZERO) i= 1; - else { - if (EX > EY) i= 1; - else if (EX < EY) i=-1; - else i= mcr(x,y,p); - } + if (X[0] == ZERO) + { + if (Y[0] == ZERO) + i = 0; + else + i = -1; + } + else if (Y[0] == ZERO) + i = 1; + else + { + if (EX > EY) + i = 1; + else if (EX < EY) + i = -1; + else + i = mcr (x, y, p); + } return i; } @@ -92,59 +104,86 @@ __acr(const mp_no *x, const mp_no *y, int p) { #ifndef NO___CPY /* Copy multiple precision number X into Y. They could be the same number. */ -void __cpy(const mp_no *x, mp_no *y, int p) { +void +__cpy (const mp_no *x, mp_no *y, int p) +{ EY = EX; - for (int i=0; i <= p; i++) Y[i] = X[i]; + for (int i = 0; i <= p; i++) + Y[i] = X[i]; } #endif #ifndef NO___MP_DBL /* Convert a multiple precision number *X into a double precision number *Y, normalized case (|x| >= 2**(-1022))). 
*/ -static void norm(const mp_no *x, double *y, int p) +static void +norm (const mp_no *x, double *y, int p) { - #define R RADIXI +#define R RADIXI int i; - double a,c,u,v,z[5]; - if (p<5) { - if (p==1) c = X[1]; - else if (p==2) c = X[1] + R* X[2]; - else if (p==3) c = X[1] + R*(X[2] + R* X[3]); - else if (p==4) c =(X[1] + R* X[2]) + R*R*(X[3] + R*X[4]); - } - else { - for (a=ONE, z[1]=X[1]; z[1] < TWO23; ) - {a *= TWO; z[1] *= TWO; } - - for (i=2; i<5; i++) { - z[i] = X[i]*a; - u = (z[i] + CUTTER)-CUTTER; - if (u > z[i]) u -= RADIX; - z[i] -= u; - z[i-1] += u*RADIXI; - } - - u = (z[3] + TWO71) - TWO71; - if (u > z[3]) u -= TWO19; - v = z[3]-u; - - if (v == TWO18) { - if (z[4] == ZERO) { - for (i=5; i <= p; i++) { - if (X[i] == ZERO) continue; - else {z[3] += ONE; break; } - } - } - else z[3] += ONE; + double a, c, u, v, z[5]; + if (p < 5) + { + if (p == 1) + c = X[1]; + else if (p == 2) + c = X[1] + R * X[2]; + else if (p == 3) + c = X[1] + R * (X[2] + R * X[3]); + else if (p == 4) + c = (X[1] + R * X[2]) + R * R * (X[3] + R * X[4]); } + else + { + for (a = ONE, z[1] = X[1]; z[1] < TWO23;) + { + a *= TWO; + z[1] *= TWO; + } - c = (z[1] + R *(z[2] + R * z[3]))/a; - } + for (i = 2; i < 5; i++) + { + z[i] = X[i] * a; + u = (z[i] + CUTTER) - CUTTER; + if (u > z[i]) + u -= RADIX; + z[i] -= u; + z[i - 1] += u * RADIXI; + } + + u = (z[3] + TWO71) - TWO71; + if (u > z[3]) + u -= TWO19; + v = z[3] - u; + + if (v == TWO18) + { + if (z[4] == ZERO) + { + for (i = 5; i <= p; i++) + { + if (X[i] == ZERO) + continue; + else + { + z[3] += ONE; + break; + } + } + } + else + z[3] += ONE; + } + + c = (z[1] + R * (z[2] + R * z[3])) / a; + } c *= X[0]; - for (i=1; i<EX; i++) c *= RADIX; - for (i=1; i>EX; i--) c *= RADIXI; + for (i = 1; i < EX; i++) + c *= RADIX; + for (i = 1; i > EX; i--) + c *= RADIXI; *y = c; #undef R @@ -152,58 +191,129 @@ static void norm(const mp_no *x, double *y, int p) /* Convert a multiple precision number *X into a double precision number *Y, Denormal case 
(|x| < 2**(-1022))). */ -static void denorm(const mp_no *x, double *y, int p) +static void +denorm (const mp_no *x, double *y, int p) { - int i,k; - double c,u,z[5]; + int i, k; + double c, u, z[5]; #define R RADIXI - if (EX<-44 || (EX==-44 && X[1]<TWO5)) - { *y=ZERO; return; } - - if (p==1) { - if (EX==-42) {z[1]=X[1]+TWO10; z[2]=ZERO; z[3]=ZERO; k=3;} - else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; z[3]=ZERO; k=2;} - else {z[1]= TWO10; z[2]=ZERO; z[3]=X[1]; k=1;} - } - else if (p==2) { - if (EX==-42) {z[1]=X[1]+TWO10; z[2]=X[2]; z[3]=ZERO; k=3;} - else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; z[3]=X[2]; k=2;} - else {z[1]= TWO10; z[2]=ZERO; z[3]=X[1]; k=1;} - } - else { - if (EX==-42) {z[1]=X[1]+TWO10; z[2]=X[2]; k=3;} - else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; k=2;} - else {z[1]= TWO10; z[2]=ZERO; k=1;} - z[3] = X[k]; - } + if (EX < -44 || (EX == -44 && X[1] < TWO5)) + { + *y = ZERO; + return; + } + + if (p == 1) + { + if (EX == -42) + { + z[1] = X[1] + TWO10; + z[2] = ZERO; + z[3] = ZERO; + k = 3; + } + else if (EX == -43) + { + z[1] = TWO10; + z[2] = X[1]; + z[3] = ZERO; + k = 2; + } + else + { + z[1] = TWO10; + z[2] = ZERO; + z[3] = X[1]; + k = 1; + } + } + else if (p == 2) + { + if (EX == -42) + { + z[1] = X[1] + TWO10; + z[2] = X[2]; + z[3] = ZERO; + k = 3; + } + else if (EX == -43) + { + z[1] = TWO10; + z[2] = X[1]; + z[3] = X[2]; + k = 2; + } + else + { + z[1] = TWO10; + z[2] = ZERO; + z[3] = X[1]; + k = 1; + } + } + else + { + if (EX == -42) + { + z[1] = X[1] + TWO10; + z[2] = X[2]; + k = 3; + } + else if (EX == -43) + { + z[1] = TWO10; + z[2] = X[1]; + k = 2; + } + else + { + z[1] = TWO10; + z[2] = ZERO; + k = 1; + } + z[3] = X[k]; + } u = (z[3] + TWO57) - TWO57; - if (u > z[3]) u -= TWO5; + if (u > z[3]) + u -= TWO5; - if (u==z[3]) { - for (i=k+1; i <= p; i++) { - if (X[i] == ZERO) continue; - else {z[3] += ONE; break; } + if (u == z[3]) + { + for (i = k + 1; i <= p; i++) + { + if (X[i] == ZERO) + continue; + else + { + z[3] += ONE; + break; + } + } } - } 
- c = X[0]*((z[1] + R*(z[2] + R*z[3])) - TWO10); + c = X[0] * ((z[1] + R * (z[2] + R * z[3])) - TWO10); - *y = c*TWOM1032; + *y = c * TWOM1032; #undef R } /* Convert multiple precision number *X into double precision number *Y. The result is correctly rounded to the nearest/even. */ -void __mp_dbl(const mp_no *x, double *y, int p) { - - if (X[0] == ZERO) {*y = ZERO; return; } +void +__mp_dbl (const mp_no *x, double *y, int p) +{ + if (X[0] == ZERO) + { + *y = ZERO; + return; + } if (__glibc_likely (EX > -42 || (EX == -42 && X[1] >= TWO10))) - norm(x,y,p); + norm (x, y, p); else - denorm(x,y,p); + denorm (x, y, p); } #endif @@ -211,27 +321,44 @@ void __mp_dbl(const mp_no *x, double *y, int p) { small, the result is truncated. */ void SECTION -__dbl_mp(double x, mp_no *y, int p) { - - int i,n; +__dbl_mp (double x, mp_no *y, int p) +{ + int i, n; double u; /* Sign. */ - if (x == ZERO) {Y[0] = ZERO; return; } - else if (x > ZERO) Y[0] = ONE; - else {Y[0] = MONE; x=-x; } + if (x == ZERO) + { + Y[0] = ZERO; + return; + } + else if (x > ZERO) + Y[0] = ONE; + else + { + Y[0] = MONE; + x = -x; + } /* Exponent. */ - for (EY=ONE; x >= RADIX; EY += ONE) x *= RADIXI; - for ( ; x < ONE; EY -= ONE) x *= RADIX; + for (EY = ONE; x >= RADIX; EY += ONE) + x *= RADIXI; + for (; x < ONE; EY -= ONE) + x *= RADIX; /* Digits. */ - n=MIN(p,4); - for (i=1; i<=n; i++) { - u = (x + TWO52) - TWO52; - if (u>x) u -= ONE; - Y[i] = u; x -= u; x *= RADIX; } - for ( ; i<=p; i++) Y[i] = ZERO; + n = MIN (p, 4); + for (i = 1; i <= n; i++) + { + u = (x + TWO52) - TWO52; + if (u > x) + u -= ONE; + Y[i] = u; + x -= u; + x *= RADIX; + } + for (; i <= p; i++) + Y[i] = ZERO; } /* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0. The @@ -240,39 +367,55 @@ __dbl_mp(double x, mp_no *y, int p) { truncated. 
*/ static void SECTION -add_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) { - - int i,j,k; +add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p) +{ + int i, j, k; EZ = EX; - i=p; j=p+ EY - EX; k=p+1; - - if (j<1) - {__cpy(x,z,p); return; } - else Z[k] = ZERO; - - for (; j>0; i--,j--) { - Z[k] += X[i] + Y[j]; - if (Z[k] >= RADIX) { - Z[k] -= RADIX; - Z[--k] = ONE; } - else - Z[--k] = ZERO; - } - - for (; i>0; i--) { - Z[k] += X[i]; - if (Z[k] >= RADIX) { - Z[k] -= RADIX; - Z[--k] = ONE; } - else - Z[--k] = ZERO; - } - - if (Z[1] == ZERO) { - for (i=1; i<=p; i++) Z[i] = Z[i+1]; } - else EZ += ONE; + i = p; + j = p + EY - EX; + k = p + 1; + + if (j < 1) + { + __cpy (x, z, p); + return; + } + else + Z[k] = ZERO; + + for (; j > 0; i--, j--) + { + Z[k] += X[i] + Y[j]; + if (Z[k] >= RADIX) + { + Z[k] -= RADIX; + Z[--k] = ONE; + } + else + Z[--k] = ZERO; + } + + for (; i > 0; i--) + { + Z[k] += X[i]; + if (Z[k] >= RADIX) + { + Z[k] -= RADIX; + Z[--k] = ONE; + } + else + Z[--k] = ZERO; + } + + if (Z[1] == ZERO) + { + for (i = 1; i <= p; i++) + Z[i] = Z[i + 1]; + } + else + EZ += ONE; } /* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0. @@ -281,52 +424,73 @@ add_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) { ULP. 
*/ static void SECTION -sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) { - - int i,j,k; +sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p) +{ + int i, j, k; EZ = EX; - if (EX == EY) { - i=j=k=p; - Z[k] = Z[k+1] = ZERO; } - else { - j= EX - EY; - if (j > p) {__cpy(x,z,p); return; } - else { - i=p; j=p+1-j; k=p; - if (Y[j] > ZERO) { - Z[k+1] = RADIX - Y[j--]; - Z[k] = MONE; } - else { - Z[k+1] = ZERO; - Z[k] = ZERO; j--;} - } - } - - for (; j>0; i--,j--) { - Z[k] += (X[i] - Y[j]); - if (Z[k] < ZERO) { - Z[k] += RADIX; - Z[--k] = MONE; } - else - Z[--k] = ZERO; - } - - for (; i>0; i--) { - Z[k] += X[i]; - if (Z[k] < ZERO) { - Z[k] += RADIX; - Z[--k] = MONE; } - else - Z[--k] = ZERO; - } - - for (i=1; Z[i] == ZERO; i++) ; + if (EX == EY) + { + i = j = k = p; + Z[k] = Z[k + 1] = ZERO; + } + else + { + j = EX - EY; + if (j > p) + { + __cpy (x, z, p); + return; + } + else + { + i = p; + j = p + 1 - j; + k = p; + if (Y[j] > ZERO) + { + Z[k + 1] = RADIX - Y[j--]; + Z[k] = MONE; + } + else + { + Z[k + 1] = ZERO; + Z[k] = ZERO; + j--; + } + } + } + + for (; j > 0; i--, j--) + { + Z[k] += (X[i] - Y[j]); + if (Z[k] < ZERO) + { + Z[k] += RADIX; + Z[--k] = MONE; + } + else + Z[--k] = ZERO; + } + + for (; i > 0; i--) + { + Z[k] += X[i]; + if (Z[k] < ZERO) + { + Z[k] += RADIX; + Z[--k] = MONE; + } + else + Z[--k] = ZERO; + } + + for (i = 1; Z[i] == ZERO; i++); EZ = EZ - i + 1; - for (k=1; i <= p+1; ) + for (k = 1; i <= p + 1;) Z[k++] = Z[i++]; - for (; k <= p; ) + for (; k <= p;) Z[k++] = ZERO; } @@ -335,22 +499,49 @@ sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) { ULP. 
*/ void SECTION -__add(const mp_no *x, const mp_no *y, mp_no *z, int p) { - +__add (const mp_no *x, const mp_no *y, mp_no *z, int p) +{ int n; - if (X[0] == ZERO) {__cpy(y,z,p); return; } - else if (Y[0] == ZERO) {__cpy(x,z,p); return; } - - if (X[0] == Y[0]) { - if (__acr(x,y,p) > 0) {add_magnitudes(x,y,z,p); Z[0] = X[0]; } - else {add_magnitudes(y,x,z,p); Z[0] = Y[0]; } - } - else { - if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p); Z[0] = X[0]; } - else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = Y[0]; } - else Z[0] = ZERO; - } + if (X[0] == ZERO) + { + __cpy (y, z, p); + return; + } + else if (Y[0] == ZERO) + { + __cpy (x, z, p); + return; + } + + if (X[0] == Y[0]) + { + if (__acr (x, y, p) > 0) + { + add_magnitudes (x, y, z, p); + Z[0] = X[0]; + } + else + { + add_magnitudes (y, x, z, p); + Z[0] = Y[0]; + } + } + else + { + if ((n = __acr (x, y, p)) == 1) + { + sub_magnitudes (x, y, z, p); + Z[0] = X[0]; + } + else if (n == -1) + { + sub_magnitudes (y, x, z, p); + Z[0] = Y[0]; + } + else + Z[0] = ZERO; + } } /* Subtract *Y from *X and return the result in *Z. X and Y may overlap but @@ -358,22 +549,50 @@ __add(const mp_no *x, const mp_no *y, mp_no *z, int p) { one ULP. 
*/ void SECTION -__sub(const mp_no *x, const mp_no *y, mp_no *z, int p) { - +__sub (const mp_no *x, const mp_no *y, mp_no *z, int p) +{ int n; - if (X[0] == ZERO) {__cpy(y,z,p); Z[0] = -Z[0]; return; } - else if (Y[0] == ZERO) {__cpy(x,z,p); return; } - - if (X[0] != Y[0]) { - if (__acr(x,y,p) > 0) {add_magnitudes(x,y,z,p); Z[0] = X[0]; } - else {add_magnitudes(y,x,z,p); Z[0] = -Y[0]; } - } - else { - if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p); Z[0] = X[0]; } - else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = -Y[0]; } - else Z[0] = ZERO; - } + if (X[0] == ZERO) + { + __cpy (y, z, p); + Z[0] = -Z[0]; + return; + } + else if (Y[0] == ZERO) + { + __cpy (x, z, p); + return; + } + + if (X[0] != Y[0]) + { + if (__acr (x, y, p) > 0) + { + add_magnitudes (x, y, z, p); + Z[0] = X[0]; + } + else + { + add_magnitudes (y, x, z, p); + Z[0] = -Y[0]; + } + } + else + { + if ((n = __acr (x, y, p)) == 1) + { + sub_magnitudes (x, y, z, p); + Z[0] = X[0]; + } + else if (n == -1) + { + sub_magnitudes (y, x, z, p); + Z[0] = -Y[0]; + } + else + Z[0] = ZERO; + } } /* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X @@ -381,52 +600,54 @@ __sub(const mp_no *x, const mp_no *y, mp_no *z, int p) { digits. In case P > 3 the error is bounded by 1.001 ULP. */ void SECTION -__mul(const mp_no *x, const mp_no *y, mp_no *z, int p) { - +__mul (const mp_no *x, const mp_no *y, mp_no *z, int p) +{ int i, j, k, k2; - double u; + double u, zk; /* Is z=0? */ if (__glibc_unlikely (X[0] * Y[0] == ZERO)) { - Z[0]=ZERO; + Z[0] = ZERO; return; } /* Multiply, add and carry. */ k2 = (__glibc_unlikely (p < 3)) ? 
p + p : p + 3; - Z[k2] = ZERO; + zk = Z[k2] = ZERO; - for (k = k2; k > p; ) + for (k = k2; k > p; k--) { for (i = k - p, j = p; i < p + 1; i++, j--) - Z[k] += X[i] * Y[j]; + zk += X[i] * Y[j]; - u = (Z[k] + CUTTER) - CUTTER; - if (u > Z[k]) + u = (zk + CUTTER) - CUTTER; + if (u > zk) u -= RADIX; - Z[k] -= u; - Z[--k] = u * RADIXI; + Z[k] = zk - u; + zk = u * RADIXI; } while (k > 1) { - for (i = 1,j = k - 1; i < k; i++, j--) - Z[k] += X[i] * Y[j]; + for (i = 1, j = k - 1; i < k; i++, j--) + zk += X[i] * Y[j]; - u = (Z[k] + CUTTER) - CUTTER; - if (u > Z[k]) + u = (zk + CUTTER) - CUTTER; + if (u > zk) u -= RADIX; - Z[k] -= u; - Z[--k] = u * RADIXI; + Z[k] = zk - u; + zk = u * RADIXI; + k--; } + Z[k] = zk; EZ = EX + EY; /* Is there a carry beyond the most significant digit? */ if (__glibc_unlikely (Z[1] == ZERO)) { for (i = 1; i <= p; i++) - Z[i] = Z[i+1]; + Z[i] = Z[i + 1]; EZ--; } @@ -439,24 +660,32 @@ __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) { - For P > 3: 2.001 * R ^ (1 - P) *X = 0 is not permissible. */ -static +static void SECTION -void __inv(const mp_no *x, mp_no *y, int p) { +__inv (const mp_no *x, mp_no *y, int p) +{ int i; double t; - mp_no z,w; - static const int np1[] = {0,0,0,0,1,2,2,2,2,3,3,3,3,3,3,3,3,3, - 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4}; - - __cpy(x,&z,p); z.e=0; __mp_dbl(&z,&t,p); - t=ONE/t; __dbl_mp(t,y,p); EY -= EX; - - for (i=0; i<np1[p]; i++) { - __cpy(y,&w,p); - __mul(x,&w,y,p); - __sub(&mptwo,y,&z,p); - __mul(&w,&z,y,p); - } + mp_no z, w; + static const int np1[] = + { 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 + }; + + __cpy (x, &z, p); + z.e = 0; + __mp_dbl (&z, &t, p); + t = ONE / t; + __dbl_mp (t, y, p); + EY -= EX; + + for (i = 0; i < np1[p]; i++) + { + __cpy (y, &w, p); + __mul (x, &w, y, p); + __sub (&mptwo, y, &z, p); + __mul (&w, &z, y, p); + } } /* Divide *X by *Y and store result in *Z. 
X and Y may overlap but not X and Z @@ -468,10 +697,15 @@ void __inv(const mp_no *x, mp_no *y, int p) { *X = 0 is not permissible. */ void SECTION -__dvd(const mp_no *x, const mp_no *y, mp_no *z, int p) { - +__dvd (const mp_no *x, const mp_no *y, mp_no *z, int p) +{ mp_no w; - if (X[0] == ZERO) Z[0] = ZERO; - else {__inv(y,&w,p); __mul(x,&w,z,p);} + if (X[0] == ZERO) + Z[0] = ZERO; + else + { + __inv (y, &w, p); + __mul (x, &w, z, p); + } } diff --git a/libc/sysdeps/ieee754/dbl-64/mpa.h b/libc/sysdeps/ieee754/dbl-64/mpa.h index 77715fc05..06343d46d 100644 --- a/libc/sysdeps/ieee754/dbl-64/mpa.h +++ b/libc/sysdeps/ieee754/dbl-64/mpa.h @@ -82,6 +82,32 @@ extern const mp_no mptwo; #define ABS(x) ((x) < 0 ? -(x) : (x)) +#define RADIX 0x1.0p24 /* 2^24 */ +#define RADIXI 0x1.0p-24 /* 2^-24 */ +#define CUTTER 0x1.0p76 /* 2^76 */ + +#define ZERO 0.0 /* 0 */ +#define MZERO -0.0 /* 0 with the sign bit set */ +#define ONE 1.0 /* 1 */ +#define MONE -1.0 /* -1 */ +#define TWO 2.0 /* 2 */ + +#define TWO5 0x1.0p5 /* 2^5 */ +#define TWO8 0x1.0p8 /* 2^52 */ +#define TWO10 0x1.0p10 /* 2^10 */ +#define TWO18 0x1.0p18 /* 2^18 */ +#define TWO19 0x1.0p19 /* 2^19 */ +#define TWO23 0x1.0p23 /* 2^23 */ +#define TWO52 0x1.0p52 /* 2^52 */ +#define TWO57 0x1.0p57 /* 2^57 */ +#define TWO71 0x1.0p71 /* 2^71 */ +#define TWOM1032 0x1.0p-1032 /* 2^-1032 */ +#define TWOM1022 0x1.0p-1022 /* 2^-1022 */ + +#define HALF 0x1.0p-1 /* 1/2 */ +#define MHALF -0x1.0p-1 /* -1/2 */ +#define HALFRAD 0x1.0p23 /* 2^23 */ + int __acr (const mp_no *, const mp_no *, int); void __cpy (const mp_no *, mp_no *, int); void __mp_dbl (const mp_no *, double *, int); @@ -97,3 +123,33 @@ extern void __mpsqrt (mp_no *, mp_no *, int); extern void __mpexp (mp_no *, mp_no *, int); extern void __c32 (mp_no *, mp_no *, mp_no *, int); extern int __mpranred (double, mp_no *, int); + +/* Given a power POW, build a multiprecision number 2^POW. 
*/ +static inline void +__pow_mp (int pow, mp_no *y, int p) +{ + int i, rem; + + /* The exponent is E such that E is a factor of 2^24. The remainder (of the + form 2^x) goes entirely into the first digit of the mantissa as it is + always less than 2^24. */ + EY = pow / 24; + rem = pow - EY * 24; + EY++; + + /* If the remainder is negative, it means that POW was negative since + |EY * 24| <= |pow|. Adjust so that REM is positive and still less than + 24 because of which, the mantissa digit is less than 2^24. */ + if (rem < 0) + { + EY--; + rem += 24; + } + /* The sign of any 2^x is always positive. */ + Y[0] = ONE; + Y[1] = 1 << rem; + + /* Everything else is ZERO. */ + for (i = 2; i <= p; i++) + Y[i] = ZERO; +} diff --git a/libc/sysdeps/ieee754/dbl-64/mpa2.h b/libc/sysdeps/ieee754/dbl-64/mpa2.h deleted file mode 100644 index b80bf412e..000000000 --- a/libc/sysdeps/ieee754/dbl-64/mpa2.h +++ /dev/null @@ -1,50 +0,0 @@ - -/* - * IBM Accurate Mathematical Library - * Written by International Business Machines Corp. - * Copyright (C) 2001-2013 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. 
- */ - -/**************************************************************************/ -/* */ -/* MODULE_NAME:mpa2.h */ -/* */ -/* */ -/* variables prototype and definition according to type of processor */ -/* types definition */ -/**************************************************************************/ - -#ifndef MPA2_H -#define MPA2_H - -#define RADIX 0x1.0p24 /* 2^24 */ -#define RADIXI 0x1.0p-24 /* 2^-24 */ -#define CUTTER 0x1.0p76 /* 2^76 */ -#define ZERO 0.0 /* 0 */ -#define ONE 1.0 /* 1 */ -#define MONE -1.0 /* -1 */ -#define TWO -2.0 /* -2 */ -#define TWO5 0x1.0p5 /* 2^5 */ -#define TWO10 0x1.0p10 /* 2^10 */ -#define TWO18 0x1.0p18 /* 2^18 */ -#define TWO19 0x1.0p19 /* 2^19 */ -#define TWO23 0x1.0p23 /* 2^23 */ -#define TWO52 0x1.0p52 /* 2^52 */ -#define TWO57 0x1.0p57 /* 2^57 */ -#define TWO71 0x1.0p71 /* 2^71 */ -#define TWOM1032 0x1.0p-1032 /* 2^-1032 */ - -#endif diff --git a/libc/sysdeps/ieee754/dbl-64/mpatan.h b/libc/sysdeps/ieee754/dbl-64/mpatan.h index d8e758916..743a1b98c 100644 --- a/libc/sysdeps/ieee754/dbl-64/mpatan.h +++ b/libc/sysdeps/ieee754/dbl-64/mpatan.h @@ -143,6 +143,3 @@ __atan_twonm1[33] = { /* 2n-1 */ #endif #endif - -#define ONE 1.0 -#define TWO 2.0 diff --git a/libc/sysdeps/ieee754/dbl-64/mpatan2.c b/libc/sysdeps/ieee754/dbl-64/mpatan2.c index 9084d98d6..c0b9aea1e 100644 --- a/libc/sysdeps/ieee754/dbl-64/mpatan2.c +++ b/libc/sysdeps/ieee754/dbl-64/mpatan2.c @@ -49,8 +49,6 @@ void SECTION __mpatan2(mp_no *y, mp_no *x, mp_no *z, int p) { - static const double ZERO = 0.0, ONE = 1.0; - mp_no mpt1,mpt2,mpt3; diff --git a/libc/sysdeps/ieee754/dbl-64/mpexp.c b/libc/sysdeps/ieee754/dbl-64/mpexp.c index c4048207e..8d288ff9a 100644 --- a/libc/sysdeps/ieee754/dbl-64/mpexp.c +++ b/libc/sysdeps/ieee754/dbl-64/mpexp.c @@ -30,48 +30,75 @@ #include "endian.h" #include "mpa.h" -#include "mpexp.h" #include <assert.h> #ifndef SECTION # define SECTION #endif -/* Multi-Precision exponential function subroutine (for p >= 4, */ -/* 2**(-55) <= abs(x) <= 
1024). */ +/* Multi-Precision exponential function subroutine (for p >= 4, + 2**(-55) <= abs(x) <= 1024). */ void SECTION -__mpexp(mp_no *x, mp_no *y, int p) { - - int i,j,k,m,m1,m2,n; - double a,b; - static const int np[33] = {0,0,0,0,3,3,4,4,5,4,4,5,5,5,6,6,6,6,6,6, - 6,6,6,6,7,7,7,7,8,8,8,8,8}; - static const int m1p[33]= {0,0,0,0,17,23,23,28,27,38,42,39,43,47,43,47,50,54, - 57,60,64,67,71,74,68,71,74,77,70,73,76,78,81}; - static const int m1np[7][18] = { - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 0, 0, 0, 0,36,48,60,72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - { 0, 0, 0, 0,24,32,40,48,56,64,72, 0, 0, 0, 0, 0, 0, 0}, - { 0, 0, 0, 0,17,23,29,35,41,47,53,59,65, 0, 0, 0, 0, 0}, - { 0, 0, 0, 0, 0, 0,23,28,33,38,42,47,52,57,62,66, 0, 0}, - { 0, 0, 0, 0, 0, 0, 0, 0,27, 0, 0,39,43,47,51,55,59,63}, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,43,47,50,54}}; - mp_no mpk = {0,{0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, - 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, - 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}}; - mp_no mps,mpak,mpt1,mpt2; +__mpexp (mp_no *x, mp_no *y, int p) +{ + int i, j, k, m, m1, m2, n; + double b; + static const int np[33] = + { + 0, 0, 0, 0, 3, 3, 4, 4, 5, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8 + }; + static const int m1p[33] = + { + 0, 0, 0, 0, + 17, 23, 23, 28, + 27, 38, 42, 39, + 43, 47, 43, 47, + 50, 54, 57, 60, + 64, 67, 71, 74, + 68, 71, 74, 77, + 70, 73, 76, 78, + 81 + }; + static const int m1np[7][18] = + { + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 36, 48, 60, 72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 24, 32, 40, 48, 56, 64, 72, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 17, 23, 29, 35, 41, 47, 53, 59, 65, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 23, 28, 33, 38, 42, 47, 52, 57, 62, 66, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 0, 39, 43, 47, 51, 55, 59, 63}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43, 47, 50, 54} + }; + mp_no mpk = + 
{ + 0, + { + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + } + }; + mp_no mps, mpak, mpt1, mpt2; - /* Choose m,n and compute a=2**(-m) */ - n = np[p]; m1 = m1p[p]; a = __mpexp_twomm1[p].d; - for (i=0; i<EX; i++) a *= RADIXI; - for ( ; i>EX; i--) a *= RADIX; - b = X[1]*RADIXI; m2 = 24*EX; - for (; b<HALF; m2--) { a *= TWO; b *= TWO; } - if (b == HALF) { - for (i=2; i<=p; i++) { if (X[i]!=ZERO) break; } - if (i==p+1) { m2--; a *= TWO; } - } + /* Choose m,n and compute a=2**(-m). */ + n = np[p]; + m1 = m1p[p]; + b = X[1]; + m2 = 24 * EX; + for (; b < HALFRAD; m2--) + b *= TWO; + if (b == HALFRAD) + { + for (i = 2; i <= p; i++) + { + if (X[i] != ZERO) + break; + } + if (i == p + 1) + m2--; + } m = m1 + m2; if (__glibc_unlikely (m <= 0)) @@ -83,36 +110,47 @@ __mpexp(mp_no *x, mp_no *y, int p) { than 2^-55. */ assert (p < 18); m = 0; - a = ONE; for (i = n - 1; i > 0; i--, n--) if (m1np[i][p] + m2 > 0) break; } - /* Compute s=x*2**(-m). Put result in mps */ - __dbl_mp(a,&mpt1,p); - __mul(x,&mpt1,&mps,p); + /* Compute s=x*2**(-m). Put result in mps. */ + __pow_mp (-m, &mpt1, p); + __mul (x, &mpt1, &mps, p); - /* Evaluate the polynomial. Put result in mpt2 */ - mpk.e = 1; mpk.d[0] = ONE; mpk.d[1]=n; - __dvd(&mps,&mpk,&mpt1,p); - __add(&mpone,&mpt1,&mpak,p); - for (k=n-1; k>1; k--) { - __mul(&mps,&mpak,&mpt1,p); - mpk.d[1] = k; - __dvd(&mpt1,&mpk,&mpt2,p); - __add(&mpone,&mpt2,&mpak,p); - } - __mul(&mps,&mpak,&mpt1,p); - __add(&mpone,&mpt1,&mpt2,p); + /* Evaluate the polynomial. Put result in mpt2. 
*/ + mpk.e = 1; + mpk.d[0] = ONE; + mpk.d[1] = n; + __dvd (&mps, &mpk, &mpt1, p); + __add (&mpone, &mpt1, &mpak, p); + for (k = n - 1; k > 1; k--) + { + __mul (&mps, &mpak, &mpt1, p); + mpk.d[1] = k; + __dvd (&mpt1, &mpk, &mpt2, p); + __add (&mpone, &mpt2, &mpak, p); + } + __mul (&mps, &mpak, &mpt1, p); + __add (&mpone, &mpt1, &mpt2, p); - /* Raise polynomial value to the power of 2**m. Put result in y */ - for (k=0,j=0; k<m; ) { - __mul(&mpt2,&mpt2,&mpt1,p); k++; - if (k==m) { j=1; break; } - __mul(&mpt1,&mpt1,&mpt2,p); k++; - } - if (j) __cpy(&mpt1,y,p); - else __cpy(&mpt2,y,p); + /* Raise polynomial value to the power of 2**m. Put result in y. */ + for (k = 0, j = 0; k < m;) + { + __mul (&mpt2, &mpt2, &mpt1, p); + k++; + if (k == m) + { + j = 1; + break; + } + __mul (&mpt1, &mpt1, &mpt2, p); + k++; + } + if (j) + __cpy (&mpt1, y, p); + else + __cpy (&mpt2, y, p); return; } diff --git a/libc/sysdeps/ieee754/dbl-64/mpexp.h b/libc/sysdeps/ieee754/dbl-64/mpexp.h deleted file mode 100644 index 2b2638607..000000000 --- a/libc/sysdeps/ieee754/dbl-64/mpexp.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * IBM Accurate Mathematical Library - * Written by International Business Machines Corp. - * Copyright (C) 2001-2013 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. 
- */ - -/******************************************************************/ -/* */ -/* MODULE_NAME:mpexp.h */ -/* */ -/* common data and variables prototype and definition */ -/******************************************************************/ - -#ifndef MPEXP_H -#define MPEXP_H - -extern const number __mpexp_twomm1[33] attribute_hidden; - -#ifndef AVOID_MPEXP_H -#ifdef BIG_ENDI - const number - __mpexp_twomm1[33] = { /* 2**-m1 */ -/**/ {{0x3ff00000, 0x00000000} }, /* 1 */ -/**/ {{0x3ff00000, 0x00000000} }, /* 1 */ -/**/ {{0x3ff00000, 0x00000000} }, /* 1 */ -/**/ {{0x3ff00000, 0x00000000} }, /* 1 */ -/**/ {{0x3ee00000, 0x00000000} }, /* 2**-17 */ -/**/ {{0x3e800000, 0x00000000} }, /* 2**-23 */ -/**/ {{0x3e800000, 0x00000000} }, /* 2**-23 */ -/**/ {{0x3e300000, 0x00000000} }, /* 2**-28 */ -/**/ {{0x3e400000, 0x00000000} }, /* 2**-27 */ -/**/ {{0x3d900000, 0x00000000} }, /* 2**-38 */ -/**/ {{0x3d500000, 0x00000000} }, /* 2**-42 */ -/**/ {{0x3d800000, 0x00000000} }, /* 2**-39 */ -/**/ {{0x3d400000, 0x00000000} }, /* 2**-43 */ -/**/ {{0x3d000000, 0x00000000} }, /* 2**-47 */ -/**/ {{0x3d400000, 0x00000000} }, /* 2**-43 */ -/**/ {{0x3d000000, 0x00000000} }, /* 2**-47 */ -/**/ {{0x3cd00000, 0x00000000} }, /* 2**-50 */ -/**/ {{0x3c900000, 0x00000000} }, /* 2**-54 */ -/**/ {{0x3c600000, 0x00000000} }, /* 2**-57 */ -/**/ {{0x3c300000, 0x00000000} }, /* 2**-60 */ -/**/ {{0x3bf00000, 0x00000000} }, /* 2**-64 */ -/**/ {{0x3bc00000, 0x00000000} }, /* 2**-67 */ -/**/ {{0x3b800000, 0x00000000} }, /* 2**-71 */ -/**/ {{0x3b500000, 0x00000000} }, /* 2**-74 */ -/**/ {{0x3bb00000, 0x00000000} }, /* 2**-68 */ -/**/ {{0x3b800000, 0x00000000} }, /* 2**-71 */ -/**/ {{0x3b500000, 0x00000000} }, /* 2**-74 */ -/**/ {{0x3b200000, 0x00000000} }, /* 2**-77 */ -/**/ {{0x3b900000, 0x00000000} }, /* 2**-70 */ -/**/ {{0x3b600000, 0x00000000} }, /* 2**-73 */ -/**/ {{0x3b300000, 0x00000000} }, /* 2**-76 */ -/**/ {{0x3b100000, 0x00000000} }, /* 2**-78 */ -/**/ {{0x3ae00000, 0x00000000} }, /* 2**-81 
*/ - }; - -#else -#ifdef LITTLE_ENDI - const number - __mpexp_twomm1[33] = { /* 2**-m1 */ -/**/ {{0x00000000, 0x3ff00000} }, /* 1 */ -/**/ {{0x00000000, 0x3ff00000} }, /* 1 */ -/**/ {{0x00000000, 0x3ff00000} }, /* 1 */ -/**/ {{0x00000000, 0x3ff00000} }, /* 1 */ -/**/ {{0x00000000, 0x3ee00000} }, /* 2**-17 */ -/**/ {{0x00000000, 0x3e800000} }, /* 2**-23 */ -/**/ {{0x00000000, 0x3e800000} }, /* 2**-23 */ -/**/ {{0x00000000, 0x3e300000} }, /* 2**-28 */ -/**/ {{0x00000000, 0x3e400000} }, /* 2**-27 */ -/**/ {{0x00000000, 0x3d900000} }, /* 2**-38 */ -/**/ {{0x00000000, 0x3d500000} }, /* 2**-42 */ -/**/ {{0x00000000, 0x3d800000} }, /* 2**-39 */ -/**/ {{0x00000000, 0x3d400000} }, /* 2**-43 */ -/**/ {{0x00000000, 0x3d000000} }, /* 2**-47 */ -/**/ {{0x00000000, 0x3d400000} }, /* 2**-43 */ -/**/ {{0x00000000, 0x3d000000} }, /* 2**-47 */ -/**/ {{0x00000000, 0x3cd00000} }, /* 2**-50 */ -/**/ {{0x00000000, 0x3c900000} }, /* 2**-54 */ -/**/ {{0x00000000, 0x3c600000} }, /* 2**-57 */ -/**/ {{0x00000000, 0x3c300000} }, /* 2**-60 */ -/**/ {{0x00000000, 0x3bf00000} }, /* 2**-64 */ -/**/ {{0x00000000, 0x3bc00000} }, /* 2**-67 */ -/**/ {{0x00000000, 0x3b800000} }, /* 2**-71 */ -/**/ {{0x00000000, 0x3b500000} }, /* 2**-74 */ -/**/ {{0x00000000, 0x3bb00000} }, /* 2**-68 */ -/**/ {{0x00000000, 0x3b800000} }, /* 2**-71 */ -/**/ {{0x00000000, 0x3b500000} }, /* 2**-74 */ -/**/ {{0x00000000, 0x3b200000} }, /* 2**-77 */ -/**/ {{0x00000000, 0x3b900000} }, /* 2**-70 */ -/**/ {{0x00000000, 0x3b600000} }, /* 2**-73 */ -/**/ {{0x00000000, 0x3b300000} }, /* 2**-76 */ -/**/ {{0x00000000, 0x3b100000} }, /* 2**-78 */ -/**/ {{0x00000000, 0x3ae00000} }, /* 2**-81 */ - }; - -#endif -#endif -#endif - -#define RADIX 0x1.0p24 /* 2^24 */ -#define RADIXI 0x1.0p-24 /* 2^-24 */ -#define ZERO 0.0 /* 0 */ -#define ONE 1.0 /* 1 */ -#define TWO 2.0 /* 2 */ -#define HALF 0x1.0p-1 /* 1/2 */ - -#endif diff --git a/libc/sysdeps/ieee754/dbl-64/mpsqrt.h b/libc/sysdeps/ieee754/dbl-64/mpsqrt.h index c7354970a..2b83c4cbf 
100644 --- a/libc/sysdeps/ieee754/dbl-64/mpsqrt.h +++ b/libc/sysdeps/ieee754/dbl-64/mpsqrt.h @@ -35,7 +35,4 @@ extern const int __mpsqrt_mp[33] attribute_hidden; 4,4,4,4,4,4,4,4,4}; #endif -#define ONE 1.0 /* 1 */ -#define HALFRAD 0x1.0p23 /* 2^23 */ - #endif diff --git a/libc/sysdeps/ieee754/dbl-64/mptan.c b/libc/sysdeps/ieee754/dbl-64/mptan.c index 6e08b0dc8..234108e37 100644 --- a/libc/sysdeps/ieee754/dbl-64/mptan.c +++ b/libc/sysdeps/ieee754/dbl-64/mptan.c @@ -47,8 +47,6 @@ void SECTION __mptan(double x, mp_no *mpy, int p) { - static const double MONE = -1.0; - int n; mp_no mpw, mpc, mps; diff --git a/libc/sysdeps/ieee754/dbl-64/s_nearbyint.c b/libc/sysdeps/ieee754/dbl-64/s_nearbyint.c index 60afafded..eb40c298b 100644 --- a/libc/sysdeps/ieee754/dbl-64/s_nearbyint.c +++ b/libc/sysdeps/ieee754/dbl-64/s_nearbyint.c @@ -44,10 +44,10 @@ double __nearbyint(double x) j0 = ((i0>>20)&0x7ff)-0x3ff; if(j0<52) { if(j0<0) { - feholdexcept (&env); + libc_feholdexcept (&env); w = TWO52[sx]+x; t = w-TWO52[sx]; - fesetenv (&env); + libc_fesetenv (&env); GET_HIGH_WORD(i0,t); SET_HIGH_WORD(t,(i0&0x7fffffff)|(sx<<31)); return t; @@ -56,10 +56,10 @@ double __nearbyint(double x) if(j0==0x400) return x+x; /* inf or NaN */ else return x; /* x is integral */ } - feholdexcept (&env); + libc_feholdexcept (&env); w = TWO52[sx]+x; t = w-TWO52[sx]; - fesetenv (&env); + libc_fesetenv (&env); return t; } weak_alias (__nearbyint, nearbyint) diff --git a/libc/sysdeps/ieee754/dbl-64/ulog.h b/libc/sysdeps/ieee754/dbl-64/ulog.h index 5afda3cda..eec1eef67 100644 --- a/libc/sysdeps/ieee754/dbl-64/ulog.h +++ b/libc/sysdeps/ieee754/dbl-64/ulog.h @@ -173,10 +173,6 @@ #endif #endif -#define ZERO 0.0 /* 0 */ -#define ONE 1.0 /* 1 */ -#define HALF 0x1.0p-1 /* 1/2 */ -#define MHALF -0x1.0p-1 /* -1/2 */ #define SQRT_2 sqrt_2.d #define DEL_U delu.d #define DEL_V delv.d diff --git a/libc/sysdeps/ieee754/dbl-64/utan.h b/libc/sysdeps/ieee754/dbl-64/utan.h index 5ab573ea2..3bdeee1c4 100644 --- 
a/libc/sysdeps/ieee754/dbl-64/utan.h +++ b/libc/sysdeps/ieee754/dbl-64/utan.h @@ -262,10 +262,4 @@ #endif #endif - -#define ZERO 0.0 -#define ONE 1.0 -#define MONE -1.0 -#define TWO8 0x1.0p8 /* 2^8 */ - #endif diff --git a/libc/sysdeps/powerpc/bits/mathdef.h b/libc/sysdeps/powerpc/bits/mathdef.h index 389d09902..b3c21fee6 100644 --- a/libc/sysdeps/powerpc/bits/mathdef.h +++ b/libc/sysdeps/powerpc/bits/mathdef.h @@ -37,10 +37,10 @@ typedef double double_t; # define FP_ILOGB0 (-2147483647) # define FP_ILOGBNAN (2147483647) -#ifndef __NO_FPRS__ +# if !defined _SOFT_FLOAT && !defined __NO_FPRS__ /* The powerpc has a combined multiply/add instruction. */ # define FP_FAST_FMA 1 # define FP_FAST_FMAF 1 -#endif +# endif #endif /* ISO C99 */ diff --git a/libc/sysdeps/powerpc/fpu/bits/fenvinline.h b/libc/sysdeps/powerpc/fpu/bits/fenvinline.h index 1bab7d64d..0720795d5 100644 --- a/libc/sysdeps/powerpc/fpu/bits/fenvinline.h +++ b/libc/sysdeps/powerpc/fpu/bits/fenvinline.h @@ -16,8 +16,8 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ -#if defined __GNUC__ && !defined _SOFT_FLOAT && !defined __NO_FPRS__ \ - && !defined __NO_MATH_INLINES +#if (defined __GNUC__ && !defined _SOFT_FLOAT && !defined __NO_FPRS__ \ + && !defined __NO_MATH_INLINES) /* Inline definition for fegetround. 
*/ # define fegetround() \ @@ -57,4 +57,4 @@ : 0) \ : (feclearexcept) (__excepts)) -#endif /* __GNUC__ && !__NO_FPRS__ */ +#endif /* __GNUC__ && !_SOFT_FLOAT && !__NO_FPRS__ */ diff --git a/libc/sysdeps/powerpc/fpu/bits/mathinline.h b/libc/sysdeps/powerpc/fpu/bits/mathinline.h index 75f2983f7..140fff08e 100644 --- a/libc/sysdeps/powerpc/fpu/bits/mathinline.h +++ b/libc/sysdeps/powerpc/fpu/bits/mathinline.h @@ -127,4 +127,4 @@ __NTH (fdimf (float __x, float __y)) #endif /* __USE_ISOC99 */ #endif /* !__NO_MATH_INLINES && __OPTIMIZE__ */ -#endif /* __GNUC__ && !__NO_FPRS__ */ +#endif /* __GNUC__ && !_SOFT_FLOAT && !__NO_FPRS__ */ diff --git a/libc/sysdeps/powerpc/fpu/fpu_control.h b/libc/sysdeps/powerpc/fpu/fpu_control.h index bb8375ab8..d03b8eb80 100644 --- a/libc/sysdeps/powerpc/fpu/fpu_control.h +++ b/libc/sysdeps/powerpc/fpu/fpu_control.h @@ -21,20 +21,20 @@ #ifdef _SOFT_FLOAT -#define _FPU_RESERVED 0xffffffff -#define _FPU_DEFAULT 0x00000000 /* Default value. */ +# define _FPU_RESERVED 0xffffffff +# define _FPU_DEFAULT 0x00000000 /* Default value. */ typedef unsigned int fpu_control_t; -#define _FPU_GETCW(cw) 0 -#define _FPU_SETCW(cw) do { } while (0) +# define _FPU_GETCW(cw) (cw) = 0 +# define _FPU_SETCW(cw) (void) (cw) extern fpu_control_t __fpu_control; #elif defined __NO_FPRS__ /* E500 */ /* rounding control */ -#define _FPU_RC_NEAREST 0x00 /* RECOMMENDED */ -#define _FPU_RC_DOWN 0x03 -#define _FPU_RC_UP 0x02 -#define _FPU_RC_ZERO 0x01 +# define _FPU_RC_NEAREST 0x00 /* RECOMMENDED */ +# define _FPU_RC_DOWN 0x03 +# define _FPU_RC_UP 0x02 +# define _FPU_RC_ZERO 0x01 /* masking of interrupts */ #define _FPU_MASK_ZM 0x10 /* zero divide */ @@ -71,41 +71,41 @@ extern fpu_control_t __fpu_control; #else /* PowerPC 6xx floating-point. 
*/ /* rounding control */ -#define _FPU_RC_NEAREST 0x00 /* RECOMMENDED */ -#define _FPU_RC_DOWN 0x03 -#define _FPU_RC_UP 0x02 -#define _FPU_RC_ZERO 0x01 +# define _FPU_RC_NEAREST 0x00 /* RECOMMENDED */ +# define _FPU_RC_DOWN 0x03 +# define _FPU_RC_UP 0x02 +# define _FPU_RC_ZERO 0x01 -#define _FPU_MASK_NI 0x04 /* non-ieee mode */ +# define _FPU_MASK_NI 0x04 /* non-ieee mode */ /* masking of interrupts */ -#define _FPU_MASK_ZM 0x10 /* zero divide */ -#define _FPU_MASK_OM 0x40 /* overflow */ -#define _FPU_MASK_UM 0x20 /* underflow */ -#define _FPU_MASK_XM 0x08 /* inexact */ -#define _FPU_MASK_IM 0x80 /* invalid operation */ +# define _FPU_MASK_ZM 0x10 /* zero divide */ +# define _FPU_MASK_OM 0x40 /* overflow */ +# define _FPU_MASK_UM 0x20 /* underflow */ +# define _FPU_MASK_XM 0x08 /* inexact */ +# define _FPU_MASK_IM 0x80 /* invalid operation */ -#define _FPU_RESERVED 0xffffff00 /* These bits are reserved are not changed. */ +# define _FPU_RESERVED 0xffffff00 /* These bits are reserved are not changed. */ /* The fdlibm code requires no interrupts for exceptions. */ -#define _FPU_DEFAULT 0x00000000 /* Default value. */ +# define _FPU_DEFAULT 0x00000000 /* Default value. */ /* IEEE: same as above, but (some) exceptions; we leave the 'inexact' exception off. */ -#define _FPU_IEEE 0x000000f0 +# define _FPU_IEEE 0x000000f0 /* Type of the control word. */ typedef unsigned int fpu_control_t __attribute__ ((__mode__ (__SI__))); /* Macros for accessing the hardware control word. */ -#define _FPU_GETCW(__cw) ( { \ +# define _FPU_GETCW(__cw) ( { \ union { double d; fpu_control_t cw[2]; } \ tmp __attribute__ ((__aligned__(8))); \ __asm__ ("mffs 0; stfd%U0 0,%0" : "=m" (tmp.d) : : "fr0"); \ (__cw)=tmp.cw[1]; \ tmp.cw[1]; } ) -#define _FPU_SETCW(__cw) { \ +# define _FPU_SETCW(__cw) { \ union { double d; fpu_control_t cw[2]; } \ tmp __attribute__ ((__aligned__(8))); \ tmp.cw[0] = 0xFFF80000; /* More-or-less arbitrary; this is a QNaN. 
*/ \ @@ -116,6 +116,6 @@ typedef unsigned int fpu_control_t __attribute__ ((__mode__ (__SI__))); /* Default control word set at startup. */ extern fpu_control_t __fpu_control; -#endif +#endif /* PowerPC 6xx floating-point. */ #endif /* _FPU_CONTROL_H */ diff --git a/libc/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c b/libc/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c index 9fcaa763c..16cb57785 100644 --- a/libc/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c +++ b/libc/sysdeps/powerpc/powerpc32/power4/fpu/mpa.c @@ -51,91 +51,135 @@ const mp_no mptwo = {1, {1.0, 2.0}}; /* Compare mantissa of two multiple precision numbers regardless of the sign and exponent of the numbers. */ -static int mcr(const mp_no *x, const mp_no *y, int p) { +static int +mcr (const mp_no *x, const mp_no *y, int p) +{ long i; long p2 = p; - for (i=1; i<=p2; i++) { - if (X[i] == Y[i]) continue; - else if (X[i] > Y[i]) return 1; - else return -1; } + for (i = 1; i <= p2; i++) + { + if (X[i] == Y[i]) + continue; + else if (X[i] > Y[i]) + return 1; + else + return -1; + } return 0; } /* Compare the absolute values of two multiple precision numbers. */ -int __acr(const mp_no *x, const mp_no *y, int p) { +int +__acr (const mp_no *x, const mp_no *y, int p) +{ long i; - if (X[0] == ZERO) { - if (Y[0] == ZERO) i= 0; - else i=-1; - } - else if (Y[0] == ZERO) i= 1; - else { - if (EX > EY) i= 1; - else if (EX < EY) i=-1; - else i= mcr(x,y,p); - } + if (X[0] == ZERO) + { + if (Y[0] == ZERO) + i = 0; + else + i = -1; + } + else if (Y[0] == ZERO) + i = 1; + else + { + if (EX > EY) + i = 1; + else if (EX < EY) + i = -1; + else + i = mcr (x, y, p); + } return i; } /* Copy multiple precision number X into Y. They could be the same number. 
*/ -void __cpy(const mp_no *x, mp_no *y, int p) { +void +__cpy (const mp_no *x, mp_no *y, int p) +{ long i; EY = EX; - for (i=0; i <= p; i++) Y[i] = X[i]; + for (i = 0; i <= p; i++) + Y[i] = X[i]; return; } /* Convert a multiple precision number *X into a double precision number *Y, normalized case (|x| >= 2**(-1022))). */ -static void norm(const mp_no *x, double *y, int p) +static void +norm (const mp_no *x, double *y, int p) { - #define R RADIXI +#define R RADIXI long i; - double a,c,u,v,z[5]; - if (p<5) { - if (p==1) c = X[1]; - else if (p==2) c = X[1] + R* X[2]; - else if (p==3) c = X[1] + R*(X[2] + R* X[3]); - else if (p==4) c =(X[1] + R* X[2]) + R*R*(X[3] + R*X[4]); - } - else { - for (a=ONE, z[1]=X[1]; z[1] < TWO23; ) - {a *= TWO; z[1] *= TWO; } - - for (i=2; i<5; i++) { - z[i] = X[i]*a; - u = (z[i] + CUTTER)-CUTTER; - if (u > z[i]) u -= RADIX; - z[i] -= u; - z[i-1] += u*RADIXI; - } - - u = (z[3] + TWO71) - TWO71; - if (u > z[3]) u -= TWO19; - v = z[3]-u; - - if (v == TWO18) { - if (z[4] == ZERO) { - for (i=5; i <= p; i++) { - if (X[i] == ZERO) continue; - else {z[3] += ONE; break; } - } - } - else z[3] += ONE; - } - - c = (z[1] + R *(z[2] + R * z[3]))/a; - } + double a, c, u, v, z[5]; + if (p < 5) + { + if (p == 1) + c = X[1]; + else if (p == 2) + c = X[1] + R * X[2]; + else if (p == 3) + c = X[1] + R * (X[2] + R * X[3]); + else if (p == 4) + c = (X[1] + R * X[2]) + R * R * (X[3] + R * X[4]); + } + else + { + for (a = ONE, z[1] = X[1]; z[1] < TWO23;) + { + a *= TWO; + z[1] *= TWO; + } + + for (i = 2; i < 5; i++) + { + z[i] = X[i] * a; + u = (z[i] + CUTTER) - CUTTER; + if (u > z[i]) + u -= RADIX; + z[i] -= u; + z[i - 1] += u * RADIXI; + } + + u = (z[3] + TWO71) - TWO71; + if (u > z[3]) + u -= TWO19; + v = z[3] - u; + + if (v == TWO18) + { + if (z[4] == ZERO) + { + for (i = 5; i <= p; i++) + { + if (X[i] == ZERO) + continue; + else + { + z[3] += ONE; + break; + } + } + } + else + z[3] += ONE; + } + + c = (z[1] + R * (z[2] + R * z[3])) / a; + } c *= X[0]; - 
for (i=1; i<EX; i++) c *= RADIX; - for (i=1; i>EX; i--) c *= RADIXI; + for (i = 1; i < EX; i++) + c *= RADIX; + for (i = 1; i > EX; i--) + c *= RADIXI; *y = c; return; @@ -144,46 +188,112 @@ static void norm(const mp_no *x, double *y, int p) /* Convert a multiple precision number *X into a double precision number *Y, Denormal case (|x| < 2**(-1022))). */ -static void denorm(const mp_no *x, double *y, int p) +static void +denorm (const mp_no *x, double *y, int p) { - long i,k; + long i, k; long p2 = p; - double c,u,z[5]; + double c, u, z[5]; #define R RADIXI - if (EX<-44 || (EX==-44 && X[1]<TWO5)) - { *y=ZERO; return; } - - if (p2==1) { - if (EX==-42) {z[1]=X[1]+TWO10; z[2]=ZERO; z[3]=ZERO; k=3;} - else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; z[3]=ZERO; k=2;} - else {z[1]= TWO10; z[2]=ZERO; z[3]=X[1]; k=1;} - } - else if (p2==2) { - if (EX==-42) {z[1]=X[1]+TWO10; z[2]=X[2]; z[3]=ZERO; k=3;} - else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; z[3]=X[2]; k=2;} - else {z[1]= TWO10; z[2]=ZERO; z[3]=X[1]; k=1;} - } - else { - if (EX==-42) {z[1]=X[1]+TWO10; z[2]=X[2]; k=3;} - else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; k=2;} - else {z[1]= TWO10; z[2]=ZERO; k=1;} - z[3] = X[k]; - } + if (EX < -44 || (EX == -44 && X[1] < TWO5)) + { + *y = ZERO; + return; + } + + if (p2 == 1) + { + if (EX == -42) + { + z[1] = X[1] + TWO10; + z[2] = ZERO; + z[3] = ZERO; + k = 3; + } + else if (EX == -43) + { + z[1] = TWO10; + z[2] = X[1]; + z[3] = ZERO; + k = 2; + } + else + { + z[1] = TWO10; + z[2] = ZERO; + z[3] = X[1]; + k = 1; + } + } + else if (p2 == 2) + { + if (EX == -42) + { + z[1] = X[1] + TWO10; + z[2] = X[2]; + z[3] = ZERO; + k = 3; + } + else if (EX == -43) + { + z[1] = TWO10; + z[2] = X[1]; + z[3] = X[2]; + k = 2; + } + else + { + z[1] = TWO10; + z[2] = ZERO; + z[3] = X[1]; + k = 1; + } + } + else + { + if (EX == -42) + { + z[1] = X[1] + TWO10; + z[2] = X[2]; + k = 3; + } + else if (EX == -43) + { + z[1] = TWO10; + z[2] = X[1]; + k = 2; + } + else + { + z[1] = TWO10; + z[2] = ZERO; + k = 
1; + } + z[3] = X[k]; + } u = (z[3] + TWO57) - TWO57; - if (u > z[3]) u -= TWO5; + if (u > z[3]) + u -= TWO5; - if (u==z[3]) { - for (i=k+1; i <= p2; i++) { - if (X[i] == ZERO) continue; - else {z[3] += ONE; break; } + if (u == z[3]) + { + for (i = k + 1; i <= p2; i++) + { + if (X[i] == ZERO) + continue; + else + { + z[3] += ONE; + break; + } + } } - } - c = X[0]*((z[1] + R*(z[2] + R*z[3])) - TWO10); + c = X[0] * ((z[1] + R * (z[2] + R * z[3])) - TWO10); - *y = c*TWOM1032; + *y = c * TWOM1032; return; #undef R @@ -191,39 +301,65 @@ static void denorm(const mp_no *x, double *y, int p) /* Convert multiple precision number *X into double precision number *Y. The result is correctly rounded to the nearest/even. */ -void __mp_dbl(const mp_no *x, double *y, int p) { - - if (X[0] == ZERO) {*y = ZERO; return; } +void +__mp_dbl (const mp_no *x, double *y, int p) +{ + if (X[0] == ZERO) + { + *y = ZERO; + return; + } - if (EX> -42) norm(x,y,p); - else if (EX==-42 && X[1]>=TWO10) norm(x,y,p); - else denorm(x,y,p); + if (EX > -42) + norm (x, y, p); + else if (EX == -42 && X[1] >= TWO10) + norm (x, y, p); + else + denorm (x, y, p); } /* Get the multiple precision equivalent of X into *Y. If the precision is too small, the result is truncated. */ -void __dbl_mp(double x, mp_no *y, int p) { - - long i,n; +void +__dbl_mp (double x, mp_no *y, int p) +{ + long i, n; long p2 = p; double u; /* Sign. */ - if (x == ZERO) {Y[0] = ZERO; return; } - else if (x > ZERO) Y[0] = ONE; - else {Y[0] = MONE; x=-x; } + if (x == ZERO) + { + Y[0] = ZERO; + return; + } + else if (x > ZERO) + Y[0] = ONE; + else + { + Y[0] = MONE; + x = -x; + } /* Exponent. */ - for (EY=ONE; x >= RADIX; EY += ONE) x *= RADIXI; - for ( ; x < ONE; EY -= ONE) x *= RADIX; + for (EY = ONE; x >= RADIX; EY += ONE) + x *= RADIXI; + for (; x < ONE; EY -= ONE) + x *= RADIX; /* Digits. 
*/ - n=MIN(p2,4); - for (i=1; i<=n; i++) { - u = (x + TWO52) - TWO52; - if (u>x) u -= ONE; - Y[i] = u; x -= u; x *= RADIX; } - for ( ; i<=p2; i++) Y[i] = ZERO; + n = MIN (p2, 4); + for (i = 1; i <= n; i++) + { + u = (x + TWO52) - TWO52; + if (u > x) + u -= ONE; + Y[i] = u; + x -= u; + x *= RADIX; + } + for (; i <= p2; i++) + Y[i] = ZERO; return; } @@ -231,93 +367,132 @@ void __dbl_mp(double x, mp_no *y, int p) { sign of the sum *Z is not changed. X and Y may overlap but not X and Z or Y and Z. No guard digit is used. The result equals the exact sum, truncated. */ -static void add_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) { - - long i,j,k; +static void +add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p) +{ + long i, j, k; long p2 = p; EZ = EX; - i=p2; j=p2+ EY - EX; k=p2+1; - - if (j<1) - {__cpy(x,z,p); return; } - else Z[k] = ZERO; - - for (; j>0; i--,j--) { - Z[k] += X[i] + Y[j]; - if (Z[k] >= RADIX) { - Z[k] -= RADIX; - Z[--k] = ONE; } - else - Z[--k] = ZERO; - } - - for (; i>0; i--) { - Z[k] += X[i]; - if (Z[k] >= RADIX) { - Z[k] -= RADIX; - Z[--k] = ONE; } - else - Z[--k] = ZERO; - } - - if (Z[1] == ZERO) { - for (i=1; i<=p2; i++) Z[i] = Z[i+1]; } - else EZ += ONE; + i = p2; + j = p2 + EY - EX; + k = p2 + 1; + + if (j < 1) + { + __cpy (x, z, p); + return; + } + else + Z[k] = ZERO; + + for (; j > 0; i--, j--) + { + Z[k] += X[i] + Y[j]; + if (Z[k] >= RADIX) + { + Z[k] -= RADIX; + Z[--k] = ONE; + } + else + Z[--k] = ZERO; + } + + for (; i > 0; i--) + { + Z[k] += X[i]; + if (Z[k] >= RADIX) + { + Z[k] -= RADIX; + Z[--k] = ONE; + } + else + Z[--k] = ZERO; + } + + if (Z[1] == ZERO) + { + for (i = 1; i <= p2; i++) + Z[i] = Z[i + 1]; + } + else + EZ += ONE; } /* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0. The sign of the difference *Z is not changed. X and Y may overlap but not X and Z or Y and Z. One guard digit is used. The error is less than one ULP. 
*/ -static void sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) { - - long i,j,k; +static void +sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p) +{ + long i, j, k; long p2 = p; EZ = EX; - if (EX == EY) { - i=j=k=p2; - Z[k] = Z[k+1] = ZERO; } - else { - j= EX - EY; - if (j > p2) {__cpy(x,z,p); return; } - else { - i=p2; j=p2+1-j; k=p2; - if (Y[j] > ZERO) { - Z[k+1] = RADIX - Y[j--]; - Z[k] = MONE; } - else { - Z[k+1] = ZERO; - Z[k] = ZERO; j--;} - } - } - - for (; j>0; i--,j--) { - Z[k] += (X[i] - Y[j]); - if (Z[k] < ZERO) { - Z[k] += RADIX; - Z[--k] = MONE; } - else - Z[--k] = ZERO; - } - - for (; i>0; i--) { - Z[k] += X[i]; - if (Z[k] < ZERO) { - Z[k] += RADIX; - Z[--k] = MONE; } - else - Z[--k] = ZERO; - } - - for (i=1; Z[i] == ZERO; i++) ; + if (EX == EY) + { + i = j = k = p2; + Z[k] = Z[k + 1] = ZERO; + } + else + { + j = EX - EY; + if (j > p2) + { + __cpy (x, z, p); + return; + } + else + { + i = p2; + j = p2 + 1 - j; + k = p2; + if (Y[j] > ZERO) + { + Z[k + 1] = RADIX - Y[j--]; + Z[k] = MONE; + } + else + { + Z[k + 1] = ZERO; + Z[k] = ZERO; + j--; + } + } + } + + for (; j > 0; i--, j--) + { + Z[k] += (X[i] - Y[j]); + if (Z[k] < ZERO) + { + Z[k] += RADIX; + Z[--k] = MONE; + } + else + Z[--k] = ZERO; + } + + for (; i > 0; i--) + { + Z[k] += X[i]; + if (Z[k] < ZERO) + { + Z[k] += RADIX; + Z[--k] = MONE; + } + else + Z[--k] = ZERO; + } + + for (i = 1; Z[i] == ZERO; i++); EZ = EZ - i + 1; - for (k=1; i <= p2+1; ) + for (k = 1; i <= p2 + 1;) Z[k++] = Z[i++]; - for (; k <= p2; ) + for (; k <= p2;) Z[k++] = ZERO; return; @@ -326,111 +501,186 @@ static void sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) { /* Add *X and *Y and store the result in *Z. X and Y may overlap, but not X and Z or Y and Z. One guard digit is used. The error is less than one ULP. 
*/ -void __add(const mp_no *x, const mp_no *y, mp_no *z, int p) { - +void +__add (const mp_no *x, const mp_no *y, mp_no *z, int p) +{ int n; - if (X[0] == ZERO) {__cpy(y,z,p); return; } - else if (Y[0] == ZERO) {__cpy(x,z,p); return; } - - if (X[0] == Y[0]) { - if (__acr(x,y,p) > 0) {add_magnitudes(x,y,z,p); Z[0] = X[0]; } - else {add_magnitudes(y,x,z,p); Z[0] = Y[0]; } - } - else { - if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p); Z[0] = X[0]; } - else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = Y[0]; } - else Z[0] = ZERO; - } + if (X[0] == ZERO) + { + __cpy (y, z, p); + return; + } + else if (Y[0] == ZERO) + { + __cpy (x, z, p); + return; + } + + if (X[0] == Y[0]) + { + if (__acr (x, y, p) > 0) + { + add_magnitudes (x, y, z, p); + Z[0] = X[0]; + } + else + { + add_magnitudes (y, x, z, p); + Z[0] = Y[0]; + } + } + else + { + if ((n = __acr (x, y, p)) == 1) + { + sub_magnitudes (x, y, z, p); + Z[0] = X[0]; + } + else if (n == -1) + { + sub_magnitudes (y, x, z, p); + Z[0] = Y[0]; + } + else + Z[0] = ZERO; + } return; } /* Subtract *Y from *X and return the result in *Z. X and Y may overlap but not X and Z or Y and Z. One guard digit is used. The error is less than one ULP. 
*/ -void __sub(const mp_no *x, const mp_no *y, mp_no *z, int p) { - +void +__sub (const mp_no *x, const mp_no *y, mp_no *z, int p) +{ int n; - if (X[0] == ZERO) {__cpy(y,z,p); Z[0] = -Z[0]; return; } - else if (Y[0] == ZERO) {__cpy(x,z,p); return; } - - if (X[0] != Y[0]) { - if (__acr(x,y,p) > 0) {add_magnitudes(x,y,z,p); Z[0] = X[0]; } - else {add_magnitudes(y,x,z,p); Z[0] = -Y[0]; } - } - else { - if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p); Z[0] = X[0]; } - else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = -Y[0]; } - else Z[0] = ZERO; - } + if (X[0] == ZERO) + { + __cpy (y, z, p); + Z[0] = -Z[0]; + return; + } + else if (Y[0] == ZERO) + { + __cpy (x, z, p); + return; + } + + if (X[0] != Y[0]) + { + if (__acr (x, y, p) > 0) + { + add_magnitudes (x, y, z, p); + Z[0] = X[0]; + } + else + { + add_magnitudes (y, x, z, p); + Z[0] = -Y[0]; + } + } + else + { + if ((n = __acr (x, y, p)) == 1) + { + sub_magnitudes (x, y, z, p); + Z[0] = X[0]; + } + else if (n == -1) + { + sub_magnitudes (y, x, z, p); + Z[0] = -Y[0]; + } + else + Z[0] = ZERO; + } return; } /* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P digits. In case P > 3 the error is bounded by 1.001 ULP. */ -void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) { - +void +__mul (const mp_no *x, const mp_no *y, mp_no *z, int p) +{ long i, i1, i2, j, k, k2; long p2 = p; double u, zk, zk2; - /* Is z=0? */ - if (X[0]*Y[0]==ZERO) - { Z[0]=ZERO; return; } + /* Is z=0? */ + if (X[0] * Y[0] == ZERO) + { + Z[0] = ZERO; + return; + } - /* Multiply, add and carry */ - k2 = (p2<3) ? p2+p2 : p2+3; - zk = Z[k2]=ZERO; - for (k=k2; k>1; ) { - if (k > p2) {i1=k-p2; i2=p2+1; } - else {i1=1; i2=k; } -#if 1 - /* Rearrange this inner loop to allow the fmadd instructions to be - independent and execute in parallel on processors that have - dual symmetrical FP pipelines. 
*/ - if (i1 < (i2-1)) + /* Multiply, add and carry */ + k2 = (p2 < 3) ? p2 + p2 : p2 + 3; + zk = Z[k2] = ZERO; + for (k = k2; k > 1;) { - /* Make sure we have at least 2 iterations. */ - if (((i2 - i1) & 1L) == 1L) + if (k > p2) { - /* Handle the odd iterations case. */ - zk2 = x->d[i2-1]*y->d[i1]; + i1 = k - p2; + i2 = p2 + 1; } - else - zk2 = 0.0; - /* Do two multiply/adds per loop iteration, using independent - accumulators; zk and zk2. */ - for (i=i1,j=i2-1; i<i2-1; i+=2,j-=2) + else { - zk += x->d[i]*y->d[j]; - zk2 += x->d[i+1]*y->d[j-1]; + i1 = 1; + i2 = k; + } +#if 1 + /* Rearrange this inner loop to allow the fmadd instructions to be + independent and execute in parallel on processors that have + dual symmetrical FP pipelines. */ + if (i1 < (i2 - 1)) + { + /* Make sure we have at least 2 iterations. */ + if (((i2 - i1) & 1L) == 1L) + { + /* Handle the odd iterations case. */ + zk2 = x->d[i2 - 1] * y->d[i1]; + } + else + zk2 = 0.0; + /* Do two multiply/adds per loop iteration, using independent + accumulators; zk and zk2. */ + for (i = i1, j = i2 - 1; i < i2 - 1; i += 2, j -= 2) + { + zk += x->d[i] * y->d[j]; + zk2 += x->d[i + 1] * y->d[j - 1]; + } + zk += zk2; /* Final sum. */ + } + else + { + /* Special case when iterations is 1. */ + zk += x->d[i1] * y->d[i1]; } - zk += zk2; /* Final sum. */ - } - else - { - /* Special case when iterations is 1. */ - zk += x->d[i1]*y->d[i1]; - } #else - /* The original code. */ - for (i=i1,j=i2-1; i<i2; i++,j--) zk += X[i]*Y[j]; + /* The original code. */ + for (i = i1, j = i2 - 1; i < i2; i++, j--) + zk += X[i] * Y[j]; #endif - u = (zk + CUTTER)-CUTTER; - if (u > zk) u -= RADIX; - Z[k] = zk - u; - zk = u*RADIXI; - --k; - } + u = (zk + CUTTER) - CUTTER; + if (u > zk) + u -= RADIX; + Z[k] = zk - u; + zk = u * RADIXI; + --k; + } Z[k] = zk; /* Is there a carry beyond the most significant digit? 
*/ - if (Z[1] == ZERO) { - for (i=1; i<=p2; i++) Z[i]=Z[i+1]; - EZ = EX + EY - 1; } + if (Z[1] == ZERO) + { + for (i = 1; i <= p2; i++) + Z[i] = Z[i + 1]; + EZ = EX + EY - 1; + } else EZ = EX + EY; @@ -444,26 +694,31 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) { - For P > 3: 2.001 * R ^ (1 - P) *X = 0 is not permissible. */ -void __inv(const mp_no *x, mp_no *y, int p) { +void +__inv (const mp_no *x, mp_no *y, int p) +{ long i; double t; - mp_no z,w; - static const int np1[] = {0,0,0,0,1,2,2,2,2,3,3,3,3,3,3,3,3,3, - 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4}; - const mp_no mptwo = {1,{1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, - 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, - 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, - 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}}; - - __cpy(x,&z,p); z.e=0; __mp_dbl(&z,&t,p); - t=ONE/t; __dbl_mp(t,y,p); EY -= EX; - - for (i=0; i<np1[p]; i++) { - __cpy(y,&w,p); - __mul(x,&w,y,p); - __sub(&mptwo,y,&z,p); - __mul(&w,&z,y,p); - } + mp_no z, w; + static const int np1[] = + { 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 + }; + + __cpy (x, &z, p); + z.e = 0; + __mp_dbl (&z, &t, p); + t = ONE / t; + __dbl_mp (t, y, p); + EY -= EX; + + for (i = 0; i < np1[p]; i++) + { + __cpy (y, &w, p); + __mul (x, &w, y, p); + __sub (&mptwo, y, &z, p); + __mul (&w, &z, y, p); + } return; } @@ -474,11 +729,17 @@ void __inv(const mp_no *x, mp_no *y, int p) { - For P > 3: 3.001 * R ^ (1 - P) *X = 0 is not permissible. 
*/ -void __dvd(const mp_no *x, const mp_no *y, mp_no *z, int p) { - +void +__dvd (const mp_no *x, const mp_no *y, mp_no *z, int p) +{ mp_no w; - if (X[0] == ZERO) Z[0] = ZERO; - else {__inv(y,&w,p); __mul(x,&w,z,p);} + if (X[0] == ZERO) + Z[0] = ZERO; + else + { + __inv (y, &w, p); + __mul (x, &w, z, p); + } return; } diff --git a/libc/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c b/libc/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c index 9fcaa763c..16cb57785 100644 --- a/libc/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c +++ b/libc/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c @@ -51,91 +51,135 @@ const mp_no mptwo = {1, {1.0, 2.0}}; /* Compare mantissa of two multiple precision numbers regardless of the sign and exponent of the numbers. */ -static int mcr(const mp_no *x, const mp_no *y, int p) { +static int +mcr (const mp_no *x, const mp_no *y, int p) +{ long i; long p2 = p; - for (i=1; i<=p2; i++) { - if (X[i] == Y[i]) continue; - else if (X[i] > Y[i]) return 1; - else return -1; } + for (i = 1; i <= p2; i++) + { + if (X[i] == Y[i]) + continue; + else if (X[i] > Y[i]) + return 1; + else + return -1; + } return 0; } /* Compare the absolute values of two multiple precision numbers. */ -int __acr(const mp_no *x, const mp_no *y, int p) { +int +__acr (const mp_no *x, const mp_no *y, int p) +{ long i; - if (X[0] == ZERO) { - if (Y[0] == ZERO) i= 0; - else i=-1; - } - else if (Y[0] == ZERO) i= 1; - else { - if (EX > EY) i= 1; - else if (EX < EY) i=-1; - else i= mcr(x,y,p); - } + if (X[0] == ZERO) + { + if (Y[0] == ZERO) + i = 0; + else + i = -1; + } + else if (Y[0] == ZERO) + i = 1; + else + { + if (EX > EY) + i = 1; + else if (EX < EY) + i = -1; + else + i = mcr (x, y, p); + } return i; } /* Copy multiple precision number X into Y. They could be the same number. 
*/ -void __cpy(const mp_no *x, mp_no *y, int p) { +void +__cpy (const mp_no *x, mp_no *y, int p) +{ long i; EY = EX; - for (i=0; i <= p; i++) Y[i] = X[i]; + for (i = 0; i <= p; i++) + Y[i] = X[i]; return; } /* Convert a multiple precision number *X into a double precision number *Y, normalized case (|x| >= 2**(-1022))). */ -static void norm(const mp_no *x, double *y, int p) +static void +norm (const mp_no *x, double *y, int p) { - #define R RADIXI +#define R RADIXI long i; - double a,c,u,v,z[5]; - if (p<5) { - if (p==1) c = X[1]; - else if (p==2) c = X[1] + R* X[2]; - else if (p==3) c = X[1] + R*(X[2] + R* X[3]); - else if (p==4) c =(X[1] + R* X[2]) + R*R*(X[3] + R*X[4]); - } - else { - for (a=ONE, z[1]=X[1]; z[1] < TWO23; ) - {a *= TWO; z[1] *= TWO; } - - for (i=2; i<5; i++) { - z[i] = X[i]*a; - u = (z[i] + CUTTER)-CUTTER; - if (u > z[i]) u -= RADIX; - z[i] -= u; - z[i-1] += u*RADIXI; - } - - u = (z[3] + TWO71) - TWO71; - if (u > z[3]) u -= TWO19; - v = z[3]-u; - - if (v == TWO18) { - if (z[4] == ZERO) { - for (i=5; i <= p; i++) { - if (X[i] == ZERO) continue; - else {z[3] += ONE; break; } - } - } - else z[3] += ONE; - } - - c = (z[1] + R *(z[2] + R * z[3]))/a; - } + double a, c, u, v, z[5]; + if (p < 5) + { + if (p == 1) + c = X[1]; + else if (p == 2) + c = X[1] + R * X[2]; + else if (p == 3) + c = X[1] + R * (X[2] + R * X[3]); + else if (p == 4) + c = (X[1] + R * X[2]) + R * R * (X[3] + R * X[4]); + } + else + { + for (a = ONE, z[1] = X[1]; z[1] < TWO23;) + { + a *= TWO; + z[1] *= TWO; + } + + for (i = 2; i < 5; i++) + { + z[i] = X[i] * a; + u = (z[i] + CUTTER) - CUTTER; + if (u > z[i]) + u -= RADIX; + z[i] -= u; + z[i - 1] += u * RADIXI; + } + + u = (z[3] + TWO71) - TWO71; + if (u > z[3]) + u -= TWO19; + v = z[3] - u; + + if (v == TWO18) + { + if (z[4] == ZERO) + { + for (i = 5; i <= p; i++) + { + if (X[i] == ZERO) + continue; + else + { + z[3] += ONE; + break; + } + } + } + else + z[3] += ONE; + } + + c = (z[1] + R * (z[2] + R * z[3])) / a; + } c *= X[0]; - 
for (i=1; i<EX; i++) c *= RADIX; - for (i=1; i>EX; i--) c *= RADIXI; + for (i = 1; i < EX; i++) + c *= RADIX; + for (i = 1; i > EX; i--) + c *= RADIXI; *y = c; return; @@ -144,46 +188,112 @@ static void norm(const mp_no *x, double *y, int p) /* Convert a multiple precision number *X into a double precision number *Y, Denormal case (|x| < 2**(-1022))). */ -static void denorm(const mp_no *x, double *y, int p) +static void +denorm (const mp_no *x, double *y, int p) { - long i,k; + long i, k; long p2 = p; - double c,u,z[5]; + double c, u, z[5]; #define R RADIXI - if (EX<-44 || (EX==-44 && X[1]<TWO5)) - { *y=ZERO; return; } - - if (p2==1) { - if (EX==-42) {z[1]=X[1]+TWO10; z[2]=ZERO; z[3]=ZERO; k=3;} - else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; z[3]=ZERO; k=2;} - else {z[1]= TWO10; z[2]=ZERO; z[3]=X[1]; k=1;} - } - else if (p2==2) { - if (EX==-42) {z[1]=X[1]+TWO10; z[2]=X[2]; z[3]=ZERO; k=3;} - else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; z[3]=X[2]; k=2;} - else {z[1]= TWO10; z[2]=ZERO; z[3]=X[1]; k=1;} - } - else { - if (EX==-42) {z[1]=X[1]+TWO10; z[2]=X[2]; k=3;} - else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; k=2;} - else {z[1]= TWO10; z[2]=ZERO; k=1;} - z[3] = X[k]; - } + if (EX < -44 || (EX == -44 && X[1] < TWO5)) + { + *y = ZERO; + return; + } + + if (p2 == 1) + { + if (EX == -42) + { + z[1] = X[1] + TWO10; + z[2] = ZERO; + z[3] = ZERO; + k = 3; + } + else if (EX == -43) + { + z[1] = TWO10; + z[2] = X[1]; + z[3] = ZERO; + k = 2; + } + else + { + z[1] = TWO10; + z[2] = ZERO; + z[3] = X[1]; + k = 1; + } + } + else if (p2 == 2) + { + if (EX == -42) + { + z[1] = X[1] + TWO10; + z[2] = X[2]; + z[3] = ZERO; + k = 3; + } + else if (EX == -43) + { + z[1] = TWO10; + z[2] = X[1]; + z[3] = X[2]; + k = 2; + } + else + { + z[1] = TWO10; + z[2] = ZERO; + z[3] = X[1]; + k = 1; + } + } + else + { + if (EX == -42) + { + z[1] = X[1] + TWO10; + z[2] = X[2]; + k = 3; + } + else if (EX == -43) + { + z[1] = TWO10; + z[2] = X[1]; + k = 2; + } + else + { + z[1] = TWO10; + z[2] = ZERO; + k = 
1; + } + z[3] = X[k]; + } u = (z[3] + TWO57) - TWO57; - if (u > z[3]) u -= TWO5; + if (u > z[3]) + u -= TWO5; - if (u==z[3]) { - for (i=k+1; i <= p2; i++) { - if (X[i] == ZERO) continue; - else {z[3] += ONE; break; } + if (u == z[3]) + { + for (i = k + 1; i <= p2; i++) + { + if (X[i] == ZERO) + continue; + else + { + z[3] += ONE; + break; + } + } } - } - c = X[0]*((z[1] + R*(z[2] + R*z[3])) - TWO10); + c = X[0] * ((z[1] + R * (z[2] + R * z[3])) - TWO10); - *y = c*TWOM1032; + *y = c * TWOM1032; return; #undef R @@ -191,39 +301,65 @@ static void denorm(const mp_no *x, double *y, int p) /* Convert multiple precision number *X into double precision number *Y. The result is correctly rounded to the nearest/even. */ -void __mp_dbl(const mp_no *x, double *y, int p) { - - if (X[0] == ZERO) {*y = ZERO; return; } +void +__mp_dbl (const mp_no *x, double *y, int p) +{ + if (X[0] == ZERO) + { + *y = ZERO; + return; + } - if (EX> -42) norm(x,y,p); - else if (EX==-42 && X[1]>=TWO10) norm(x,y,p); - else denorm(x,y,p); + if (EX > -42) + norm (x, y, p); + else if (EX == -42 && X[1] >= TWO10) + norm (x, y, p); + else + denorm (x, y, p); } /* Get the multiple precision equivalent of X into *Y. If the precision is too small, the result is truncated. */ -void __dbl_mp(double x, mp_no *y, int p) { - - long i,n; +void +__dbl_mp (double x, mp_no *y, int p) +{ + long i, n; long p2 = p; double u; /* Sign. */ - if (x == ZERO) {Y[0] = ZERO; return; } - else if (x > ZERO) Y[0] = ONE; - else {Y[0] = MONE; x=-x; } + if (x == ZERO) + { + Y[0] = ZERO; + return; + } + else if (x > ZERO) + Y[0] = ONE; + else + { + Y[0] = MONE; + x = -x; + } /* Exponent. */ - for (EY=ONE; x >= RADIX; EY += ONE) x *= RADIXI; - for ( ; x < ONE; EY -= ONE) x *= RADIX; + for (EY = ONE; x >= RADIX; EY += ONE) + x *= RADIXI; + for (; x < ONE; EY -= ONE) + x *= RADIX; /* Digits. 
*/ - n=MIN(p2,4); - for (i=1; i<=n; i++) { - u = (x + TWO52) - TWO52; - if (u>x) u -= ONE; - Y[i] = u; x -= u; x *= RADIX; } - for ( ; i<=p2; i++) Y[i] = ZERO; + n = MIN (p2, 4); + for (i = 1; i <= n; i++) + { + u = (x + TWO52) - TWO52; + if (u > x) + u -= ONE; + Y[i] = u; + x -= u; + x *= RADIX; + } + for (; i <= p2; i++) + Y[i] = ZERO; return; } @@ -231,93 +367,132 @@ void __dbl_mp(double x, mp_no *y, int p) { sign of the sum *Z is not changed. X and Y may overlap but not X and Z or Y and Z. No guard digit is used. The result equals the exact sum, truncated. */ -static void add_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) { - - long i,j,k; +static void +add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p) +{ + long i, j, k; long p2 = p; EZ = EX; - i=p2; j=p2+ EY - EX; k=p2+1; - - if (j<1) - {__cpy(x,z,p); return; } - else Z[k] = ZERO; - - for (; j>0; i--,j--) { - Z[k] += X[i] + Y[j]; - if (Z[k] >= RADIX) { - Z[k] -= RADIX; - Z[--k] = ONE; } - else - Z[--k] = ZERO; - } - - for (; i>0; i--) { - Z[k] += X[i]; - if (Z[k] >= RADIX) { - Z[k] -= RADIX; - Z[--k] = ONE; } - else - Z[--k] = ZERO; - } - - if (Z[1] == ZERO) { - for (i=1; i<=p2; i++) Z[i] = Z[i+1]; } - else EZ += ONE; + i = p2; + j = p2 + EY - EX; + k = p2 + 1; + + if (j < 1) + { + __cpy (x, z, p); + return; + } + else + Z[k] = ZERO; + + for (; j > 0; i--, j--) + { + Z[k] += X[i] + Y[j]; + if (Z[k] >= RADIX) + { + Z[k] -= RADIX; + Z[--k] = ONE; + } + else + Z[--k] = ZERO; + } + + for (; i > 0; i--) + { + Z[k] += X[i]; + if (Z[k] >= RADIX) + { + Z[k] -= RADIX; + Z[--k] = ONE; + } + else + Z[--k] = ZERO; + } + + if (Z[1] == ZERO) + { + for (i = 1; i <= p2; i++) + Z[i] = Z[i + 1]; + } + else + EZ += ONE; } /* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0. The sign of the difference *Z is not changed. X and Y may overlap but not X and Z or Y and Z. One guard digit is used. The error is less than one ULP. 
*/ -static void sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) { - - long i,j,k; +static void +sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p) +{ + long i, j, k; long p2 = p; EZ = EX; - if (EX == EY) { - i=j=k=p2; - Z[k] = Z[k+1] = ZERO; } - else { - j= EX - EY; - if (j > p2) {__cpy(x,z,p); return; } - else { - i=p2; j=p2+1-j; k=p2; - if (Y[j] > ZERO) { - Z[k+1] = RADIX - Y[j--]; - Z[k] = MONE; } - else { - Z[k+1] = ZERO; - Z[k] = ZERO; j--;} - } - } - - for (; j>0; i--,j--) { - Z[k] += (X[i] - Y[j]); - if (Z[k] < ZERO) { - Z[k] += RADIX; - Z[--k] = MONE; } - else - Z[--k] = ZERO; - } - - for (; i>0; i--) { - Z[k] += X[i]; - if (Z[k] < ZERO) { - Z[k] += RADIX; - Z[--k] = MONE; } - else - Z[--k] = ZERO; - } - - for (i=1; Z[i] == ZERO; i++) ; + if (EX == EY) + { + i = j = k = p2; + Z[k] = Z[k + 1] = ZERO; + } + else + { + j = EX - EY; + if (j > p2) + { + __cpy (x, z, p); + return; + } + else + { + i = p2; + j = p2 + 1 - j; + k = p2; + if (Y[j] > ZERO) + { + Z[k + 1] = RADIX - Y[j--]; + Z[k] = MONE; + } + else + { + Z[k + 1] = ZERO; + Z[k] = ZERO; + j--; + } + } + } + + for (; j > 0; i--, j--) + { + Z[k] += (X[i] - Y[j]); + if (Z[k] < ZERO) + { + Z[k] += RADIX; + Z[--k] = MONE; + } + else + Z[--k] = ZERO; + } + + for (; i > 0; i--) + { + Z[k] += X[i]; + if (Z[k] < ZERO) + { + Z[k] += RADIX; + Z[--k] = MONE; + } + else + Z[--k] = ZERO; + } + + for (i = 1; Z[i] == ZERO; i++); EZ = EZ - i + 1; - for (k=1; i <= p2+1; ) + for (k = 1; i <= p2 + 1;) Z[k++] = Z[i++]; - for (; k <= p2; ) + for (; k <= p2;) Z[k++] = ZERO; return; @@ -326,111 +501,186 @@ static void sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) { /* Add *X and *Y and store the result in *Z. X and Y may overlap, but not X and Z or Y and Z. One guard digit is used. The error is less than one ULP. 
*/ -void __add(const mp_no *x, const mp_no *y, mp_no *z, int p) { - +void +__add (const mp_no *x, const mp_no *y, mp_no *z, int p) +{ int n; - if (X[0] == ZERO) {__cpy(y,z,p); return; } - else if (Y[0] == ZERO) {__cpy(x,z,p); return; } - - if (X[0] == Y[0]) { - if (__acr(x,y,p) > 0) {add_magnitudes(x,y,z,p); Z[0] = X[0]; } - else {add_magnitudes(y,x,z,p); Z[0] = Y[0]; } - } - else { - if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p); Z[0] = X[0]; } - else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = Y[0]; } - else Z[0] = ZERO; - } + if (X[0] == ZERO) + { + __cpy (y, z, p); + return; + } + else if (Y[0] == ZERO) + { + __cpy (x, z, p); + return; + } + + if (X[0] == Y[0]) + { + if (__acr (x, y, p) > 0) + { + add_magnitudes (x, y, z, p); + Z[0] = X[0]; + } + else + { + add_magnitudes (y, x, z, p); + Z[0] = Y[0]; + } + } + else + { + if ((n = __acr (x, y, p)) == 1) + { + sub_magnitudes (x, y, z, p); + Z[0] = X[0]; + } + else if (n == -1) + { + sub_magnitudes (y, x, z, p); + Z[0] = Y[0]; + } + else + Z[0] = ZERO; + } return; } /* Subtract *Y from *X and return the result in *Z. X and Y may overlap but not X and Z or Y and Z. One guard digit is used. The error is less than one ULP. 
*/ -void __sub(const mp_no *x, const mp_no *y, mp_no *z, int p) { - +void +__sub (const mp_no *x, const mp_no *y, mp_no *z, int p) +{ int n; - if (X[0] == ZERO) {__cpy(y,z,p); Z[0] = -Z[0]; return; } - else if (Y[0] == ZERO) {__cpy(x,z,p); return; } - - if (X[0] != Y[0]) { - if (__acr(x,y,p) > 0) {add_magnitudes(x,y,z,p); Z[0] = X[0]; } - else {add_magnitudes(y,x,z,p); Z[0] = -Y[0]; } - } - else { - if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p); Z[0] = X[0]; } - else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = -Y[0]; } - else Z[0] = ZERO; - } + if (X[0] == ZERO) + { + __cpy (y, z, p); + Z[0] = -Z[0]; + return; + } + else if (Y[0] == ZERO) + { + __cpy (x, z, p); + return; + } + + if (X[0] != Y[0]) + { + if (__acr (x, y, p) > 0) + { + add_magnitudes (x, y, z, p); + Z[0] = X[0]; + } + else + { + add_magnitudes (y, x, z, p); + Z[0] = -Y[0]; + } + } + else + { + if ((n = __acr (x, y, p)) == 1) + { + sub_magnitudes (x, y, z, p); + Z[0] = X[0]; + } + else if (n == -1) + { + sub_magnitudes (y, x, z, p); + Z[0] = -Y[0]; + } + else + Z[0] = ZERO; + } return; } /* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P digits. In case P > 3 the error is bounded by 1.001 ULP. */ -void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) { - +void +__mul (const mp_no *x, const mp_no *y, mp_no *z, int p) +{ long i, i1, i2, j, k, k2; long p2 = p; double u, zk, zk2; - /* Is z=0? */ - if (X[0]*Y[0]==ZERO) - { Z[0]=ZERO; return; } + /* Is z=0? */ + if (X[0] * Y[0] == ZERO) + { + Z[0] = ZERO; + return; + } - /* Multiply, add and carry */ - k2 = (p2<3) ? p2+p2 : p2+3; - zk = Z[k2]=ZERO; - for (k=k2; k>1; ) { - if (k > p2) {i1=k-p2; i2=p2+1; } - else {i1=1; i2=k; } -#if 1 - /* Rearrange this inner loop to allow the fmadd instructions to be - independent and execute in parallel on processors that have - dual symmetrical FP pipelines. 
*/ - if (i1 < (i2-1)) + /* Multiply, add and carry */ + k2 = (p2 < 3) ? p2 + p2 : p2 + 3; + zk = Z[k2] = ZERO; + for (k = k2; k > 1;) { - /* Make sure we have at least 2 iterations. */ - if (((i2 - i1) & 1L) == 1L) + if (k > p2) { - /* Handle the odd iterations case. */ - zk2 = x->d[i2-1]*y->d[i1]; + i1 = k - p2; + i2 = p2 + 1; } - else - zk2 = 0.0; - /* Do two multiply/adds per loop iteration, using independent - accumulators; zk and zk2. */ - for (i=i1,j=i2-1; i<i2-1; i+=2,j-=2) + else { - zk += x->d[i]*y->d[j]; - zk2 += x->d[i+1]*y->d[j-1]; + i1 = 1; + i2 = k; + } +#if 1 + /* Rearrange this inner loop to allow the fmadd instructions to be + independent and execute in parallel on processors that have + dual symmetrical FP pipelines. */ + if (i1 < (i2 - 1)) + { + /* Make sure we have at least 2 iterations. */ + if (((i2 - i1) & 1L) == 1L) + { + /* Handle the odd iterations case. */ + zk2 = x->d[i2 - 1] * y->d[i1]; + } + else + zk2 = 0.0; + /* Do two multiply/adds per loop iteration, using independent + accumulators; zk and zk2. */ + for (i = i1, j = i2 - 1; i < i2 - 1; i += 2, j -= 2) + { + zk += x->d[i] * y->d[j]; + zk2 += x->d[i + 1] * y->d[j - 1]; + } + zk += zk2; /* Final sum. */ + } + else + { + /* Special case when iterations is 1. */ + zk += x->d[i1] * y->d[i1]; } - zk += zk2; /* Final sum. */ - } - else - { - /* Special case when iterations is 1. */ - zk += x->d[i1]*y->d[i1]; - } #else - /* The original code. */ - for (i=i1,j=i2-1; i<i2; i++,j--) zk += X[i]*Y[j]; + /* The original code. */ + for (i = i1, j = i2 - 1; i < i2; i++, j--) + zk += X[i] * Y[j]; #endif - u = (zk + CUTTER)-CUTTER; - if (u > zk) u -= RADIX; - Z[k] = zk - u; - zk = u*RADIXI; - --k; - } + u = (zk + CUTTER) - CUTTER; + if (u > zk) + u -= RADIX; + Z[k] = zk - u; + zk = u * RADIXI; + --k; + } Z[k] = zk; /* Is there a carry beyond the most significant digit? 
*/ - if (Z[1] == ZERO) { - for (i=1; i<=p2; i++) Z[i]=Z[i+1]; - EZ = EX + EY - 1; } + if (Z[1] == ZERO) + { + for (i = 1; i <= p2; i++) + Z[i] = Z[i + 1]; + EZ = EX + EY - 1; + } else EZ = EX + EY; @@ -444,26 +694,31 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) { - For P > 3: 2.001 * R ^ (1 - P) *X = 0 is not permissible. */ -void __inv(const mp_no *x, mp_no *y, int p) { +void +__inv (const mp_no *x, mp_no *y, int p) +{ long i; double t; - mp_no z,w; - static const int np1[] = {0,0,0,0,1,2,2,2,2,3,3,3,3,3,3,3,3,3, - 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4}; - const mp_no mptwo = {1,{1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, - 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, - 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0, - 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}}; - - __cpy(x,&z,p); z.e=0; __mp_dbl(&z,&t,p); - t=ONE/t; __dbl_mp(t,y,p); EY -= EX; - - for (i=0; i<np1[p]; i++) { - __cpy(y,&w,p); - __mul(x,&w,y,p); - __sub(&mptwo,y,&z,p); - __mul(&w,&z,y,p); - } + mp_no z, w; + static const int np1[] = + { 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 + }; + + __cpy (x, &z, p); + z.e = 0; + __mp_dbl (&z, &t, p); + t = ONE / t; + __dbl_mp (t, y, p); + EY -= EX; + + for (i = 0; i < np1[p]; i++) + { + __cpy (y, &w, p); + __mul (x, &w, y, p); + __sub (&mptwo, y, &z, p); + __mul (&w, &z, y, p); + } return; } @@ -474,11 +729,17 @@ void __inv(const mp_no *x, mp_no *y, int p) { - For P > 3: 3.001 * R ^ (1 - P) *X = 0 is not permissible. 
*/ -void __dvd(const mp_no *x, const mp_no *y, mp_no *z, int p) { - +void +__dvd (const mp_no *x, const mp_no *y, mp_no *z, int p) +{ mp_no w; - if (X[0] == ZERO) Z[0] = ZERO; - else {__inv(y,&w,p); __mul(x,&w,z,p);} + if (X[0] == ZERO) + Z[0] = ZERO; + else + { + __inv (y, &w, p); + __mul (x, &w, z, p); + } return; } diff --git a/libc/sysdeps/s390/bits/byteswap.h b/libc/sysdeps/s390/bits/byteswap.h index 6f9625700..5d289120f 100644 --- a/libc/sysdeps/s390/bits/byteswap.h +++ b/libc/sysdeps/s390/bits/byteswap.h @@ -112,7 +112,7 @@ __bswap_32 (unsigned int __bsx) __r.__l[1] = __bswap_32 (__w.__l[0]); \ __r.__ll; }) # endif -#elif __GLIBC_HAVE_LONG_LONG +#else # define __bswap_constant_64(x) \ ((((x) & 0xff00000000000000ull) >> 56) \ | (((x) & 0x00ff000000000000ull) >> 40) \ @@ -123,6 +123,7 @@ __bswap_32 (unsigned int __bsx) | (((x) & 0x000000000000ff00ull) << 40) \ | (((x) & 0x00000000000000ffull) << 56)) +__extension__ static __inline unsigned long long int __bswap_64 (unsigned long long int __bsx) { diff --git a/libc/sysdeps/sh/Makefile b/libc/sysdeps/sh/Makefile index 34c636a99..bb7c55362 100644 --- a/libc/sysdeps/sh/Makefile +++ b/libc/sysdeps/sh/Makefile @@ -4,9 +4,4 @@ endif ifeq ($(subdir),debug) CFLAGS-backtrace.c += -funwind-tables -CFLAGS-tst-backtrace2.c += -funwind-tables -CFLAGS-tst-backtrace3.c += -funwind-tables -CFLAGS-tst-backtrace4.c += -funwind-tables -CFLAGS-tst-backtrace5.c += -funwind-tables -CFLAGS-tst-backtrace6.c += -funwind-tables endif diff --git a/libc/sysdeps/sparc/fpu/libm-test-ulps b/libc/sysdeps/sparc/fpu/libm-test-ulps index d2770442a..f282a160a 100644 --- a/libc/sysdeps/sparc/fpu/libm-test-ulps +++ b/libc/sysdeps/sparc/fpu/libm-test-ulps @@ -198,6 +198,42 @@ double: 1 idouble: 1 ildouble: 1 ldouble: 1 +Test "Real part of: cacos (-0.5 + +0 i) == 2.094395102393195492308428922186335256131 - 0 i": +ildouble: 1 +ldouble: 1 +Test "Real part of: cacos (-0.5 - 0 i) == 2.094395102393195492308428922186335256131 + +0 i": +ildouble: 1 
+ldouble: 1 +Test "Imaginary part of: cacos (-0x1p500 + 1.0 i) == 3.141592653589793238462643383279502884197 - 3.472667374605326000180332928505464606058e2 i": +ildouble: 1 +ldouble: 1 +Test "Imaginary part of: cacos (-0x1p500 - 1.0 i) == 3.141592653589793238462643383279502884197 + 3.472667374605326000180332928505464606058e2 i": +ildouble: 1 +ldouble: 1 +Test "Imaginary part of: cacos (-0x1p5000 + 1.0 i) == 3.141592653589793238462643383279502884197 - 3.466429049980286492395577839412341016946e3 i": +ildouble: 1 +ldouble: 1 +Test "Imaginary part of: cacos (-0x1p5000 - 1.0 i) == 3.141592653589793238462643383279502884197 + 3.466429049980286492395577839412341016946e3 i": +ildouble: 1 +ldouble: 1 +Test "Real part of: cacos (-1.0 + 0x1p50 i) == 1.570796326794897507409741391764983781004 - 3.535050620855721078027883819436759661753e1 i": +float: 1 +ifloat: 1 +Test "Imaginary part of: cacos (-1.0 + 0x1p500 i) == 1.570796326794896619231321691639751442099 - 3.472667374605326000180332928505464606058e2 i": +ildouble: 1 +ldouble: 1 +Test "Imaginary part of: cacos (-1.0 + 0x1p5000 i) == 1.570796326794896619231321691639751442099 - 3.466429049980286492395577839412341016946e3 i": +ildouble: 1 +ldouble: 1 +Test "Real part of: cacos (-1.0 - 0x1p50 i) == 1.570796326794897507409741391764983781004 + 3.535050620855721078027883819436759661753e1 i": +float: 1 +ifloat: 1 +Test "Imaginary part of: cacos (-1.0 - 0x1p500 i) == 1.570796326794896619231321691639751442099 + 3.472667374605326000180332928505464606058e2 i": +ildouble: 1 +ldouble: 1 +Test "Imaginary part of: cacos (-1.0 - 0x1p5000 i) == 1.570796326794896619231321691639751442099 + 3.466429049980286492395577839412341016946e3 i": +ildouble: 1 +ldouble: 1 Test "Imaginary part of: cacos (-1.5 + +0 i) == pi - 0.9624236501192068949955178268487368462704 i": double: 1 float: 1 @@ -205,6 +241,11 @@ idouble: 1 ifloat: 1 ildouble: 1 ldouble: 1 +Test "Real part of: cacos (-2 - 3 i) == 2.1414491111159960199416055713254211 + 
1.9833870299165354323470769028940395 i": +float: 1 +ifloat: 1 +ildouble: 1 +ldouble: 1 Test "Real part of: cacos (0.5 + +0 i) == 1.047197551196597746154214461093167628066 - 0 i": double: 1 idouble: 1 @@ -221,6 +262,39 @@ ifloat: 1 Test "Imaginary part of: cacos (0.75 + 1.25 i) == 1.11752014915610270578240049553777969 - 1.13239363160530819522266333696834467 i": ildouble: 1 ldouble: 1 +Test "Imaginary part of: cacos (0x1.fp1023 + 0x1.fp1023 i) == 7.853981633974483096156608458198757210493e-1 - 7.107906849659093345062145442726115449315e2 i": +double: 1 +idouble: 1 +Test "Imaginary part of: cacos (0x1.fp127 + 0x1.fp127 i) == 7.853981633974483096156608458198757210493e-1 - 8.973081118419833726837456344608533993585e1 i": +double: 1 +idouble: 1 +Test "Imaginary part of: cacos (0x1.fp16383 + 0x1.fp16383 i) == 7.853981633974483096156608458198757210493e-1 - 1.135753137836666928715489992987020363057e4 i": +ildouble: 1 +ldouble: 1 +Test "Imaginary part of: cacos (0x1p500 + 1.0 i) == 3.054936363499604682051979393213617699789e-151 - 3.472667374605326000180332928505464606058e2 i": +ildouble: 1 +ldouble: 1 +Test "Imaginary part of: cacos (0x1p500 - 1.0 i) == 3.054936363499604682051979393213617699789e-151 + 3.472667374605326000180332928505464606058e2 i": +ildouble: 1 +ldouble: 1 +Test "Imaginary part of: cacos (0x1p5000 + 1.0 i) == 7.079811261048172892385615158694057552948e-1506 - 3.466429049980286492395577839412341016946e3 i": +ildouble: 1 +ldouble: 1 +Test "Imaginary part of: cacos (0x1p5000 - 1.0 i) == 7.079811261048172892385615158694057552948e-1506 + 3.466429049980286492395577839412341016946e3 i": +ildouble: 1 +ldouble: 1 +Test "Imaginary part of: cacos (1.0 + 0x1p500 i) == 1.570796326794896619231321691639751442099 - 3.472667374605326000180332928505464606058e2 i": +ildouble: 1 +ldouble: 1 +Test "Imaginary part of: cacos (1.0 + 0x1p5000 i) == 1.570796326794896619231321691639751442099 - 3.466429049980286492395577839412341016946e3 i": +ildouble: 1 +ldouble: 1 +Test "Imaginary part 
of: cacos (1.0 - 0x1p500 i) == 1.570796326794896619231321691639751442099 + 3.472667374605326000180332928505464606058e2 i": +ildouble: 1 +ldouble: 1 +Test "Imaginary part of: cacos (1.0 - 0x1p5000 i) == 1.570796326794896619231321691639751442099 + 3.466429049980286492395577839412341016946e3 i": +ildouble: 1 +ldouble: 1 Test "Imaginary part of: cacos (1.5 + +0 i) == +0 - 0.9624236501192068949955178268487368462704 i": double: 1 float: 1 @@ -407,6 +481,30 @@ double: 1 idouble: 1 ildouble: 1 ldouble: 1 +Test "Imaginary part of: casin (-0x1p500 + 1.0 i) == -1.570796326794896619231321691639751442099 + 3.472667374605326000180332928505464606058e2 i": +ildouble: 1 +ldouble: 1 +Test "Imaginary part of: casin (-0x1p500 - 1.0 i) == -1.570796326794896619231321691639751442099 - 3.472667374605326000180332928505464606058e2 i": +ildouble: 1 +ldouble: 1 +Test "Imaginary part of: casin (-0x1p5000 + 1.0 i) == -1.570796326794896619231321691639751442099 + 3.466429049980286492395577839412341016946e3 i": +ildouble: 1 +ldouble: 1 +Test "Imaginary part of: casin (-0x1p5000 - 1.0 i) == -1.570796326794896619231321691639751442099 - 3.466429049980286492395577839412341016946e3 i": +ildouble: 1 +ldouble: 1 +Test "Imaginary part of: casin (-1.0 + 0x1p500 i) == -3.054936363499604682051979393213617699789e-151 + 3.472667374605326000180332928505464606058e2 i": +ildouble: 1 +ldouble: 1 +Test "Imaginary part of: casin (-1.0 + 0x1p5000 i) == -7.079811261048172892385615158694057552948e-1506 + 3.466429049980286492395577839412341016946e3 i": +ildouble: 1 +ldouble: 1 +Test "Imaginary part of: casin (-1.0 - 0x1p500 i) == -3.054936363499604682051979393213617699789e-151 - 3.472667374605326000180332928505464606058e2 i": +ildouble: 1 +ldouble: 1 +Test "Imaginary part of: casin (-1.0 - 0x1p5000 i) == -7.079811261048172892385615158694057552948e-1506 - 3.466429049980286492395577839412341016946e3 i": +ildouble: 1 +ldouble: 1 Test "Imaginary part of: casin (-1.5 + +0 i) == -pi/2 + 
0.9624236501192068949955178268487368462704 i": double: 1 float: 1 @@ -422,6 +520,39 @@ ifloat: 1 Test "Imaginary part of: casin (0.75 + 1.25 i) == 0.453276177638793913448921196101971749 + 1.13239363160530819522266333696834467 i": ildouble: 1 ldouble: 1 +Test "Imaginary part of: casin (0x1.fp1023 + 0x1.fp1023 i) == 7.853981633974483096156608458198757210493e-1 + 7.107906849659093345062145442726115449315e2 i": +double: 1 +idouble: 1 +Test "Imaginary part of: casin (0x1.fp127 + 0x1.fp127 i) == 7.853981633974483096156608458198757210493e-1 + 8.973081118419833726837456344608533993585e1 i": +double: 1 +idouble: 1 +Test "Imaginary part of: casin (0x1.fp16383 + 0x1.fp16383 i) == 7.853981633974483096156608458198757210493e-1 + 1.135753137836666928715489992987020363057e4 i": +ildouble: 1 +ldouble: 1 +Test "Imaginary part of: casin (0x1p500 + 1.0 i) == 1.570796326794896619231321691639751442099 + 3.472667374605326000180332928505464606058e2 i": +ildouble: 1 +ldouble: 1 +Test "Imaginary part of: casin (0x1p500 - 1.0 i) == 1.570796326794896619231321691639751442099 - 3.472667374605326000180332928505464606058e2 i": +ildouble: 1 +ldouble: 1 +Test "Imaginary part of: casin (0x1p5000 + 1.0 i) == 1.570796326794896619231321691639751442099 + 3.466429049980286492395577839412341016946e3 i": +ildouble: 1 +ldouble: 1 +Test "Imaginary part of: casin (0x1p5000 - 1.0 i) == 1.570796326794896619231321691639751442099 - 3.466429049980286492395577839412341016946e3 i": +ildouble: 1 +ldouble: 1 +Test "Imaginary part of: casin (1.0 + 0x1p500 i) == 3.054936363499604682051979393213617699789e-151 + 3.472667374605326000180332928505464606058e2 i": +ildouble: 1 +ldouble: 1 +Test "Imaginary part of: casin (1.0 + 0x1p5000 i) == 7.079811261048172892385615158694057552948e-1506 + 3.466429049980286492395577839412341016946e3 i": +ildouble: 1 +ldouble: 1 +Test "Imaginary part of: casin (1.0 - 0x1p500 i) == 3.054936363499604682051979393213617699789e-151 - 3.472667374605326000180332928505464606058e2 i": +ildouble: 1 
+ldouble: 1 +Test "Imaginary part of: casin (1.0 - 0x1p5000 i) == 7.079811261048172892385615158694057552948e-1506 - 3.466429049980286492395577839412341016946e3 i": +ildouble: 1 +ldouble: 1 Test "Imaginary part of: casin (1.5 + +0 i) == pi/2 + 0.9624236501192068949955178268487368462704 i": double: 1 float: 1 @@ -459,6 +590,18 @@ idouble: 2 ifloat: 1 ildouble: 1 ldouble: 1 +Test "Real part of: casinh (-0x1p500 + 1.0 i) == -3.472667374605326000180332928505464606058e2 + 3.054936363499604682051979393213617699789e-151 i": +ildouble: 1 +ldouble: 1 +Test "Real part of: casinh (-0x1p500 - 1.0 i) == -3.472667374605326000180332928505464606058e2 - 3.054936363499604682051979393213617699789e-151 i": +ildouble: 1 +ldouble: 1 +Test "Real part of: casinh (-0x1p5000 + 1.0 i) == -3.466429049980286492395577839412341016946e3 + 7.079811261048172892385615158694057552948e-1506 i": +ildouble: 1 +ldouble: 1 +Test "Real part of: casinh (-0x1p5000 - 1.0 i) == -3.466429049980286492395577839412341016946e3 - 7.079811261048172892385615158694057552948e-1506 i": +ildouble: 1 +ldouble: 1 Test "Real part of: casinh (-1.0 + +0 i) == -0.8813735870195430252326093249797923090282 + +0 i": double: 2 float: 1 @@ -466,6 +609,12 @@ idouble: 2 ifloat: 1 ildouble: 3 ldouble: 3 +Test "Real part of: casinh (-1.0 + 0x1p500 i) == -3.472667374605326000180332928505464606058e2 + 1.570796326794896619231321691639751442099 i": +ildouble: 1 +ldouble: 1 +Test "Real part of: casinh (-1.0 + 0x1p5000 i) == -3.466429049980286492395577839412341016946e3 + 1.570796326794896619231321691639751442099 i": +ildouble: 1 +ldouble: 1 Test "Real part of: casinh (-1.0 - 0 i) == -0.8813735870195430252326093249797923090282 - 0 i": double: 2 float: 1 @@ -473,6 +622,12 @@ idouble: 2 ifloat: 1 ildouble: 3 ldouble: 3 +Test "Real part of: casinh (-1.0 - 0x1p500 i) == -3.472667374605326000180332928505464606058e2 - 1.570796326794896619231321691639751442099 i": +ildouble: 1 +ldouble: 1 +Test "Real part of: casinh (-1.0 - 0x1p5000 i) == 
-3.466429049980286492395577839412341016946e3 - 1.570796326794896619231321691639751442099 i": +ildouble: 1 +ldouble: 1 Test "Real part of: casinh (-1.5 + +0 i) == -1.194763217287109304111930828519090523536 + +0 i": double: 2 float: 1 @@ -523,6 +678,27 @@ idouble: 1 ifloat: 1 ildouble: 1 ldouble: 1 +Test "Real part of: casinh (0x1.fp1023 + 0x1.fp1023 i) == 7.107906849659093345062145442726115449315e2 + 7.853981633974483096156608458198757210493e-1 i": +double: 1 +idouble: 1 +Test "Real part of: casinh (0x1.fp127 + 0x1.fp127 i) == 8.973081118419833726837456344608533993585e1 + 7.853981633974483096156608458198757210493e-1 i": +double: 1 +idouble: 1 +Test "Real part of: casinh (0x1.fp16383 + 0x1.fp16383 i) == 1.135753137836666928715489992987020363057e4 + 7.853981633974483096156608458198757210493e-1 i": +ildouble: 1 +ldouble: 1 +Test "Real part of: casinh (0x1p500 + 1.0 i) == 3.472667374605326000180332928505464606058e2 + 3.054936363499604682051979393213617699789e-151 i": +ildouble: 1 +ldouble: 1 +Test "Real part of: casinh (0x1p500 - 1.0 i) == 3.472667374605326000180332928505464606058e2 - 3.054936363499604682051979393213617699789e-151 i": +ildouble: 1 +ldouble: 1 +Test "Real part of: casinh (0x1p5000 + 1.0 i) == 3.466429049980286492395577839412341016946e3 + 7.079811261048172892385615158694057552948e-1506 i": +ildouble: 1 +ldouble: 1 +Test "Real part of: casinh (0x1p5000 - 1.0 i) == 3.466429049980286492395577839412341016946e3 - 7.079811261048172892385615158694057552948e-1506 i": +ildouble: 1 +ldouble: 1 Test "Real part of: casinh (1.0 + +0 i) == 0.8813735870195430252326093249797923090282 + +0 i": double: 1 float: 1 @@ -530,6 +706,12 @@ idouble: 1 ifloat: 1 ildouble: 1 ldouble: 1 +Test "Real part of: casinh (1.0 + 0x1p500 i) == 3.472667374605326000180332928505464606058e2 + 1.570796326794896619231321691639751442099 i": +ildouble: 1 +ldouble: 1 +Test "Real part of: casinh (1.0 + 0x1p5000 i) == 3.466429049980286492395577839412341016946e3 + 
1.570796326794896619231321691639751442099 i": +ildouble: 1 +ldouble: 1 Test "Real part of: casinh (1.0 - 0 i) == 0.8813735870195430252326093249797923090282 - 0 i": double: 1 float: 1 @@ -537,6 +719,12 @@ idouble: 1 ifloat: 1 ildouble: 1 ldouble: 1 +Test "Real part of: casinh (1.0 - 0x1p500 i) == 3.472667374605326000180332928505464606058e2 - 1.570796326794896619231321691639751442099 i": +ildouble: 1 +ldouble: 1 +Test "Real part of: casinh (1.0 - 0x1p5000 i) == 3.466429049980286492395577839412341016946e3 - 1.570796326794896619231321691639751442099 i": +ildouble: 1 +ldouble: 1 Test "Real part of: casinh (1.5 + +0 i) == 1.194763217287109304111930828519090523536 + +0 i": double: 1 idouble: 1 diff --git a/libc/sysdeps/sparc/sparc-ifunc.h b/libc/sysdeps/sparc/sparc-ifunc.h index edff5c880..f68161fc5 100644 --- a/libc/sysdeps/sparc/sparc-ifunc.h +++ b/libc/sysdeps/sparc/sparc-ifunc.h @@ -51,6 +51,33 @@ ENTRY (__##name) \ mov %o1, %o0; \ END (__##name) +# define SPARC_ASM_IFUNC2(name, m1, f1, m2, f2, dflt) \ +ENTRY (__##name) \ + .type __##name, @gnu_indirect_function; \ + SETUP_PIC_REG_LEAF(o3, o5); \ + set m1, %o1; \ + andcc %o0, %o1, %g0; \ + be 8f; \ + nop; \ + sethi %gdop_hix22(f1), %o1; \ + xor %o1, %gdop_lox10(f1), %o1; \ + ba 10f; \ + nop; \ +8: set m2, %o1; \ + andcc %o0, %o1, %g0; \ + be 9f; \ + nop; \ + sethi %gdop_hix22(f2), %o1; \ + xor %o1, %gdop_lox10(f2), %o1; \ + ba 10f; \ + nop; \ +9: sethi %gdop_hix22(dflt), %o1; \ + xor %o1, %gdop_lox10(dflt), %o1; \ +10: add %o3, %o1, %o1; \ + retl; \ + mov %o1, %o0; \ +END (__##name) + # else /* SHARED */ # ifdef __arch64__ @@ -82,19 +109,54 @@ ENTRY (__##name) \ mov %o1, %o0; \ END (__##name) +# define SPARC_ASM_IFUNC2(name, m1, f1, m2, f2, dflt) \ +ENTRY (__##name) \ + .type __##name, @gnu_indirect_function; \ + set m1, %o1; \ + andcc %o0, %o1, %g0; \ + be 8f; \ + nop; \ + SET(f1, %g1, %o1); \ + ba 10f; \ + nop; \ +8: set m2, %o1; \ + andcc %o0, %o1, %g0; \ + be 9f; \ + nop; \ + SET(f2, %g1, %o1); \ + ba 10f; \ + 
nop; \ +9: SET(dflt, %g1, %o1); \ +10: retl; \ + mov %o1, %o0; \ +END (__##name) + # endif /* SHARED */ +#define SPARC_ASM_VIS2_IFUNC(name) \ + SPARC_ASM_IFUNC1(name, HWCAP_SPARC_VIS2, \ + __##name##_vis2, __##name##_generic) + # ifdef HAVE_AS_VIS3_SUPPORT #define SPARC_ASM_VIS3_IFUNC(name) \ SPARC_ASM_IFUNC1(name, HWCAP_SPARC_VIS3, \ __##name##_vis3, __##name##_generic) +#define SPARC_ASM_VIS3_VIS2_IFUNC(name) \ + SPARC_ASM_IFUNC2(name, HWCAP_SPARC_VIS3, \ + __##name##_vis3, \ + HWCAP_SPARC_VIS2, \ + __##name##_vis2, __##name##_generic) + # else /* HAVE_AS_VIS3_SUPPORT */ #define SPARC_ASM_VIS3_IFUNC(name) \ SPARC_ASM_IFUNC_DFLT(name, __##name##_generic) +#define SPARC_ASM_VIS3_VIS2_IFUNC(name) \ + SPARC_ASM_VIS2_IFUNC(name) + # endif /* HAVE_AS_VIS3_SUPPORT */ diff --git a/libc/sysdeps/sparc/sparc32/fpu/s_fdim.S b/libc/sysdeps/sparc/sparc32/fpu/s_fdim.S new file mode 100644 index 000000000..2f0c5ce58 --- /dev/null +++ b/libc/sysdeps/sparc/sparc32/fpu/s_fdim.S @@ -0,0 +1,41 @@ +/* Compute positive difference, sparc 32-bit. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +ENTRY(__fdim) + std %o0, [%sp + 72] + std %o2, [%sp + 80] + ldd [%sp + 72], %f0 + ldd [%sp + 80], %f2 + fcmpd %f0, %f2 + st %g0, [%sp + 72] + fbug 1f + st %g0, [%sp + 76] + ldd [%sp + 72], %f0 + fnegd %f0, %f2 +1: retl + fsubd %f0, %f2, %f0 +END(__fdim) +weak_alias (__fdim, fdim) + +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __fdim, fdiml, GLIBC_2_1); +#endif diff --git a/libc/sysdeps/sparc/sparc32/fpu/s_fdimf.S b/libc/sysdeps/sparc/sparc32/fpu/s_fdimf.S new file mode 100644 index 000000000..570fdc30b --- /dev/null +++ b/libc/sysdeps/sparc/sparc32/fpu/s_fdimf.S @@ -0,0 +1,35 @@ +/* Compute positive difference, sparc 32-bit. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +ENTRY(__fdimf) + st %o0, [%sp + 72] + st %o1, [%sp + 76] + ld [%sp + 72], %f0 + ld [%sp + 76], %f1 + fcmps %f0, %f1 + fbug 1f + st %g0, [%sp + 72] + ld [%sp + 72], %f0 + fnegs %f0, %f1 +1: retl + fsubs %f0, %f1, %f0 +END(__fdimf) +weak_alias (__fdimf, fdimf) diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/addmul_1.S b/libc/sysdeps/sparc/sparc32/sparcv9/addmul_1.S index 563bfb1c0..7ba81d8a6 100644 --- a/libc/sysdeps/sparc/sparc32/sparcv9/addmul_1.S +++ b/libc/sysdeps/sparc/sparc32/sparcv9/addmul_1.S @@ -1 +1,81 @@ -#include <sparcv8/addmul_1.S> +! SPARC v9 32-bit __mpn_addmul_1 -- Multiply a limb vector with a limb +! and add the result to a second limb vector. +! +! Copyright (C) 2013 Free Software Foundation, Inc. +! This file is part of the GNU C Library. +! Contributed by David S. Miller <davem@davemloft.net> +! +! The GNU C Library is free software; you can redistribute it and/or +! modify it under the terms of the GNU Lesser General Public +! License as published by the Free Software Foundation; either +! version 2.1 of the License, or (at your option) any later version. +! +! The GNU C Library is distributed in the hope that it will be useful, +! but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +! Lesser General Public License for more details. +! +! You should have received a copy of the GNU Lesser General Public +! License along with the GNU C Library; if not, see +! <http://www.gnu.org/licenses/>. 
+ +#include <sysdep.h> + +#define res_ptr %i0 +#define s1_ptr %i1 +#define sz_arg %i2 +#define s2l_arg %i3 +#define sz %o4 +#define carry %o5 +#define s2_limb %g1 +#define tmp1 %l0 +#define tmp2 %l1 +#define tmp3 %l2 +#define tmp4 %l3 +#define tmp64_1 %g3 +#define tmp64_2 %o3 + +ENTRY(__mpn_addmul_1) + save %sp, -96, %sp + srl sz_arg, 0, sz + srl s2l_arg, 0, s2_limb + subcc sz, 1, sz + be,pn %icc, .Lfinal_limb + clr carry + +.Lloop: + lduw [s1_ptr + 0x00], tmp1 + lduw [res_ptr + 0x00], tmp3 + lduw [s1_ptr + 0x04], tmp2 + lduw [res_ptr + 0x04], tmp4 + mulx tmp1, s2_limb, tmp64_1 + add s1_ptr, 8, s1_ptr + mulx tmp2, s2_limb, tmp64_2 + sub sz, 2, sz + add res_ptr, 8, res_ptr + add tmp3, tmp64_1, tmp64_1 + add carry, tmp64_1, tmp64_1 + stw tmp64_1, [res_ptr - 0x08] + srlx tmp64_1, 32, carry + add tmp4, tmp64_2, tmp64_2 + add carry, tmp64_2, tmp64_2 + stw tmp64_2, [res_ptr - 0x04] + brgz sz, .Lloop + srlx tmp64_2, 32, carry + + brlz,pt sz, .Lfinish + nop + +.Lfinal_limb: + lduw [s1_ptr + 0x00], tmp1 + lduw [res_ptr + 0x00], tmp3 + mulx tmp1, s2_limb, tmp64_1 + add tmp3, tmp64_1, tmp64_1 + add carry, tmp64_1, tmp64_1 + stw tmp64_1, [res_ptr + 0x00] + srlx tmp64_1, 32, carry + +.Lfinish: + jmpl %i7 + 0x8, %g0 + restore carry, 0, %o0 +END(__mpn_addmul_1) diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/Makefile b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/Makefile index ee9f6ffce..0d92813d7 100644 --- a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/Makefile +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/Makefile @@ -1,11 +1,15 @@ ifeq ($(subdir),math) +libm-sysdep_routines += s_ceil-vis2 s_ceilf-vis2 \ + s_floor-vis2 s_floorf-vis2 ifeq ($(have-as-vis3),yes) libm-sysdep_routines += m_copysignf-vis3 m_copysign-vis3 s_ceilf-vis3 \ s_ceil-vis3 s_fabs-vis3 s_fabsf-vis3 s_floor-vis3 \ s_floorf-vis3 s_llrintf-vis3 s_llrint-vis3 \ s_rintf-vis3 s_rint-vis3 w_sqrt-vis3 w_sqrtf-vis3 \ s_fminf-vis3 s_fmin-vis3 s_fmaxf-vis3 s_fmax-vis3 \ - s_fmaf-vis3 
s_fma-vis3 + s_fmaf-vis3 s_fma-vis3 s_fdimf-vis3 s_fdim-vis3 \ + s_nearbyint-vis3 s_nearbyintf-vis3 s_truncf-vis3 \ + s_trunc-vis3 sysdep_routines += s_copysignf-vis3 s_copysign-vis3 endif endif diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis2.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis2.S new file mode 100644 index 000000000..94388003d --- /dev/null +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis2.S @@ -0,0 +1,61 @@ +/* ceil function, sparc32 v9 vis2 version. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2013. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override + the rounding mode during this routine. + + We add then subtract (or subtract than add if the initial + value was negative) 2**23 to the value, then subtract it + back out. + + This will clear out the fractional portion of the value and, + with suitable 'siam' initiated rouding mode settings, round + the final result in the proper direction. 
*/ + +#define TWO_FIFTYTWO 0x43300000 /* 2**52 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__ceil_vis2) + sethi %hi(TWO_FIFTYTWO), %o2 + sllx %o0, 32, %o0 + or %o0, %o1, %o0 + stx %o0, [%sp + 72] + sllx %o2, 32, %o2 + fzero ZERO + ldd [%sp + 72], %f0 + fnegd ZERO, SIGN_BIT + stx %o2, [%sp + 72] + fabsd %f0, %f14 + ldd [%sp + 72], %f16 + fcmpd %fcc3, %f14, %f16 + fmovduge %fcc3, ZERO, %f16 + fand %f0, SIGN_BIT, SIGN_BIT + for %f16, SIGN_BIT, %f16 + siam (1 << 2) | 2 + faddd %f0, %f16, %f18 + siam (1 << 2) | 0 + fsubd %f18, %f16, %f18 + siam (0 << 2) + retl + for %f18, SIGN_BIT, %f0 +END (__ceil_vis2) diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis3.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis3.S index 0c2140d95..aebff5cae 100644 --- a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis3.S +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil-vis3.S @@ -19,27 +19,21 @@ #include <sysdep.h> - /* Since changing the rounding mode is extremely expensive, we - try to round up using a method that is rounding mode - agnostic. + /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override + the rounding mode during this routine. We add then subtract (or subtract than add if the initial value was negative) 2**23 to the value, then subtract it back out. - This will clear out the fractional portion of the value. - One of two things will happen for non-whole initial values. - Either the rounding mode will round it up, or it will be - rounded down. If the value started out whole, it will be - equal after the addition and subtraction. This means we - can accurately detect with one test whether we need to add - another 1.0 to round it up properly. + This will clear out the fractional portion of the value and, + with suitable 'siam' initiated rouding mode settings, round + the final result in the proper direction. 
- VIS instructions are used to facilitate the formation of - easier constants, and the propagation of the sign bit. */ + We also use VIS3 moves to avoid using the stack to transfer + values between float and integer registers. */ #define TWO_FIFTYTWO 0x43300000 /* 2**52 */ -#define ONE_DOT_ZERO 0x3ff00000 /* 1.0 */ #define ZERO %f10 /* 0.0 */ #define SIGN_BIT %f12 /* -0.0 */ @@ -47,32 +41,22 @@ ENTRY (__ceil_vis3) sethi %hi(TWO_FIFTYTWO), %o2 sllx %o0, 32, %o0 - sethi %hi(ONE_DOT_ZERO), %o3 + sllx %o2, 32, %o2 or %o0, %o1, %o0 movxtod %o0, %f0 - sllx %o2, 32, %o2 fzero ZERO - sllx %o3, 32, %o3 - fnegd ZERO, SIGN_BIT - movxtod %o2, %f16 fabsd %f0, %f14 - fcmpd %fcc3, %f14, %f16 - fmovduge %fcc3, ZERO, %f16 fand %f0, SIGN_BIT, SIGN_BIT - for %f16, SIGN_BIT, %f16 + siam (1 << 2) | 2 faddd %f0, %f16, %f18 + siam (1 << 2) | 0 fsubd %f18, %f16, %f18 - fcmpd %fcc2, %f18, %f0 - movxtod %o3, %f20 - - fmovduge %fcc2, ZERO, %f20 - faddd %f18, %f20, %f0 - fabsd %f0, %f0 + siam (0 << 2) retl - for %f0, SIGN_BIT, %f0 + for %f18, SIGN_BIT, %f0 END (__ceil_vis3) diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil.S index 835703fb9..efc8d4936 100644 --- a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil.S +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceil.S @@ -1,7 +1,7 @@ #include <sparc-ifunc.h> #include <math_ldbl_opt.h> -SPARC_ASM_VIS3_IFUNC(ceil) +SPARC_ASM_VIS3_VIS2_IFUNC(ceil) weak_alias (__ceil, ceil) diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis2.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis2.S new file mode 100644 index 000000000..bc516765f --- /dev/null +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis2.S @@ -0,0 +1,58 @@ +/* Float ceil function, sparc32 v9 vis2 version. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. 
Miller <davem@davemloft.net>, 2013. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override + the rounding mode during this routine. + + We add then subtract (or subtract than add if the initial + value was negative) 2**23 to the value, then subtract it + back out. + + This will clear out the fractional portion of the value and, + with suitable 'siam' initiated rouding mode settings, round + the final result in the proper direction. 
*/ + +#define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__ceilf_vis2) + st %o0, [%sp + 68] + sethi %hi(TWO_TWENTYTHREE), %o2 + fzeros ZERO + ld [%sp + 68], %f0 + fnegs ZERO, SIGN_BIT + st %o2, [%sp + 68] + fabss %f0, %f14 + ld [%sp + 68], %f16 + fcmps %fcc3, %f14, %f16 + fmovsuge %fcc3, ZERO, %f16 + fands %f0, SIGN_BIT, SIGN_BIT + fors %f16, SIGN_BIT, %f16 + siam (1 << 2) | 2 + fadds %f0, %f16, %f1 + siam (1 << 2) | 0 + fsubs %f1, %f16, %f1 + siam (0 << 2) + retl + fors %f1, SIGN_BIT, %f0 +END (__ceilf_vis2) diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis3.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis3.S index 7d30c0b84..0a6768ca1 100644 --- a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis3.S +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf-vis3.S @@ -19,27 +19,21 @@ #include <sysdep.h> - /* Since changing the rounding mode is extremely expensive, we - try to round up using a method that is rounding mode - agnostic. + /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override + the rounding mode during this routine. We add then subtract (or subtract than add if the initial value was negative) 2**23 to the value, then subtract it back out. - This will clear out the fractional portion of the value. - One of two things will happen for non-whole initial values. - Either the rounding mode will round it up, or it will be - rounded down. If the value started out whole, it will be - equal after the addition and subtraction. This means we - can accurately detect with one test whether we need to add - another 1.0 to round it up properly. + This will clear out the fractional portion of the value and, + with suitable 'siam' initiated rouding mode settings, round + the final result in the proper direction. - VIS instructions are used to facilitate the formation of - easier constants, and the propagation of the sign bit. 
*/ + We also use VIS3 moves to avoid using the stack to transfer + values between float and integer registers. */ #define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */ -#define ONE_DOT_ZERO 0x3f800000 /* 1.0 */ #define ZERO %f10 /* 0.0 */ #define SIGN_BIT %f12 /* -0.0 */ @@ -47,28 +41,19 @@ ENTRY (__ceilf_vis3) movwtos %o0, %f0 sethi %hi(TWO_TWENTYTHREE), %o2 - sethi %hi(ONE_DOT_ZERO), %o3 fzeros ZERO - fnegs ZERO, SIGN_BIT - movwtos %o2, %f16 fabss %f0, %f14 - fcmps %fcc3, %f14, %f16 - fmovsuge %fcc3, ZERO, %f16 fands %f0, SIGN_BIT, SIGN_BIT - fors %f16, SIGN_BIT, %f16 + siam (1 << 2) | 2 fadds %f0, %f16, %f1 + siam (1 << 2) | 0 fsubs %f1, %f16, %f1 - fcmps %fcc2, %f1, %f0 - movwtos %o3, %f9 - - fmovsuge %fcc2, ZERO, %f9 - fadds %f1, %f9, %f0 - fabss %f0, %f0 + siam (0 << 2) retl - fors %f0, SIGN_BIT, %f0 + fors %f1, SIGN_BIT, %f0 END (__ceilf_vis3) diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf.S index 3047dd8fa..1c72a5728 100644 --- a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf.S +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_ceilf.S @@ -1,6 +1,6 @@ #include <sparc-ifunc.h> -SPARC_ASM_VIS3_IFUNC(ceilf) +SPARC_ASM_VIS3_VIS2_IFUNC(ceilf) weak_alias (__ceilf, ceilf) diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fdim-vis3.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fdim-vis3.S new file mode 100644 index 000000000..5e011a121 --- /dev/null +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fdim-vis3.S @@ -0,0 +1,34 @@ +/* Compute positive difference, sparc 32-bit+v9+vis3. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY(__fdim_vis3) + movwtos %o0, %f0 + movwtos %o1, %f1 + movwtos %o2, %f2 + movwtos %o3, %f3 + fcmpd %f0, %f2 + fbug 1f + nop + fzero %f0 + fnegd %f0, %f2 +1: retl + fsubd %f0, %f2, %f0 +END(__fdim_vis3) diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fdim.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fdim.S new file mode 100644 index 000000000..4b1340824 --- /dev/null +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fdim.S @@ -0,0 +1,19 @@ +#include <sparc-ifunc.h> +#include <math_ldbl_opt.h> + +SPARC_ASM_VIS3_IFUNC(fdim) + +weak_alias (__fdim, fdim) + +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __fdim, fdiml, GLIBC_2_1); +#endif + +# undef weak_alias +# define weak_alias(a, b) +# undef compat_symbol +# define compat_symbol(a, b, c, d) + +#define __fdim __fdim_generic + +#include "../s_fdim.S" diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fdimf-vis3.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fdimf-vis3.S new file mode 100644 index 000000000..c6d571297 --- /dev/null +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fdimf-vis3.S @@ -0,0 +1,32 @@ +/* Compute positive difference, sparc 32-bit+v9+vis3. + Copyright (C) 2013 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY(__fdimf_vis3) + movwtos %o0, %f0 + movwtos %o1, %f1 + fcmps %f0, %f1 + fbug 1f + nop + fzeros %f0 + fnegs %f0, %f1 +1: retl + fsubs %f0, %f1, %f0 +END(__fdimf_vis3) diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fdimf.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fdimf.S new file mode 100644 index 000000000..30381d6a5 --- /dev/null +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_fdimf.S @@ -0,0 +1,12 @@ +#include <sparc-ifunc.h> + +SPARC_ASM_VIS3_IFUNC(fdimf) + +weak_alias (__fdimf, fdimf) + +# undef weak_alias +# define weak_alias(a, b) + +#define __fdimf __fdimf_generic + +#include "../s_fdimf.S" diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis2.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis2.S new file mode 100644 index 000000000..3b5e8fd7c --- /dev/null +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis2.S @@ -0,0 +1,61 @@ +/* floor function, sparc32 v9 vis2 version. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2013. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override + the rounding mode during this routine. + + We add then subtract (or subtract than add if the initial + value was negative) 2**23 to the value, then subtract it + back out. + + This will clear out the fractional portion of the value and, + with suitable 'siam' initiated rouding mode settings, round + the final result in the proper direction. 
*/ + +#define TWO_FIFTYTWO 0x43300000 /* 2**52 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__floor_vis2) + sethi %hi(TWO_FIFTYTWO), %o2 + sllx %o0, 32, %o0 + or %o0, %o1, %o0 + stx %o0, [%sp + 72] + sllx %o2, 32, %o2 + fzero ZERO + ldd [%sp + 72], %f0 + fnegd ZERO, SIGN_BIT + stx %o2, [%sp + 72] + fabsd %f0, %f14 + ldd [%sp + 72], %f16 + fcmpd %fcc3, %f14, %f16 + fmovduge %fcc3, ZERO, %f16 + fand %f0, SIGN_BIT, SIGN_BIT + for %f16, SIGN_BIT, %f16 + siam (1 << 2) | 3 + faddd %f0, %f16, %f18 + siam (1 << 2) | 0 + fsubd %f18, %f16, %f18 + siam (0 << 2) + retl + for %f18, SIGN_BIT, %f0 +END (__floor_vis2) diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis3.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis3.S index 8445f1d7a..41fdfac3b 100644 --- a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis3.S +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor-vis3.S @@ -19,27 +19,21 @@ #include <sysdep.h> - /* Since changing the rounding mode is extremely expensive, we - try to round up using a method that is rounding mode - agnostic. + /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override + the rounding mode during this routine. We add then subtract (or subtract than add if the initial value was negative) 2**23 to the value, then subtract it back out. - This will clear out the fractional portion of the value. - One of two things will happen for non-whole initial values. - Either the rounding mode will round it up, or it will be - rounded down. If the value started out whole, it will be - equal after the addition and subtraction. This means we - can accurately detect with one test whether we need to add - another 1.0 to round it up properly. + This will clear out the fractional portion of the value and, + with suitable 'siam' initiated rouding mode settings, round + the final result in the proper direction. 
- VIS instructions are used to facilitate the formation of - easier constants, and the propagation of the sign bit. */ + We also use VIS3 moves to avoid using the stack to transfer + values between float and integer registers. */ #define TWO_FIFTYTWO 0x43300000 /* 2**52 */ -#define ONE_DOT_ZERO 0x3ff00000 /* 1.0 */ #define ZERO %f10 /* 0.0 */ #define SIGN_BIT %f12 /* -0.0 */ @@ -47,32 +41,22 @@ ENTRY (__floor_vis3) sethi %hi(TWO_FIFTYTWO), %o2 sllx %o0, 32, %o0 - sethi %hi(ONE_DOT_ZERO), %o3 + sllx %o2, 32, %o2 or %o0, %o1, %o0 movxtod %o0, %f0 - sllx %o2, 32, %o2 fzero ZERO - sllx %o3, 32, %o3 - fnegd ZERO, SIGN_BIT - movxtod %o2, %f16 fabsd %f0, %f14 - fcmpd %fcc3, %f14, %f16 - fmovduge %fcc3, ZERO, %f16 fand %f0, SIGN_BIT, SIGN_BIT - for %f16, SIGN_BIT, %f16 + siam (1 << 2) | 3 faddd %f0, %f16, %f18 + siam (1 << 2) | 0 fsubd %f18, %f16, %f18 - fcmpd %fcc2, %f18, %f0 - movxtod %o3, %f20 - - fmovdule %fcc2, ZERO, %f20 - fsubd %f18, %f20, %f0 - fabsd %f0, %f0 + siam (0 << 2) retl - for %f0, SIGN_BIT, %f0 + for %f18, SIGN_BIT, %f0 END (__floor_vis3) diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor.S index 37aeb43b9..1fe4b95ea 100644 --- a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor.S +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floor.S @@ -1,7 +1,7 @@ #include <sparc-ifunc.h> #include <math_ldbl_opt.h> -SPARC_ASM_VIS3_IFUNC(floor) +SPARC_ASM_VIS3_VIS2_IFUNC(floor) weak_alias (__floor, floor) diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis2.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis2.S new file mode 100644 index 000000000..4f731212e --- /dev/null +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis2.S @@ -0,0 +1,58 @@ +/* Float floor function, sparc32 v9 vis2 version. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. 
Miller <davem@davemloft.net>, 2013. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override + the rounding mode during this routine. + + We add then subtract (or subtract than add if the initial + value was negative) 2**23 to the value, then subtract it + back out. + + This will clear out the fractional portion of the value and, + with suitable 'siam' initiated rouding mode settings, round + the final result in the proper direction. 
*/ + +#define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__floorf_vis2) + st %o0, [%sp + 68] + sethi %hi(TWO_TWENTYTHREE), %o2 + fzeros ZERO + ld [%sp + 68], %f0 + fnegs ZERO, SIGN_BIT + st %o2, [%sp + 68] + fabss %f0, %f14 + ld [%sp + 68], %f16 + fcmps %fcc3, %f14, %f16 + fmovsuge %fcc3, ZERO, %f16 + fands %f0, SIGN_BIT, SIGN_BIT + fors %f16, SIGN_BIT, %f16 + siam (1 << 2) | 3 + fadds %f0, %f16, %f1 + siam (1 << 2) | 0 + fsubs %f1, %f16, %f1 + siam (0 << 2) + retl + fors %f1, SIGN_BIT, %f0 +END (__floorf_vis2) diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis3.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis3.S index 133a0a4a9..fe2d2da20 100644 --- a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis3.S +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf-vis3.S @@ -19,27 +19,21 @@ #include <sysdep.h> - /* Since changing the rounding mode is extremely expensive, we - try to round up using a method that is rounding mode - agnostic. + /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override + the rounding mode during this routine. We add then subtract (or subtract than add if the initial value was negative) 2**23 to the value, then subtract it back out. - This will clear out the fractional portion of the value. - One of two things will happen for non-whole initial values. - Either the rounding mode will round it up, or it will be - rounded down. If the value started out whole, it will be - equal after the addition and subtraction. This means we - can accurately detect with one test whether we need to add - another 1.0 to round it up properly. + This will clear out the fractional portion of the value and, + with suitable 'siam' initiated rouding mode settings, round + the final result in the proper direction. - VIS instructions are used to facilitate the formation of - easier constants, and the propagation of the sign bit. 
*/ + We also use VIS3 moves to avoid using the stack to transfer + values between float and integer registers. */ #define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */ -#define ONE_DOT_ZERO 0x3f800000 /* 1.0 */ #define ZERO %f10 /* 0.0 */ #define SIGN_BIT %f12 /* -0.0 */ @@ -47,28 +41,19 @@ ENTRY (__floorf_vis3) movwtos %o0, %f0 sethi %hi(TWO_TWENTYTHREE), %o2 - sethi %hi(ONE_DOT_ZERO), %o3 fzeros ZERO - fnegs ZERO, SIGN_BIT - movwtos %o2, %f16 fabss %f0, %f14 - fcmps %fcc3, %f14, %f16 - fmovsuge %fcc3, ZERO, %f16 fands %f0, SIGN_BIT, SIGN_BIT - fors %f16, SIGN_BIT, %f16 + siam (1 << 2) | 3 fadds %f0, %f16, %f1 + siam (1 << 2) | 0 fsubs %f1, %f16, %f1 - fcmps %fcc2, %f1, %f0 - movwtos %o3, %f9 - - fmovsule %fcc2, ZERO, %f9 - fsubs %f1, %f9, %f0 - fabss %f0, %f0 + siam (0 << 2) retl - fors %f0, SIGN_BIT, %f0 + fors %f1, SIGN_BIT, %f0 END (__floorf_vis3) diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf.S index 31cda385b..d2a83cb9b 100644 --- a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf.S +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_floorf.S @@ -1,6 +1,6 @@ #include <sparc-ifunc.h> -SPARC_ASM_VIS3_IFUNC(floorf) +SPARC_ASM_VIS3_VIS2_IFUNC(floorf) weak_alias (__floorf, floorf) diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_nearbyint-vis3.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_nearbyint-vis3.S new file mode 100644 index 000000000..b509500ed --- /dev/null +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_nearbyint-vis3.S @@ -0,0 +1,65 @@ +/* Round float to int floating-point values without generating + an inexact exception, sparc32 v9 vis3 version. + + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2013. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + /* We pop constants into the FPU registers using the incoming + argument stack slots, since this avoid having to use any PIC + references. We also thus avoid having to allocate a register + window. + + VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit. 
*/ + +#define TWO_FIFTYTWO 0x43300000 /* 2**52 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__nearbyint_vis3) + st %fsr, [%sp + 88] /* save the caller's %fsr */ + sethi %hi(TWO_FIFTYTWO), %o2 + sethi %hi(0xf8003e0), %o5 + ld [%sp + 88], %o4 + sllx %o0, 32, %o0 + or %o5, %lo(0xf8003e0), %o5 + or %o0, %o1, %o0 + movxtod %o0, %f0 /* %f0 = x, assembled from %o0:%o1 */ + andn %o4, %o5, %o4 /* drop the 0xf8003e0 bits from the %fsr copy; NOTE(review): presumably the trap-enable and accrued-exception fields -- confirm */ + fzero ZERO + st %o4, [%sp + 80] + sllx %o2, 32, %o2 + fnegd ZERO, SIGN_BIT + ld [%sp + 80], %fsr /* run with the masked %fsr */ + movxtod %o2, %f16 /* %f16 = 2**52 */ + fabsd %f0, %f14 + fcmpd %fcc3, %f14, %f16 + fmovduge %fcc3, ZERO, %f16 /* |x| >= 2**52 or unordered: use 0.0 instead */ + fand %f0, SIGN_BIT, SIGN_BIT /* SIGN_BIT = sign bit of x */ + for %f16, SIGN_BIT, %f16 /* give the constant the sign of x */ + faddd %f0, %f16, %f6 + fsubd %f6, %f16, %f0 + fabsd %f0, %f0 + for %f0, SIGN_BIT, %f0 /* put the sign of x back on the result */ + retl + ld [%sp + 88], %fsr /* delay slot: restore the %fsr saved on entry */ +END (__nearbyint_vis3) diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_nearbyint.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_nearbyint.S new file mode 100644 index 000000000..47da9eaaf --- /dev/null +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_nearbyint.S @@ -0,0 +1,19 @@ +#include <sparc-ifunc.h> +#include <math_ldbl_opt.h> + +SPARC_ASM_VIS3_IFUNC(nearbyint) + +weak_alias (__nearbyint, nearbyint) + +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __nearbyint, nearbyintl, GLIBC_2_1) +#endif + +# undef weak_alias +# define weak_alias(a, b) +# undef compat_symbol +# define compat_symbol(a, b, c, d) + +#define __nearbyint __nearbyint_generic /* name under which the file included below is assembled */ + +#include "../s_nearbyint.S" diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_nearbyintf-vis3.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_nearbyintf-vis3.S new file mode 100644 index 000000000..336126dee --- /dev/null +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_nearbyintf-vis3.S @@ -0,0 +1,61 @@ +/* Round float to int floating-point values without generating + an inexact exception, sparc32 v9 vis3 version. + + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S.
Miller <davem@davemloft.net>, 2013. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* We pop constants into the FPU registers using the incoming + argument stack slots, since this avoids having to use any PIC + references. We also thus avoid having to allocate a register + window. + + VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit.
*/ + +#define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__nearbyintf_vis3) + st %fsr, [%sp + 88] /* save the caller's %fsr */ + movwtos %o0, %f1 /* %f1 = x */ + sethi %hi(TWO_TWENTYTHREE), %o2 + sethi %hi(0xf8003e0), %o5 + ld [%sp + 88], %o4 + fzeros ZERO + or %o5, %lo(0xf8003e0), %o5 + fnegs ZERO, SIGN_BIT + andn %o4, %o5, %o4 /* drop the 0xf8003e0 bits from the %fsr copy; NOTE(review): presumably the trap-enable and accrued-exception fields -- confirm */ + st %o4, [%sp + 80] + ld [%sp + 80], %fsr /* run with the masked %fsr */ + movwtos %o2, %f16 /* %f16 = 2**23 */ + fabss %f1, %f14 + fcmps %fcc3, %f14, %f16 + fmovsuge %fcc3, ZERO, %f16 /* |x| >= 2**23 or unordered: use 0.0 instead */ + fands %f1, SIGN_BIT, SIGN_BIT /* SIGN_BIT = sign bit of x */ + fors %f16, SIGN_BIT, %f16 /* give the constant the sign of x */ + fadds %f1, %f16, %f5 + fsubs %f5, %f16, %f0 + fabss %f0, %f0 + fors %f0, SIGN_BIT, %f0 /* put the sign of x back on the result */ + retl + ld [%sp + 88], %fsr /* delay slot: restore the %fsr saved on entry */ +END (__nearbyintf_vis3) diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_nearbyintf.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_nearbyintf.S new file mode 100644 index 000000000..95100c1bf --- /dev/null +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_nearbyintf.S @@ -0,0 +1,12 @@ +#include <sparc-ifunc.h> + +SPARC_ASM_VIS3_IFUNC(nearbyintf) + +weak_alias (__nearbyintf, nearbyintf) + +# undef weak_alias +# define weak_alias(a, b) + +#define __nearbyintf __nearbyintf_generic /* name under which the file included below is assembled */ + +#include "../s_nearbyintf.S" diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_trunc-vis3.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_trunc-vis3.S new file mode 100644 index 000000000..72ec2826e --- /dev/null +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_trunc-vis3.S @@ -0,0 +1,57 @@ +/* Truncate argument to nearest integral value not larger in + magnitude than the argument, sparc32 v9 vis3 version. + + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2013.
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + /* We pop constants into the FPU registers using the incoming + argument stack slots, since this avoids having to use any PIC + references. We also thus avoid having to allocate a register + window. + + VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit.
*/ + +#define TWO_FIFTYTWO 0x43300000 /* 2**52 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__trunc_vis3) + sethi %hi(TWO_FIFTYTWO), %o2 + sllx %o0, 32, %o0 + or %o0, %o1, %o0 + fzero ZERO + movxtod %o0, %f0 /* %f0 = x, assembled from %o0:%o1 */ + sllx %o2, 32, %o2 + fnegd ZERO, SIGN_BIT + movxtod %o2, %f16 /* %f16 = 2**52 */ + fabsd %f0, %f14 /* %f14 = |x| */ + fcmpd %fcc3, %f14, %f16 + fmovduge %fcc3, ZERO, %f14 /* |x| >= 2**52 or unordered: convert 0.0 instead of |x| */ + fand %f0, SIGN_BIT, SIGN_BIT /* SIGN_BIT = sign bit of x */ + fdtox %f14, %f14 /* double -> 64-bit integer ... */ + fxtod %f14, %f14 /* ... and back to double */ + faddd %f0, ZERO, %f18 /* %f18 = x + 0.0 */ + fmovduge %fcc3, %f18, %f14 /* for such large or unordered x, use x + 0.0 as the result */ + retl + for %f14, SIGN_BIT, %f0 /* delay slot: OR the sign bit of x into the result */ +END (__trunc_vis3) diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_trunc.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_trunc.S new file mode 100644 index 000000000..3787fa1f1 --- /dev/null +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_trunc.S @@ -0,0 +1,19 @@ +#include <sparc-ifunc.h> +#include <math_ldbl_opt.h> + +SPARC_ASM_VIS3_IFUNC(trunc) + +weak_alias (__trunc, trunc) + +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __trunc, truncl, GLIBC_2_1) +#endif + +# undef weak_alias +# define weak_alias(a, b) +# undef compat_symbol +# define compat_symbol(a, b, c, d) + +#define __trunc __trunc_generic /* name under which the file included below is assembled */ + +#include "../s_trunc.S" diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_truncf-vis3.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_truncf-vis3.S new file mode 100644 index 000000000..60445dfaa --- /dev/null +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_truncf-vis3.S @@ -0,0 +1,53 @@ +/* Truncate argument to nearest integral value not larger in + magnitude than the argument, sparc32 v9 vis3 version. + + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2013.
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* We pop constants into the FPU registers using the incoming + argument stack slots, since this avoids having to use any PIC + references. We also thus avoid having to allocate a register + window. + + VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit.
*/ + +#define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__truncf_vis3) + movwtos %o0, %f1 /* %f1 = x */ + sethi %hi(TWO_TWENTYTHREE), %o2 + fzeros ZERO + fnegs ZERO, SIGN_BIT + movwtos %o2, %f16 /* %f16 = 2**23 */ + fabss %f1, %f14 /* %f14 = |x| */ + fcmps %fcc3, %f14, %f16 + fmovsuge %fcc3, ZERO, %f14 /* |x| >= 2**23 or unordered: convert 0.0 instead of |x| */ + fands %f1, SIGN_BIT, SIGN_BIT /* SIGN_BIT = sign bit of x */ + fstoi %f14, %f14 /* float -> 32-bit integer ... */ + fitos %f14, %f14 /* ... and back to float */ + fadds %f1, ZERO, %f18 /* %f18 = x + 0.0 */ + fmovsuge %fcc3, %f18, %f14 /* for such large or unordered x, use x + 0.0 as the result */ + retl + fors %f14, SIGN_BIT, %f0 /* delay slot: OR the sign bit of x into the result */ +END (__truncf_vis3) diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_truncf.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_truncf.S new file mode 100644 index 000000000..2ca251733 --- /dev/null +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/multiarch/s_truncf.S @@ -0,0 +1,12 @@ +#include <sparc-ifunc.h> + +SPARC_ASM_VIS3_IFUNC(truncf) + +weak_alias (__truncf, truncf) + +# undef weak_alias +# define weak_alias(a, b) + +#define __truncf __truncf_generic /* name under which the file included below is assembled */ + +#include "../s_truncf.S" diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/s_fdim.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/s_fdim.S new file mode 100644 index 000000000..6f26ab7af --- /dev/null +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/s_fdim.S @@ -0,0 +1,40 @@ +/* Compute positive difference, sparc 32-bit+v9. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details.
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +ENTRY(__fdim) + std %o0, [%sp + 72] + std %o2, [%sp + 80] + ldd [%sp + 72], %f0 /* %f0 = x, moved from %o0:%o1 through the stack */ + ldd [%sp + 80], %f2 /* %f2 = y, moved from %o2:%o3 through the stack */ + fcmpd %f0, %f2 + fbug 1f /* x > y or unordered: go straight to x - y */ + nop + fzero %f0 + fnegd %f0, %f2 /* otherwise set up 0.0 - (-0.0) */ +1: retl + fsubd %f0, %f2, %f0 /* delay slot: the subtraction that forms the result */ +END(__fdim) +weak_alias (__fdim, fdim) + +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __fdim, fdiml, GLIBC_2_1); +#endif diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/s_fdimf.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/s_fdimf.S new file mode 100644 index 000000000..fc55867cd --- /dev/null +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/s_fdimf.S @@ -0,0 +1,35 @@ +/* Compute positive difference, sparc 32-bit+v9. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#include <sysdep.h> + +ENTRY(__fdimf) + st %o0, [%sp + 72] + st %o1, [%sp + 76] + ld [%sp + 72], %f0 /* %f0 = x, moved from %o0 through the stack */ + ld [%sp + 76], %f1 /* %f1 = y, moved from %o1 through the stack */ + fcmps %f0, %f1 + fbug 1f /* x > y or unordered: go straight to x - y */ + nop + fzeros %f0 + fnegs %f0, %f1 /* otherwise set up 0.0 - (-0.0) */ +1: retl + fsubs %f0, %f1, %f0 /* delay slot: the subtraction that forms the result */ +END(__fdimf) +weak_alias (__fdimf, fdimf) diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/s_nearbyint.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/s_nearbyint.S new file mode 100644 index 000000000..ee6a575e1 --- /dev/null +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/s_nearbyint.S @@ -0,0 +1,72 @@ +/* Round float to int floating-point values without generating + an inexact exception, sparc32 v9 version. + + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2013. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + /* We pop constants into the FPU registers using the incoming + argument stack slots, since this avoids having to use any PIC + references. We also thus avoid having to allocate a register + window. + + VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit.
*/ + +#define TWO_FIFTYTWO 0x43300000 /* 2**52 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__nearbyint) + st %fsr, [%sp + 88] /* save the caller's %fsr */ + sethi %hi(TWO_FIFTYTWO), %o2 + sethi %hi(0xf8003e0), %o5 + ld [%sp + 88], %o4 + sllx %o0, 32, %o0 + or %o5, %lo(0xf8003e0), %o5 + or %o0, %o1, %o0 + andn %o4, %o5, %o4 /* drop the 0xf8003e0 bits from the %fsr copy; NOTE(review): presumably the trap-enable and accrued-exception fields -- confirm */ + fzero ZERO + st %o4, [%sp + 80] + stx %o0, [%sp + 72] + sllx %o2, 32, %o2 + fnegd ZERO, SIGN_BIT + ldd [%sp + 72], %f0 /* %f0 = x, moved from %o0:%o1 through the stack */ + ld [%sp + 80], %fsr /* run with the masked %fsr */ + stx %o2, [%sp + 72] + fabsd %f0, %f14 + ldd [%sp + 72], %f16 /* %f16 = 2**52, reusing the same stack slot */ + fcmpd %fcc3, %f14, %f16 + fmovduge %fcc3, ZERO, %f16 /* |x| >= 2**52 or unordered: use 0.0 instead */ + fand %f0, SIGN_BIT, SIGN_BIT /* SIGN_BIT = sign bit of x */ + for %f16, SIGN_BIT, %f16 /* give the constant the sign of x */ + faddd %f0, %f16, %f6 + fsubd %f6, %f16, %f0 + fabsd %f0, %f0 + for %f0, SIGN_BIT, %f0 /* put the sign of x back on the result */ + retl + ld [%sp + 88], %fsr /* delay slot: restore the %fsr saved on entry */ +END (__nearbyint) +weak_alias (__nearbyint, nearbyint) + +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __nearbyint, nearbyintl, GLIBC_2_1) +#endif diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/s_nearbyintf.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/s_nearbyintf.S new file mode 100644 index 000000000..4225b5449 --- /dev/null +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/s_nearbyintf.S @@ -0,0 +1,64 @@ +/* Round float to int floating-point values without generating + an inexact exception, sparc32 v9 version. + + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2013. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details.
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* We pop constants into the FPU registers using the incoming + argument stack slots, since this avoids having to use any PIC + references. We also thus avoid having to allocate a register + window. + + VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit. */ + +#define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__nearbyintf) + st %fsr, [%sp + 88] /* save the caller's %fsr */ + st %o0, [%sp + 68] + sethi %hi(TWO_TWENTYTHREE), %o2 + sethi %hi(0xf8003e0), %o5 + ld [%sp + 88], %o4 + fzeros ZERO + or %o5, %lo(0xf8003e0), %o5 + fnegs ZERO, SIGN_BIT + andn %o4, %o5, %o4 /* drop the 0xf8003e0 bits from the %fsr copy; NOTE(review): presumably the trap-enable and accrued-exception fields -- confirm */ + st %o4, [%sp + 80] + ld [%sp + 68], %f1 /* %f1 = x, moved from %o0 through the stack */ + ld [%sp + 80], %fsr /* run with the masked %fsr */ + st %o2, [%sp + 68] + fabss %f1, %f14 + ld [%sp + 68], %f16 /* %f16 = 2**23, reusing the same stack slot */ + fcmps %fcc3, %f14, %f16 + fmovsuge %fcc3, ZERO, %f16 /* |x| >= 2**23 or unordered: use 0.0 instead */ + fands %f1, SIGN_BIT, SIGN_BIT /* SIGN_BIT = sign bit of x */ + fors %f16, SIGN_BIT, %f16 /* give the constant the sign of x */ + fadds %f1, %f16, %f5 + fsubs %f5, %f16, %f0 + fabss %f0, %f0 + fors %f0, SIGN_BIT, %f0 /* put the sign of x back on the result */ + retl + ld [%sp + 88], %fsr /* delay slot: restore the %fsr saved on entry */ +END (__nearbyintf) +weak_alias (__nearbyintf, nearbyintf) diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/s_trunc.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/s_trunc.S new file mode 100644 index 000000000..c451d1d99 --- /dev/null +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/s_trunc.S @@ -0,0 +1,64 @@ +/* Truncate argument to nearest integral value not larger in + magnitude than the argument, sparc32 v9 version. + + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2013.
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + + /* We pop constants into the FPU registers using the incoming + argument stack slots, since this avoids having to use any PIC + references. We also thus avoid having to allocate a register + window. + + VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit.
*/ + +#define TWO_FIFTYTWO 0x43300000 /* 2**52 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__trunc) + sethi %hi(TWO_FIFTYTWO), %o2 + sllx %o0, 32, %o0 + or %o0, %o1, %o0 + fzero ZERO + stx %o0, [%sp + 72] + sllx %o2, 32, %o2 + fnegd ZERO, SIGN_BIT + ldd [%sp + 72], %f0 /* %f0 = x, moved from %o0:%o1 through the stack */ + stx %o2, [%sp + 72] + fabsd %f0, %f14 /* %f14 = |x| */ + ldd [%sp + 72], %f16 /* %f16 = 2**52, reusing the same stack slot */ + fcmpd %fcc3, %f14, %f16 + fmovduge %fcc3, ZERO, %f14 /* |x| >= 2**52 or unordered: convert 0.0 instead of |x| */ + fand %f0, SIGN_BIT, SIGN_BIT /* SIGN_BIT = sign bit of x */ + fdtox %f14, %f14 /* double -> 64-bit integer ... */ + fxtod %f14, %f14 /* ... and back to double */ + faddd %f0, ZERO, %f18 /* %f18 = x + 0.0 */ + fmovduge %fcc3, %f18, %f14 /* for such large or unordered x, use x + 0.0 as the result */ + retl + for %f14, SIGN_BIT, %f0 /* delay slot: OR the sign bit of x into the result */ +END (__trunc) +weak_alias (__trunc, trunc) + +#if LONG_DOUBLE_COMPAT(libm, GLIBC_2_1) +compat_symbol (libm, __trunc, truncl, GLIBC_2_1) +#endif diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/s_truncf.S b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/s_truncf.S new file mode 100644 index 000000000..4e6e25b26 --- /dev/null +++ b/libc/sysdeps/sparc/sparc32/sparcv9/fpu/s_truncf.S @@ -0,0 +1,56 @@ +/* Truncate argument to nearest integral value not larger in + magnitude than the argument, sparc32 v9 version. + + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2013. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#include <sysdep.h> + + /* We pop constants into the FPU registers using the incoming + argument stack slots, since this avoids having to use any PIC + references. We also thus avoid having to allocate a register + window. + + VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit. */ + +#define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__truncf) + st %o0, [%sp + 68] + sethi %hi(TWO_TWENTYTHREE), %o2 + fzeros ZERO + ld [%sp + 68], %f1 /* %f1 = x, moved from %o0 through the stack */ + fnegs ZERO, SIGN_BIT + st %o2, [%sp + 68] + fabss %f1, %f14 /* %f14 = |x| */ + ld [%sp + 68], %f16 /* %f16 = 2**23, reusing the same stack slot */ + fcmps %fcc3, %f14, %f16 + fmovsuge %fcc3, ZERO, %f14 /* |x| >= 2**23 or unordered: convert 0.0 instead of |x| */ + fands %f1, SIGN_BIT, SIGN_BIT /* SIGN_BIT = sign bit of x */ + fstoi %f14, %f14 /* float -> 32-bit integer ... */ + fitos %f14, %f14 /* ... and back to float */ + fadds %f1, ZERO, %f18 /* %f18 = x + 0.0 */ + fmovsuge %fcc3, %f18, %f14 /* for such large or unordered x, use x + 0.0 as the result */ + retl + fors %f14, SIGN_BIT, %f0 /* delay slot: OR the sign bit of x into the result */ +END (__truncf) +weak_alias (__truncf, truncf) diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/mul_1.S b/libc/sysdeps/sparc/sparc32/sparcv9/mul_1.S index 42284eada..e9a537196 100644 --- a/libc/sysdeps/sparc/sparc32/sparcv9/mul_1.S +++ b/libc/sysdeps/sparc/sparc32/sparcv9/mul_1.S @@ -1 +1,70 @@ -#include <sparcv8/mul_1.S> +! SPARC v9 32-bit __mpn_mul_1 -- Multiply a limb vector with a single +! limb and store the product in a second limb vector. +! +! Copyright (C) 2013 Free Software Foundation, Inc. +! This file is part of the GNU C Library. +! Contributed by David S. Miller <davem@davemloft.net> +! +! The GNU C Library is free software; you can redistribute it and/or +! modify it under the terms of the GNU Lesser General Public +! License as published by the Free Software Foundation; either +! version 2.1 of the License, or (at your option) any later version. +! +! The GNU C Library is distributed in the hope that it will be useful, +! but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +!
Lesser General Public License for more details. +! +! You should have received a copy of the GNU Lesser General Public +! License along with the GNU C Library; if not, see +! <http://www.gnu.org/licenses/>. + +#include <sysdep.h> + +#define res_ptr %o0 +#define s1_ptr %o1 +#define sz %o2 +#define s2_limb %o3 +#define carry %o5 +#define tmp1 %g1 +#define tmp2 %g2 +#define tmp3 %g3 +#define tmp4 %o4 + +ENTRY(__mpn_mul_1) + srl sz, 0, sz /* keep only the low 32 bits of the size */ + srl s2_limb, 0, s2_limb /* likewise for the multiplier limb */ + subcc sz, 1, sz /* sz now counts the remaining limbs minus one */ + be,pn %icc, .Lfinal_limb /* exactly one limb: only the tail code runs */ + clr carry /* delay slot: start with no carry */ + +.Lloop: + lduw [s1_ptr + 0x00], tmp1 /* each pass handles two source limbs */ + lduw [s1_ptr + 0x04], tmp2 + mulx tmp1, s2_limb, tmp3 + add s1_ptr, 8, s1_ptr + mulx tmp2, s2_limb, tmp4 + sub sz, 2, sz + add res_ptr, 8, res_ptr + add carry, tmp3, tmp3 /* 64-bit product plus incoming carry */ + stw tmp3, [res_ptr - 0x08] /* low word is the result limb */ + srlx tmp3, 32, carry /* high word carries into the next limb */ + add carry, tmp4, tmp4 + stw tmp4, [res_ptr - 0x04] + brgz sz, .Lloop /* sz > 0: at least two limbs remain */ + srlx tmp4, 32, carry /* delay slot: carry out of the second limb */ + + brlz,pt sz, .Lfinish /* sz < 0: nothing left; sz == 0 falls through for one last limb */ + nop + +.Lfinal_limb: + lduw [s1_ptr + 0x00], tmp1 + mulx tmp1, s2_limb, tmp3 + add carry, tmp3, tmp3 + stw tmp3, [res_ptr + 0x00] + srlx tmp3, 32, carry + +.Lfinish: + retl + mov carry, %o0 /* delay slot: return the final carry */ +END(__mpn_mul_1) diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/submul_1.S b/libc/sysdeps/sparc/sparc32/sparcv9/submul_1.S index de69533f6..8985e2a4c 100644 --- a/libc/sysdeps/sparc/sparc32/sparcv9/submul_1.S +++ b/libc/sysdeps/sparc/sparc32/sparcv9/submul_1.S @@ -1 +1,82 @@ -#include <sparcv8/submul_1.S> +! SPARC v9 32-bit __mpn_submul_1 -- Multiply a limb vector with a limb +! and subtract the result from a second limb vector. +! +! Copyright (C) 2013 Free Software Foundation, Inc. +! This file is part of the GNU C Library. +! Contributed by David S. Miller <davem@davemloft.net> +! +! The GNU C Library is free software; you can redistribute it and/or +! modify it under the terms of the GNU Lesser General Public +! License as published by the Free Software Foundation; either +! version 2.1 of the License, or (at your option) any later version. +! +!
The GNU C Library is distributed in the hope that it will be useful, +! but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +! Lesser General Public License for more details. +! +! You should have received a copy of the GNU Lesser General Public +! License along with the GNU C Library; if not, see +! <http://www.gnu.org/licenses/>. + +#include <sysdep.h> + +#define res_ptr %i0 +#define s1_ptr %i1 +#define sz_arg %i2 +#define s2l_arg %i3 +#define sz %o4 +#define carry %o5 +#define s2_limb %g1 +#define tmp1 %l0 +#define tmp2 %l1 +#define tmp3 %l2 +#define tmp4 %l3 +#define tmp64_1 %g3 +#define tmp64_2 %o3 + +ENTRY(__mpn_submul_1) + save %sp, -96, %sp /* new register window; arguments are now in %i0-%i3 */ + srl sz_arg, 0, sz /* keep only the low 32 bits of the size */ + srl s2l_arg, 0, s2_limb /* likewise for the multiplier limb */ + subcc sz, 1, sz /* sz now counts the remaining limbs minus one */ + be,pn %icc, .Lfinal_limb /* exactly one limb: only the tail code runs */ + subcc %g0, 0, carry /* delay slot: carry = 0, and the icc carry flag is cleared */ + +.Lloop: + lduw [s1_ptr + 0x00], tmp1 /* each pass handles two limbs */ + lduw [res_ptr + 0x00], tmp3 + lduw [s1_ptr + 0x04], tmp2 + lduw [res_ptr + 0x04], tmp4 + mulx tmp1, s2_limb, tmp64_1 + add s1_ptr, 8, s1_ptr + mulx tmp2, s2_limb, tmp64_2 + sub sz, 2, sz + add res_ptr, 8, res_ptr + addx carry, tmp64_1, tmp64_1 /* product + carry word + carry flag left by the previous subcc */ + srlx tmp64_1, 32, carry /* high word carries into the next limb */ + subcc tmp3, tmp64_1, tmp64_1 /* subtract from the existing result limb; sets the carry flag */ + stw tmp64_1, [res_ptr - 0x08] + addx carry, tmp64_2, tmp64_2 + srlx tmp64_2, 32, carry + subcc tmp4, tmp64_2, tmp64_2 + brgz sz, .Lloop /* sz > 0: at least two limbs remain */ + stw tmp64_2, [res_ptr - 0x04] /* delay slot: store the second result limb */ + + brlz,pt sz, .Lfinish /* sz < 0: nothing left; sz == 0 falls through for one last limb */ + nop + +.Lfinal_limb: + lduw [s1_ptr + 0x00], tmp1 + lduw [res_ptr + 0x00], tmp3 + mulx tmp1, s2_limb, tmp64_1 + addx carry, tmp64_1, tmp64_1 + srlx tmp64_1, 32, carry + subcc tmp3, tmp64_1, tmp64_1 + stw tmp64_1, [res_ptr + 0x00] + +.Lfinish: + addx carry, 0, carry /* fold the last carry flag into the returned carry */ + jmpl %i7 + 0x8, %g0 + restore carry, 0, %o0 /* delay slot: pop the window, carry lands in the caller's %o0 */ +END(__mpn_submul_1) diff --git a/libc/sysdeps/sparc/sparc64/fpu/multiarch/Makefile b/libc/sysdeps/sparc/sparc64/fpu/multiarch/Makefile index 83458484a..6a9274b4f 100644 --- a/libc/sysdeps/sparc/sparc64/fpu/multiarch/Makefile +++ b/libc/sysdeps/sparc/sparc64/fpu/multiarch/Makefile @@ -1,4 +1,6 @@
ifeq ($(subdir),math) +libm-sysdep_routines += s_ceil-vis2 s_ceilf-vis2 \ + s_floor-vis2 s_floorf-vis2 ifeq ($(have-as-vis3),yes) libm-sysdep_routines += m_signbitf-vis3 m_signbit-vis3 s_ceilf-vis3 \ s_ceil-vis3 m_finitef-vis3 m_finite-vis3 \ @@ -7,7 +9,8 @@ libm-sysdep_routines += m_signbitf-vis3 m_signbit-vis3 s_ceilf-vis3 \ s_lrintf-vis3 s_lrint-vis3 s_rintf-vis3 \ s_rint-vis3 s_fminf-vis3 s_fmin-vis3 \ s_fmaxf-vis3 s_fmax-vis3 s_fmaf-vis3 \ - s_fma-vis3 + s_fma-vis3 s_nearbyint-vis3 s_nearbyintf-vis3 \ + s_truncf-vis3 s_trunc-vis3 sysdep_routines += s_signbitf-vis3 s_signbit-vis3 s_finitef-vis3 \ s_finite-vis3 s_isinff-vis3 s_isinf-vis3 \ s_isnanf-vis3 s_isnan-vis3 diff --git a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis2.S b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis2.S new file mode 100644 index 000000000..50d96a6ef --- /dev/null +++ b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis2.S @@ -0,0 +1,57 @@ +/* ceil function, sparc64 vis2 version. + Copyright (C) 2012-2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2012. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override + the rounding mode during this routine. 
+ + We add then subtract (or subtract then add if the initial + value was negative) 2**52 to the value, then subtract it + back out. + + This will clear out the fractional portion of the value and, + with suitable 'siam' initiated rounding mode settings, round + the final result in the proper direction. */ + +#define TWO_FIFTYTWO 0x43300000 /* 2**52 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__ceil_vis2) + sethi %hi(TWO_FIFTYTWO), %o2 + fzero ZERO + sllx %o2, 32, %o2 + fnegd ZERO, SIGN_BIT + stx %o2, [%sp + STACK_BIAS + 128] + fabsd %f0, %f14 + ldd [%sp + STACK_BIAS + 128], %f16 /* %f16 = 2**52, moved from %o2 through the stack */ + fcmpd %fcc3, %f14, %f16 + fmovduge %fcc3, ZERO, %f16 /* |x| >= 2**52 or unordered: use 0.0 instead */ + fand %f0, SIGN_BIT, SIGN_BIT /* SIGN_BIT = sign bit of x */ + for %f16, SIGN_BIT, %f16 /* give the constant the sign of x */ + siam (1 << 2) | 2 /* rounding override for the add; NOTE(review): presumably round toward +infinity -- confirm */ + faddd %f0, %f16, %f18 + siam (1 << 2) | 0 /* different rounding override for the subtract */ + fsubd %f18, %f16, %f18 + siam (0 << 2) /* NOTE(review): presumably ends the override -- confirm */ + retl + for %f18, SIGN_BIT, %f0 /* delay slot: OR the sign bit of x into the result */ +END (__ceil_vis2) diff --git a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis3.S b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis3.S index 9a598ea6f..6acff0999 100644 --- a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis3.S +++ b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis3.S @@ -19,57 +19,41 @@ #include <sysdep.h> - /* Since changing the rounding mode is extremely expensive, we - try to round up using a method that is rounding mode - agnostic. + /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override + the rounding mode during this routine. We add then subtract (or subtract than add if the initial value was negative) 2**23 to the value, then subtract it back out. - This will clear out the fractional portion of the value. - One of two things will happen for non-whole initial values. - Either the rounding mode will round it up, or it will be - rounded down. If the value started out whole, it will be - equal after the addition and subtraction. This means we - can accurately detect with one test whether we need to add - another 1.0 to round it up properly.
+ This will clear out the fractional portion of the value and, + with suitable 'siam' initiated rounding mode settings, round + the final result in the proper direction. - VIS instructions are used to facilitate the formation of - easier constants, and the propagation of the sign bit. */ + We also use VIS3 moves to avoid using the stack to transfer + values between float and integer registers. */ #define TWO_FIFTYTWO 0x43300000 /* 2**52 */ -#define ONE_DOT_ZERO 0x3ff00000 /* 1.0 */ #define ZERO %f10 /* 0.0 */ #define SIGN_BIT %f12 /* -0.0 */ @@ -47,28 +41,19 @@ ENTRY (__ceil_vis3) sethi %hi(TWO_FIFTYTWO), %o2 - sethi %hi(ONE_DOT_ZERO), %o3 fzero ZERO - sllx %o2, 32, %o2 fnegd ZERO, SIGN_BIT - - sllx %o3, 32, %o3 movxtod %o2, %f16 fabsd %f0, %f14 - fcmpd %fcc3, %f14, %f16 - fmovduge %fcc3, ZERO, %f16 fand %f0, SIGN_BIT, SIGN_BIT - for %f16, SIGN_BIT, %f16 + siam (1 << 2) | 2 /* rounding override for the add; NOTE(review): presumably round toward +infinity -- confirm */ faddd %f0, %f16, %f18 + siam (1 << 2) | 0 /* different rounding override for the subtract */ fsubd %f18, %f16, %f18 - fcmpd %fcc2, %f18, %f0 - movxtod %o3, %f20 - - fmovduge %fcc2, ZERO, %f20 - faddd %f18, %f20, %f0 - fabsd %f0, %f0 + siam (0 << 2) /* NOTE(review): presumably ends the override -- confirm */ retl - for %f0, SIGN_BIT, %f0 + for %f18, SIGN_BIT, %f0 /* delay slot: OR the saved sign bit of x into the result */ END (__ceil_vis3) diff --git a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil.S b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil.S index fa4c7c353..e7822bc30 100644 --- a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil.S +++ b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil.S @@ -1,6 +1,6 @@ #include <sparc-ifunc.h> -SPARC_ASM_VIS3_IFUNC(ceil) +SPARC_ASM_VIS3_VIS2_IFUNC(ceil) weak_alias (__ceil, ceil) diff --git a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis2.S b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis2.S new file mode 100644 index 000000000..cd5937b38 --- /dev/null +++ b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis2.S @@ -0,0 +1,56 @@ +/* Float ceil function, sparc64 vis2 version. + Copyright (C) 2012-2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S.
Miller <davem@davemloft.net>, 2012. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override + the rounding mode during this routine. + + We add then subtract (or subtract then add if the initial + value was negative) 2**23 to the value, then subtract it + back out. + + This will clear out the fractional portion of the value and, + with suitable 'siam' initiated rounding mode settings, round + the final result in the proper direction.
*/ + +#define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__ceilf_vis2) + sethi %hi(TWO_TWENTYTHREE), %o2 + fzeros ZERO + fnegs ZERO, SIGN_BIT + st %o2, [%sp + STACK_BIAS + 128] + fabss %f1, %f14 + ld [%sp + STACK_BIAS + 128], %f16 + fcmps %fcc3, %f14, %f16 + fmovsuge %fcc3, ZERO, %f16 + fands %f1, SIGN_BIT, SIGN_BIT + fors %f16, SIGN_BIT, %f16 + siam (1 << 2) | 2 + fadds %f1, %f16, %f5 + siam (1 << 2) | 0 + fsubs %f5, %f16, %f5 + siam (0 << 2) + retl + fors %f5, SIGN_BIT, %f0 +END (__ceilf_vis2) diff --git a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis3.S b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis3.S index 8c635907d..b3ec3484d 100644 --- a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis3.S +++ b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis3.S @@ -19,55 +19,40 @@ #include <sysdep.h> - /* Since changing the rounding mode is extremely expensive, we - try to round up using a method that is rounding mode - agnostic. + /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override + the rounding mode during this routine. We add then subtract (or subtract than add if the initial value was negative) 2**23 to the value, then subtract it back out. - This will clear out the fractional portion of the value. - One of two things will happen for non-whole initial values. - Either the rounding mode will round it up, or it will be - rounded down. If the value started out whole, it will be - equal after the addition and subtraction. This means we - can accurately detect with one test whether we need to add - another 1.0 to round it up properly. + This will clear out the fractional portion of the value and, + with suitable 'siam' initiated rounding mode settings, round + the final result in the proper direction. - VIS instructions are used to facilitate the formation of - easier constants, and the propagation of the sign bit.
*/ + We also use VIS3 moves to avoid using the stack to transfer + values between float and integer registers. */ #define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */ -#define ONE_DOT_ZERO 0x3f800000 /* 1.0 */ #define ZERO %f10 /* 0.0 */ #define SIGN_BIT %f12 /* -0.0 */ ENTRY (__ceilf_vis3) sethi %hi(TWO_TWENTYTHREE), %o2 - sethi %hi(ONE_DOT_ZERO), %o3 fzeros ZERO - fnegs ZERO, SIGN_BIT - movwtos %o2, %f16 fabss %f1, %f14 - fcmps %fcc3, %f14, %f16 - fmovsuge %fcc3, ZERO, %f16 fands %f1, SIGN_BIT, SIGN_BIT - fors %f16, SIGN_BIT, %f16 + siam (1 << 2) | 2 fadds %f1, %f16, %f5 + siam (1 << 2) | 0 fsubs %f5, %f16, %f5 - fcmps %fcc2, %f5, %f1 - movwtos %o3, %f9 - - fmovsuge %fcc2, ZERO, %f9 - fadds %f5, %f9, %f0 - fabss %f0, %f0 + siam (0 << 2) retl - fors %f0, SIGN_BIT, %f0 + fors %f5, SIGN_BIT, %f0 END (__ceilf_vis3) diff --git a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf.S b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf.S index 3047dd8fa..1c72a5728 100644 --- a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf.S +++ b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf.S @@ -1,6 +1,6 @@ #include <sparc-ifunc.h> -SPARC_ASM_VIS3_IFUNC(ceilf) +SPARC_ASM_VIS3_VIS2_IFUNC(ceilf) weak_alias (__ceilf, ceilf) diff --git a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis2.S b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis2.S new file mode 100644 index 000000000..5479ceddf --- /dev/null +++ b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis2.S @@ -0,0 +1,57 @@ +/* floor function, sparc64 vis2 version. + Copyright (C) 2012-2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2012. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override + the rounding mode during this routine. + + We add then subtract (or subtract then add if the initial + value was negative) 2**52 to the value, then subtract it + back out. + + This will clear out the fractional portion of the value and, + with suitable 'siam' initiated rounding mode settings, round + the final result in the proper direction. */ + +#define TWO_FIFTYTWO 0x43300000 /* 2**52 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__floor_vis2) + sethi %hi(TWO_FIFTYTWO), %o2 + fzero ZERO + sllx %o2, 32, %o2 + fnegd ZERO, SIGN_BIT + stx %o2, [%sp + STACK_BIAS + 128] + fabsd %f0, %f14 + ldd [%sp + STACK_BIAS + 128], %f16 + fcmpd %fcc3, %f14, %f16 + fmovduge %fcc3, ZERO, %f16 + fand %f0, SIGN_BIT, SIGN_BIT + for %f16, SIGN_BIT, %f16 + siam (1 << 2) | 3 + faddd %f0, %f16, %f18 + siam (1 << 2) | 0 + fsubd %f18, %f16, %f18 + siam (0 << 2) + retl + for %f18, SIGN_BIT, %f0 +END (__floor_vis2) diff --git a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis3.S b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis3.S index 169d20654..c2ffe9f41 100644 --- a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis3.S +++ b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis3.S @@ -19,57 +19,41 @@ #include <sysdep.h> - /* Since changing the rounding mode is extremely expensive, we - try to round up using a method that is rounding mode - agnostic.
+ /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override + the rounding mode during this routine. We add then subtract (or subtract than add if the initial value was negative) 2**23 to the value, then subtract it back out. - This will clear out the fractional portion of the value. - One of two things will happen for non-whole initial values. - Either the rounding mode will round it up, or it will be - rounded down. If the value started out whole, it will be - equal after the addition and subtraction. This means we - can accurately detect with one test whether we need to add - another 1.0 to round it up properly. + This will clear out the fractional portion of the value and, + with suitable 'siam' initiated rounding mode settings, round + the final result in the proper direction. - VIS instructions are used to facilitate the formation of - easier constants, and the propagation of the sign bit. */ + We also use VIS3 moves to avoid using the stack to transfer + values between float and integer registers.
*/ #define TWO_FIFTYTWO 0x43300000 /* 2**52 */ -#define ONE_DOT_ZERO 0x3ff00000 /* 1.0 */ #define ZERO %f10 /* 0.0 */ #define SIGN_BIT %f12 /* -0.0 */ ENTRY (__floor_vis3) sethi %hi(TWO_FIFTYTWO), %o2 - sethi %hi(ONE_DOT_ZERO), %o3 fzero ZERO - sllx %o2, 32, %o2 fnegd ZERO, SIGN_BIT - - sllx %o3, 32, %o3 movxtod %o2, %f16 fabsd %f0, %f14 - fcmpd %fcc3, %f14, %f16 - fmovduge %fcc3, ZERO, %f16 fand %f0, SIGN_BIT, SIGN_BIT - for %f16, SIGN_BIT, %f16 + siam (1 << 2) | 3 faddd %f0, %f16, %f18 + siam (1 << 2) | 0 fsubd %f18, %f16, %f18 - fcmpd %fcc2, %f18, %f0 - movxtod %o3, %f20 - - fmovdule %fcc2, ZERO, %f20 - fsubd %f18, %f20, %f0 - fabsd %f0, %f0 + siam (0 << 2) retl - for %f0, SIGN_BIT, %f0 + for %f18, SIGN_BIT, %f0 END (__floor_vis3) diff --git a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_floor.S b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_floor.S index 6ae9947a7..989ccab56 100644 --- a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_floor.S +++ b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_floor.S @@ -1,6 +1,6 @@ #include <sparc-ifunc.h> -SPARC_ASM_VIS3_IFUNC(floor) +SPARC_ASM_VIS3_VIS2_IFUNC(floor) weak_alias (__floor, floor) diff --git a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis2.S b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis2.S new file mode 100644 index 000000000..935fa853d --- /dev/null +++ b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis2.S @@ -0,0 +1,56 @@ +/* Float floor function, sparc64 vis2 version. + Copyright (C) 2012-2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2012. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override + the rounding mode during this routine. + + We add then subtract (or subtract then add if the initial + value was negative) 2**23 to the value, then subtract it + back out. + + This will clear out the fractional portion of the value and, + with suitable 'siam' initiated rounding mode settings, round + the final result in the proper direction. */ + +#define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__floorf_vis2) + sethi %hi(TWO_TWENTYTHREE), %o2 + fzeros ZERO + fnegs ZERO, SIGN_BIT + st %o2, [%sp + STACK_BIAS + 128] + fabss %f1, %f14 + ld [%sp + STACK_BIAS + 128], %f16 + fcmps %fcc3, %f14, %f16 + fmovsuge %fcc3, ZERO, %f16 + fands %f1, SIGN_BIT, SIGN_BIT + fors %f16, SIGN_BIT, %f16 + siam (1 << 2) | 3 + fadds %f1, %f16, %f5 + siam (1 << 2) | 0 + fsubs %f5, %f16, %f5 + siam (0 << 2) + retl + fors %f5, SIGN_BIT, %f0 +END (__floorf_vis2) diff --git a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis3.S b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis3.S index 65be2977a..225e17ec0 100644 --- a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis3.S +++ b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis3.S @@ -19,55 +19,40 @@ #include <sysdep.h> - /* Since changing the rounding mode is extremely expensive, we - try to round up using a method that is rounding mode - agnostic.
+ /* 'siam' (Set Interval Arithmetic Mode) is used to quickly override + the rounding mode during this routine. We add then subtract (or subtract than add if the initial value was negative) 2**23 to the value, then subtract it back out. - This will clear out the fractional portion of the value. - One of two things will happen for non-whole initial values. - Either the rounding mode will round it up, or it will be - rounded down. If the value started out whole, it will be - equal after the addition and subtraction. This means we - can accurately detect with one test whether we need to add - another 1.0 to round it up properly. + This will clear out the fractional portion of the value and, + with suitable 'siam' initiated rounding mode settings, round + the final result in the proper direction. - VIS instructions are used to facilitate the formation of - easier constants, and the propagation of the sign bit. */ + We also use VIS3 moves to avoid using the stack to transfer + values between float and integer registers.
*/ #define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */ -#define ONE_DOT_ZERO 0x3f800000 /* 1.0 */ #define ZERO %f10 /* 0.0 */ #define SIGN_BIT %f12 /* -0.0 */ ENTRY (__floorf_vis3) sethi %hi(TWO_TWENTYTHREE), %o2 - sethi %hi(ONE_DOT_ZERO), %o3 fzeros ZERO - fnegs ZERO, SIGN_BIT - - movwtos %o2, %f16 + movwtos %o2, %f16 fabss %f1, %f14 - fcmps %fcc3, %f14, %f16 - fmovsuge %fcc3, ZERO, %f16 fands %f1, SIGN_BIT, SIGN_BIT - fors %f16, SIGN_BIT, %f16 + siam (1 << 2) | 3 fadds %f1, %f16, %f5 + siam (1 << 2) | 0 fsubs %f5, %f16, %f5 - fcmps %fcc2, %f5, %f1 - movwtos %o3, %f9 - - fmovsule %fcc2, ZERO, %f9 - fsubs %f5, %f9, %f0 - fabss %f0, %f0 + siam (0 << 2) retl - fors %f0, SIGN_BIT, %f0 + fors %f5, SIGN_BIT, %f0 END (__floorf_vis3) diff --git a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf.S b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf.S index 31cda385b..d2a83cb9b 100644 --- a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf.S +++ b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf.S @@ -1,6 +1,6 @@ #include <sparc-ifunc.h> -SPARC_ASM_VIS3_IFUNC(floorf) +SPARC_ASM_VIS3_VIS2_IFUNC(floorf) weak_alias (__floorf, floorf) diff --git a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_nearbyint-vis3.S b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_nearbyint-vis3.S new file mode 100644 index 000000000..f2071d66c --- /dev/null +++ b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_nearbyint-vis3.S @@ -0,0 +1,61 @@ +/* Round float to int floating-point values without generating + an inexact exception, sparc64 vis3 version. + + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2013. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* We pop constants into the FPU registers using the incoming + argument stack slots, since this avoid having to use any PIC + references. We also thus avoid having to allocate a register + window. + + VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit. */ + +#define TWO_FIFTYTWO 0x43300000 /* 2**52 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__nearbyint_vis3) + stx %fsr, [%sp + STACK_BIAS + 144] + sethi %hi(TWO_FIFTYTWO), %o2 + sllx %o2, 32, %o2 + ldx [%sp + STACK_BIAS + 144], %o4 + sethi %hi(0xf8003e0), %o5 + fzero ZERO + or %o5, %lo(0xf8003e0), %o5 + fnegd ZERO, SIGN_BIT + andn %o4, %o5, %o4 + movxtod %o2, %f16 + stx %o4, [%sp + STACK_BIAS + 136] + ldx [%sp + STACK_BIAS + 136], %fsr + fabsd %f0, %f14 + fcmpd %fcc3, %f14, %f16 + fmovduge %fcc3, ZERO, %f16 + fand %f0, SIGN_BIT, SIGN_BIT + for %f16, SIGN_BIT, %f16 + faddd %f0, %f16, %f6 + fsubd %f6, %f16, %f0 + fabsd %f0, %f0 + for %f0, SIGN_BIT, %f0 + retl + ldx [%sp + STACK_BIAS + 144], %fsr +END (__nearbyint_vis3) diff --git a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_nearbyint.S b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_nearbyint.S new file mode 100644 index 000000000..bb75ab360 --- /dev/null +++ b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_nearbyint.S @@ -0,0 +1,12 @@ +#include <sparc-ifunc.h> + +SPARC_ASM_VIS3_IFUNC(nearbyint) + +weak_alias (__nearbyint, nearbyint) + +# undef weak_alias +# define weak_alias(a, b) + +#define __nearbyint __nearbyint_generic + +#include 
"../s_nearbyint.S" diff --git a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_nearbyintf-vis3.S b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_nearbyintf-vis3.S new file mode 100644 index 000000000..b08928f6f --- /dev/null +++ b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_nearbyintf-vis3.S @@ -0,0 +1,60 @@ +/* Round float to int floating-point values without generating + an inexact exception, sparc64 vis3 version. + + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2013. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* We pop constants into the FPU registers using the incoming + argument stack slots, since this avoid having to use any PIC + references. We also thus avoid having to allocate a register + window. + + VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit. 
*/ + +#define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__nearbyintf_vis3) + stx %fsr, [%sp + STACK_BIAS + 144] + sethi %hi(0xf8003e0), %o5 + sethi %hi(TWO_TWENTYTHREE), %o2 + ldx [%sp + STACK_BIAS + 144], %o4 + or %o5, %lo(0xf8003e0), %o5 + fzeros ZERO + andn %o4, %o5, %o4 + fnegs ZERO, SIGN_BIT + movwtos %o2, %f16 + stx %o4, [%sp + STACK_BIAS + 136] + ldx [%sp + STACK_BIAS + 136], %fsr + fabss %f1, %f14 + fcmps %fcc3, %f14, %f16 + fmovsuge %fcc3, ZERO, %f16 + fands %f1, SIGN_BIT, SIGN_BIT + fors %f16, SIGN_BIT, %f16 + fadds %f1, %f16, %f5 + fsubs %f5, %f16, %f0 + fabss %f0, %f0 + fors %f0, SIGN_BIT, %f0 + retl + ldx [%sp + STACK_BIAS + 144], %fsr +END (__nearbyintf_vis3) diff --git a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_nearbyintf.S b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_nearbyintf.S new file mode 100644 index 000000000..95100c1bf --- /dev/null +++ b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_nearbyintf.S @@ -0,0 +1,12 @@ +#include <sparc-ifunc.h> + +SPARC_ASM_VIS3_IFUNC(nearbyintf) + +weak_alias (__nearbyintf, nearbyintf) + +# undef weak_alias +# define weak_alias(a, b) + +#define __nearbyintf __nearbyintf_generic + +#include "../s_nearbyintf.S" diff --git a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_trunc-vis3.S b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_trunc-vis3.S new file mode 100644 index 000000000..34ff42da8 --- /dev/null +++ b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_trunc-vis3.S @@ -0,0 +1,53 @@ +/* Truncate argument to nearest integral value not larger than + the argument, sparc64 vis3 version. + + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2013. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* We pop constants into the FPU registers using the incoming + argument stack slots, since this avoids having to use any PIC + references. We also thus avoid having to allocate a register + window. + + VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit.
*/ + +#define TWO_FIFTYTWO 0x43300000 /* 2**52 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__trunc_vis3) + sethi %hi(TWO_FIFTYTWO), %o2 + sllx %o2, 32, %o2 + fzero ZERO + fnegd ZERO, SIGN_BIT + movxtod %o2, %f16 + fabsd %f0, %f14 + fcmpd %fcc3, %f14, %f16 + fmovduge %fcc3, ZERO, %f14 + fand %f0, SIGN_BIT, SIGN_BIT + fdtox %f14, %f14 + fxtod %f14, %f14 + faddd %f0, ZERO, %f18 + fmovduge %fcc3, %f18, %f14 + retl + for %f14, SIGN_BIT, %f0 +END (__trunc_vis3) diff --git a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_trunc.S b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_trunc.S new file mode 100644 index 000000000..0d6f43a18 --- /dev/null +++ b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_trunc.S @@ -0,0 +1,12 @@ +#include <sparc-ifunc.h> + +SPARC_ASM_VIS3_IFUNC(trunc) + +weak_alias (__trunc, trunc) + +# undef weak_alias +# define weak_alias(a, b) + +#define __trunc __trunc_generic + +#include "../s_trunc.S" diff --git a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_truncf-vis3.S b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_truncf-vis3.S new file mode 100644 index 000000000..e566b6ba8 --- /dev/null +++ b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_truncf-vis3.S @@ -0,0 +1,52 @@ +/* Truncate argument to nearest integral value not larger than + the argument, sparc64 vis3 version. + + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2013. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* We pop constants into the FPU registers using the incoming + argument stack slots, since this avoid having to use any PIC + references. We also thus avoid having to allocate a register + window. + + VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit. */ + +#define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__truncf_vis3) + sethi %hi(TWO_TWENTYTHREE), %o2 + fzeros ZERO + fnegs ZERO, SIGN_BIT + movwtos %o2,%f16 + fabss %f1, %f14 + fcmps %fcc3, %f14, %f16 + fmovsuge %fcc3, ZERO, %f14 + fands %f1, SIGN_BIT, SIGN_BIT + fstoi %f14, %f14 + fitos %f14, %f14 + fadds %f1, ZERO, %f18 + fmovsuge %fcc3, %f18, %f14 + retl + fors %f14, SIGN_BIT, %f0 +END (__truncf_vis3) diff --git a/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_truncf.S b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_truncf.S new file mode 100644 index 000000000..2ca251733 --- /dev/null +++ b/libc/sysdeps/sparc/sparc64/fpu/multiarch/s_truncf.S @@ -0,0 +1,12 @@ +#include <sparc-ifunc.h> + +SPARC_ASM_VIS3_IFUNC(truncf) + +weak_alias (__truncf, truncf) + +# undef weak_alias +# define weak_alias(a, b) + +#define __truncf __truncf_generic + +#include "../s_truncf.S" diff --git a/libc/sysdeps/sparc/sparc64/fpu/s_fdim.S b/libc/sysdeps/sparc/sparc64/fpu/s_fdim.S new file mode 100644 index 000000000..a1c53dfb1 --- /dev/null +++ b/libc/sysdeps/sparc/sparc64/fpu/s_fdim.S @@ -0,0 +1,32 @@ +/* Compute positive difference, sparc 64-bit. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <math_ldbl_opt.h> + +ENTRY(__fdim) + fcmpd %f0, %f2 + fbug 1f + nop + fzero %f0 + fnegd %f0, %f2 +1: retl + fsubd %f0, %f2, %f0 +END(__fdim) +weak_alias (__fdim, fdim) diff --git a/libc/sysdeps/sparc/sparc64/fpu/s_fdimf.S b/libc/sysdeps/sparc/sparc64/fpu/s_fdimf.S new file mode 100644 index 000000000..61782a5f3 --- /dev/null +++ b/libc/sysdeps/sparc/sparc64/fpu/s_fdimf.S @@ -0,0 +1,31 @@ +/* Compute positive difference, sparc 64-bit. + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +ENTRY(__fdimf) + fcmps %f1, %f3 + fbug 1f + nop + fzeros %f1 + fnegs %f1, %f3 +1: retl + fsubs %f1, %f3, %f0 +END(__fdimf) +weak_alias (__fdimf, fdimf) diff --git a/libc/sysdeps/sparc/sparc64/fpu/s_nearbyint.S b/libc/sysdeps/sparc/sparc64/fpu/s_nearbyint.S new file mode 100644 index 000000000..963e4bc7b --- /dev/null +++ b/libc/sysdeps/sparc/sparc64/fpu/s_nearbyint.S @@ -0,0 +1,63 @@ +/* Round float to int floating-point values without generating + an inexact exception, sparc64 version. + + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2013. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* We pop constants into the FPU registers using the incoming + argument stack slots, since this avoid having to use any PIC + references. We also thus avoid having to allocate a register + window. + + VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit. 
*/ + +#define TWO_FIFTYTWO 0x43300000 /* 2**52 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__nearbyint) + stx %fsr, [%sp + STACK_BIAS + 144] + sethi %hi(TWO_FIFTYTWO), %o2 + sllx %o2, 32, %o2 + ldx [%sp + STACK_BIAS + 144], %o4 + sethi %hi(0xf8003e0), %o5 + fzero ZERO + or %o5, %lo(0xf8003e0), %o5 + fnegd ZERO, SIGN_BIT + andn %o4, %o5, %o4 + stx %o2, [%sp + STACK_BIAS + 128] + stx %o4, [%sp + STACK_BIAS + 136] + ldx [%sp + STACK_BIAS + 136], %fsr + fabsd %f0, %f14 + ldd [%sp + STACK_BIAS + 128], %f16 + fcmpd %fcc3, %f14, %f16 + fmovduge %fcc3, ZERO, %f16 + fand %f0, SIGN_BIT, SIGN_BIT + for %f16, SIGN_BIT, %f16 + faddd %f0, %f16, %f6 + fsubd %f6, %f16, %f0 + fabsd %f0, %f0 + for %f0, SIGN_BIT, %f0 + retl + ldx [%sp + STACK_BIAS + 144], %fsr +END (__nearbyint) +weak_alias (__nearbyint, nearbyint) diff --git a/libc/sysdeps/sparc/sparc64/fpu/s_nearbyintf.S b/libc/sysdeps/sparc/sparc64/fpu/s_nearbyintf.S new file mode 100644 index 000000000..4ff29058e --- /dev/null +++ b/libc/sysdeps/sparc/sparc64/fpu/s_nearbyintf.S @@ -0,0 +1,62 @@ +/* Round float to int floating-point values without generating + an inexact exception, sparc64 version. + + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2013. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* We pop constants into the FPU registers using the incoming + argument stack slots, since this avoid having to use any PIC + references. We also thus avoid having to allocate a register + window. + + VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit. */ + +#define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__nearbyintf) + stx %fsr, [%sp + STACK_BIAS + 144] + sethi %hi(0xf8003e0), %o5 + sethi %hi(TWO_TWENTYTHREE), %o2 + ldx [%sp + STACK_BIAS + 144], %o4 + or %o5, %lo(0xf8003e0), %o5 + fzeros ZERO + andn %o4, %o5, %o4 + fnegs ZERO, SIGN_BIT + st %o2, [%sp + STACK_BIAS + 128] + stx %o4, [%sp + STACK_BIAS + 136] + ldx [%sp + STACK_BIAS + 136], %fsr + fabss %f1, %f14 + ld [%sp + STACK_BIAS + 128], %f16 + fcmps %fcc3, %f14, %f16 + fmovsuge %fcc3, ZERO, %f16 + fands %f1, SIGN_BIT, SIGN_BIT + fors %f16, SIGN_BIT, %f16 + fadds %f1, %f16, %f5 + fsubs %f5, %f16, %f0 + fabss %f0, %f0 + fors %f0, SIGN_BIT, %f0 + retl + ldx [%sp + STACK_BIAS + 144], %fsr +END (__nearbyintf) +weak_alias (__nearbyintf, nearbyintf) diff --git a/libc/sysdeps/sparc/sparc64/fpu/s_trunc.S b/libc/sysdeps/sparc/sparc64/fpu/s_trunc.S new file mode 100644 index 000000000..13d47eb97 --- /dev/null +++ b/libc/sysdeps/sparc/sparc64/fpu/s_trunc.S @@ -0,0 +1,55 @@ +/* Truncate argument to nearest integral value not larger than + the argument, sparc64 version. + + Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2013. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* We pop constants into the FPU registers using the incoming + argument stack slots, since this avoids having to use any PIC + references. We also thus avoid having to allocate a register + window. + + VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit. */ + +#define TWO_FIFTYTWO 0x43300000 /* 2**52 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ +/* Reads its argument from %f0 and leaves the result in %f0. */ +ENTRY (__trunc) + sethi %hi(TWO_FIFTYTWO), %o2 + sllx %o2, 32, %o2 /* Move the constant into the upper 32 bits of the doubleword. */ + fzero ZERO + fnegd ZERO, SIGN_BIT + stx %o2, [%sp + STACK_BIAS + 128] + fabsd %f0, %f14 + ldd [%sp + STACK_BIAS + 128], %f16 + fcmpd %fcc3, %f14, %f16 + fmovduge %fcc3, ZERO, %f14 /* |x| >= 2**52 or unordered: convert 0.0 instead of |x|. */ + fand %f0, SIGN_BIT, SIGN_BIT /* Keep only the sign bit of x. */ + fdtox %f14, %f14 + fxtod %f14, %f14 /* Round trip through a 64-bit integer; presumably this is what drops the fraction (confirm the fdtox rounding rule). */ + faddd %f0, ZERO, %f18 + fmovduge %fcc3, %f18, %f14 /* Same condition as above: use x + 0.0 as the result instead. */ + retl + for %f14, SIGN_BIT, %f0 /* Delay slot: OR the sign of x into the result. */ +END (__trunc) +weak_alias (__trunc, trunc) diff --git a/libc/sysdeps/sparc/sparc64/fpu/s_truncf.S b/libc/sysdeps/sparc/sparc64/fpu/s_truncf.S new file mode 100644 index 000000000..e25a1f595 --- /dev/null +++ b/libc/sysdeps/sparc/sparc64/fpu/s_truncf.S @@ -0,0 +1,54 @@ +/* Truncate argument to nearest integral value not larger in + magnitude than the argument, sparc64 version. + + Copyright (C) 2013 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2013. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* We pop constants into the FPU registers using the incoming + argument stack slots, since this avoid having to use any PIC + references. We also thus avoid having to allocate a register + window. + + VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit. 
*/ + +#define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ +/* Reads its argument from %f1 and leaves the result in %f0. */ +ENTRY (__truncf) + sethi %hi(TWO_TWENTYTHREE), %o2 + fzeros ZERO + fnegs ZERO, SIGN_BIT + st %o2, [%sp + STACK_BIAS + 128] + fabss %f1, %f14 + ld [%sp + STACK_BIAS + 128], %f16 + fcmps %fcc3, %f14, %f16 + fmovsuge %fcc3, ZERO, %f14 /* |x| >= 2**23 or unordered: convert 0.0 instead of |x|. */ + fands %f1, SIGN_BIT, SIGN_BIT /* Keep only the sign bit of x. */ + fstoi %f14, %f14 + fitos %f14, %f14 /* Round trip through a 32-bit integer; presumably this is what drops the fraction (confirm the fstoi rounding rule). */ + fadds %f1, ZERO, %f18 + fmovsuge %fcc3, %f18, %f14 /* Same condition as above: use x + 0.0 as the result instead. */ + retl + fors %f14, SIGN_BIT, %f0 /* Delay slot: OR the sign of x into the result. */ +END (__truncf) +weak_alias (__truncf, truncf) diff --git a/libc/sysdeps/sparc/sparc64/multiarch/Makefile b/libc/sysdeps/sparc/sparc64/multiarch/Makefile index 4ad7aff91..55b757f9a 100644 --- a/libc/sysdeps/sparc/sparc64/multiarch/Makefile +++ b/libc/sysdeps/sparc/sparc64/multiarch/Makefile @@ -10,3 +10,12 @@ ifeq ($(subdir),string) sysdep_routines += memcpy-ultra3 memcpy-niagara1 memcpy-niagara2 \ memset-niagara1 memcpy-niagara4 memset-niagara4 endif + +ifeq ($(subdir),stdlib) +sysdep_routines += mul_1-vis3 addmul_1-vis3 submul_1-vis3 add_n-vis3 sub_n-vis3 +endif + +ifeq ($(subdir),math) +gmp-sysdep_routines = mul_1-vis3 addmul_1-vis3 submul_1-vis3 add_n-vis3 \ + sub_n-vis3 +endif diff --git a/libc/sysdeps/sparc/sparc64/multiarch/add_n-vis3.S b/libc/sysdeps/sparc/sparc64/multiarch/add_n-vis3.S new file mode 100644 index 000000000..185f31169 --- /dev/null +++ b/libc/sysdeps/sparc/sparc64/multiarch/add_n-vis3.S @@ -0,0 +1,67 @@ +! SPARC v9 64-bit VIS3 __mpn_add_n -- Add two limb vectors of the same length > 0 and +! store sum in a third limb vector. +! +! Copyright (C) 2013 Free Software Foundation, Inc. +! This file is part of the GNU C Library. +! Contributed by David S. Miller <davem@davemloft.net> +! +! The GNU C Library is free software; you can redistribute it and/or +! modify it under the terms of the GNU Lesser General Public +! License as published by the Free Software Foundation; either +! version 2.1 of the License, or (at your option) any later version. 
+! +! The GNU C Library is distributed in the hope that it will be useful, +! but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +! Lesser General Public License for more details. +! +! You should have received a copy of the GNU Lesser General Public +! License along with the GNU C Library; if not, see +! <http://www.gnu.org/licenses/>. + +#include <sysdep.h> + +#define res_ptr %o0 +#define s1_ptr %o1 +#define s2_ptr %o2 +#define sz %o3 +#define tmp1 %g1 +#define tmp2 %g2 +#define tmp3 %g3 +#define tmp4 %o4 +/* The loop handles two limbs per iteration; a left-over limb is handled at .Lfinal_limb. The carry out is returned in %o0. */ + .register %g2,#scratch + .register %g3,#scratch +ENTRY(__mpn_add_n_vis3) + subcc sz, 1, sz + be .Lfinal_limb /* Exactly one limb: skip the two-limb loop. */ + cmp %g0, 0 /* Delay slot: 0 - 0 leaves the carry flag clear. */ + +.Lloop: + ldx [s2_ptr + 0x00], tmp1 + add s2_ptr, 0x10, s2_ptr + ldx [s1_ptr + 0x00], tmp2 + add s1_ptr, 0x10, s1_ptr + ldx [s2_ptr - 0x08], tmp3 + add res_ptr, 0x10, res_ptr + ldx [s1_ptr - 0x08], tmp4 + sub sz, 2, sz /* Plain sub: the condition codes are not written. */ + addxccc tmp1, tmp2, tmp1 /* Add with carry in, and update the carry. */ + stx tmp1, [res_ptr - 0x10] + addxccc tmp3, tmp4, tmp3 + brgz sz, .Lloop /* Branches on the register value, not on the condition codes. */ + stx tmp3, [res_ptr - 0x08] + + brlz,pt sz, .Lfinish /* sz < 0: no limb is left over. */ + nop + +.Lfinal_limb: + ldx [s2_ptr + 0x00], tmp1 + ldx [s1_ptr + 0x00], tmp2 + addxccc tmp1, tmp2, tmp1 + stx tmp1, [res_ptr + 0x00] + +.Lfinish: + retl + addxc %g0, %g0, %o0 /* Delay slot: 0 + 0 + carry, i.e. return the final carry. */ +END(__mpn_add_n_vis3) diff --git a/libc/sysdeps/sparc/sparc64/multiarch/add_n.S b/libc/sysdeps/sparc/sparc64/multiarch/add_n.S new file mode 100644 index 000000000..25cae3977 --- /dev/null +++ b/libc/sysdeps/sparc/sparc64/multiarch/add_n.S @@ -0,0 +1,56 @@ +/* Multiple versions of add_n + + Copyright (C) 2013 Free Software Foundation, Inc. + Contributed by David S. Miller (davem@davemloft.net) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +/* IFUNC resolver: tests the HWCAP_SPARC_VIS3 bit of the mask passed in %o0 and returns in %o0 the address of __mpn_add_n_vis3 or of __mpn_add_n_generic. */ +ENTRY(__mpn_add_n) + .type __mpn_add_n, @gnu_indirect_function +# ifdef SHARED + SETUP_PIC_REG_LEAF(o3, o5) +# endif + set HWCAP_SPARC_VIS3, %o1 + andcc %o0, %o1, %g0 + be 1f /* VIS3 bit clear: take the generic path. */ + nop +# ifdef SHARED + sethi %gdop_hix22(__mpn_add_n_vis3), %o1 + xor %o1, %gdop_lox10(__mpn_add_n_vis3), %o1 +# else + set __mpn_add_n_vis3, %o1 +# endif + ba 10f + nop +1: +# ifdef SHARED + sethi %gdop_hix22(__mpn_add_n_generic), %o1 + xor %o1, %gdop_lox10(__mpn_add_n_generic), %o1 +# else + set __mpn_add_n_generic, %o1 /* Static build without VIS3: must select the generic routine, as the SHARED branch does. */ +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 /* Delay slot: return the selected address. */ +END(__mpn_add_n) + +#define __mpn_add_n __mpn_add_n_generic +#include "../add_n.S" diff --git a/libc/sysdeps/sparc/sparc64/multiarch/addmul_1-vis3.S b/libc/sysdeps/sparc/sparc64/multiarch/addmul_1-vis3.S new file mode 100644 index 000000000..f955b27c0 --- /dev/null +++ b/libc/sysdeps/sparc/sparc64/multiarch/addmul_1-vis3.S @@ -0,0 +1,87 @@ +! SPARC v9 64-bit VIS3 __mpn_addmul_1 -- Multiply a limb vector with a +! limb and add the result to a second limb vector. +! +! Copyright (C) 2013 Free Software Foundation, Inc. +! This file is part of the GNU C Library. +! Contributed by David S. Miller <davem@davemloft.net> +! +! The GNU C Library is free software; you can redistribute it and/or +! modify it under the terms of the GNU Lesser General Public +! License as published by the Free Software Foundation; either +! version 2.1 of the License, or (at your option) any later version. +! +! The GNU C Library is distributed in the hope that it will be useful, +! 
but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +! Lesser General Public License for more details. +! +! You should have received a copy of the GNU Lesser General Public +! License along with the GNU C Library; if not, see +! <http://www.gnu.org/licenses/>. + +#include <sysdep.h> + +#define res_ptr %i0 +#define s1_ptr %i1 +#define sz %i2 +#define s2_limb %i3 +#define carry %o5 +#define tmp1 %g1 +#define tmp2 %g2 +#define tmp3 %g3 +#define tmp4 %o4 +#define tmp5 %l0 +#define tmp6 %l1 +#define tmp7 %l2 +#define tmp8 %l3 +/* The loop handles two limbs per iteration; a left-over limb is handled at .Lfinal_limb. Runs in its own register window and hands the final carry limb to the caller's %o0 through restore. */ + .register %g2,#scratch + .register %g3,#scratch +ENTRY(__mpn_addmul_1_vis3) + save %sp, -176, %sp + subcc sz, 1, sz + be .Lfinal_limb /* Exactly one limb: skip the two-limb loop. */ + clr carry /* Delay slot: start with a zero carry limb. */ + +.Lloop: + ldx [s1_ptr + 0x00], tmp1 + ldx [res_ptr + 0x00], tmp3 + ldx [s1_ptr + 0x08], tmp2 + ldx [res_ptr + 0x08], tmp4 + mulx tmp1, s2_limb, tmp5 /* Low 64 bits of the product. */ + add s1_ptr, 0x10, s1_ptr + umulxhi tmp1, s2_limb, tmp6 /* High 64 bits of the unsigned product. */ + add res_ptr, 0x10, res_ptr + mulx tmp2, s2_limb, tmp7 + sub sz, 2, sz + umulxhi tmp2, s2_limb, tmp8 + addcc carry, tmp5, tmp5 /* Low product + incoming carry limb. */ + addxc %g0, tmp6, carry /* New carry limb = high product + carry flag. */ + addcc tmp3, tmp5, tmp5 /* Add the destination limb. */ + addxc %g0, carry, carry /* Fold that carry flag into the carry limb. */ + stx tmp5, [res_ptr - 0x10] + addcc carry, tmp7, tmp7 + addxc %g0, tmp8, carry + addcc tmp4, tmp7, tmp7 + addxc %g0, carry, carry + brgz sz, .Lloop + stx tmp7, [res_ptr - 0x08] + + brlz,pt sz, .Lfinish /* sz < 0: no limb is left over. */ + nop + +.Lfinal_limb: + ldx [s1_ptr + 0x00], tmp1 + ldx [res_ptr + 0x00], tmp3 + mulx tmp1, s2_limb, tmp5 + umulxhi tmp1, s2_limb, tmp6 + addcc carry, tmp5, tmp5 + addxc %g0, tmp6, carry + addcc tmp3, tmp5, tmp5 + addxc %g0, carry, carry + stx tmp5, [res_ptr + 0x00] + +.Lfinish: + jmpl %i7 + 8, %g0 + restore carry, 0, %o0 /* Delay slot: pop the window and return the carry limb. */ +END(__mpn_addmul_1_vis3) diff --git a/libc/sysdeps/sparc/sparc64/multiarch/addmul_1.S b/libc/sysdeps/sparc/sparc64/multiarch/addmul_1.S new file mode 100644 index 000000000..a1659e40e --- /dev/null +++ b/libc/sysdeps/sparc/sparc64/multiarch/addmul_1.S @@ -0,0 +1,56 @@ +/* Multiple versions of 
addmul_1 + + Copyright (C) 2013 Free Software Foundation, Inc. + Contributed by David S. Miller (davem@davemloft.net) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +/* IFUNC resolver: tests the HWCAP_SPARC_VIS3 bit of the mask passed in %o0 and returns in %o0 the address of __mpn_addmul_1_vis3 or of __mpn_addmul_1_generic. */ +ENTRY(__mpn_addmul_1) + .type __mpn_addmul_1, @gnu_indirect_function +# ifdef SHARED + SETUP_PIC_REG_LEAF(o3, o5) +# endif + set HWCAP_SPARC_VIS3, %o1 + andcc %o0, %o1, %g0 + be 1f /* VIS3 bit clear: take the generic path. */ + nop +# ifdef SHARED + sethi %gdop_hix22(__mpn_addmul_1_vis3), %o1 + xor %o1, %gdop_lox10(__mpn_addmul_1_vis3), %o1 +# else + set __mpn_addmul_1_vis3, %o1 +# endif + ba 10f + nop +1: +# ifdef SHARED + sethi %gdop_hix22(__mpn_addmul_1_generic), %o1 + xor %o1, %gdop_lox10(__mpn_addmul_1_generic), %o1 +# else + set __mpn_addmul_1_generic, %o1 /* Static build without VIS3: must select the generic routine, as the SHARED branch does. */ +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 /* Delay slot: return the selected address. */ +END(__mpn_addmul_1) + +#define __mpn_addmul_1 __mpn_addmul_1_generic +#include "../addmul_1.S" diff --git a/libc/sysdeps/sparc/sparc64/multiarch/mul_1-vis3.S b/libc/sysdeps/sparc/sparc64/multiarch/mul_1-vis3.S new file mode 100644 index 000000000..61fbe27a4 --- /dev/null +++ b/libc/sysdeps/sparc/sparc64/multiarch/mul_1-vis3.S @@ -0,0 +1,73 @@ +! SPARC v9 64-bit VIS3 __mpn_mul_1 -- Multiply a limb vector with a single +! limb and store the product in a second limb vector. +! +! Copyright (C) 2013 Free Software Foundation, Inc. 
+! This file is part of the GNU C Library. +! Contributed by David S. Miller <davem@davemloft.net> +! +! The GNU C Library is free software; you can redistribute it and/or +! modify it under the terms of the GNU Lesser General Public +! License as published by the Free Software Foundation; either +! version 2.1 of the License, or (at your option) any later version. +! +! The GNU C Library is distributed in the hope that it will be useful, +! but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +! Lesser General Public License for more details. +! +! You should have received a copy of the GNU Lesser General Public +! License along with the GNU C Library; if not, see +! <http://www.gnu.org/licenses/>. + +#include <sysdep.h> + +#define res_ptr %o0 +#define s1_ptr %o1 +#define sz %o2 +#define s2_limb %o3 +#define carry %o5 +#define tmp1 %g1 +#define tmp2 %g2 +#define tmp3 %g3 +#define tmp4 %o4 +/* The loop handles two limbs per iteration; a left-over limb is handled at .Lfinal_limb. The final carry limb is returned in %o0. */ + .register %g2,#scratch + .register %g3,#scratch +ENTRY(__mpn_mul_1_vis3) + subcc sz, 1, sz + be .Lfinal_limb /* Exactly one limb: skip the two-limb loop. */ + clr carry /* Delay slot: start with a zero carry limb. */ + +.Lloop: + ldx [s1_ptr + 0x00], tmp1 + ldx [s1_ptr + 0x08], tmp4 + mulx tmp1, s2_limb, tmp3 /* Low 64 bits of the product. */ + add s1_ptr, 0x10, s1_ptr + umulxhi tmp1, s2_limb, tmp2 /* High 64 bits of the unsigned product. */ + sub sz, 2, sz + mulx tmp4, s2_limb, tmp1 + add res_ptr, 0x10, res_ptr + umulxhi tmp4, s2_limb, tmp4 + addcc carry, tmp3, tmp3 /* Low product + incoming carry limb. */ + stx tmp3, [res_ptr - 0x10] + addxc %g0, tmp2, carry /* New carry limb = high product + carry flag. */ + addcc carry, tmp1, tmp1 + addxc %g0, tmp4, carry + brgz sz, .Lloop + stx tmp1, [res_ptr - 0x08] + + brlz,pt sz, .Lfinish /* sz < 0: no limb is left over. */ + nop + +.Lfinal_limb: + ldx [s1_ptr + 0x00], tmp1 + mulx tmp1, s2_limb, tmp3 + umulxhi tmp1, s2_limb, tmp2 + addcc carry, tmp3, tmp3 + addxc %g0, tmp2, carry + stx tmp3, [res_ptr + 0x00] + +.Lfinish: + retl + mov carry, %o0 /* Delay slot: return the carry limb (overwrites res_ptr, no longer needed). */ +END(__mpn_mul_1_vis3) diff --git a/libc/sysdeps/sparc/sparc64/multiarch/mul_1.S b/libc/sysdeps/sparc/sparc64/multiarch/mul_1.S new file mode 100644 index 000000000..25f51bf81 --- /dev/null +++ 
b/libc/sysdeps/sparc/sparc64/multiarch/mul_1.S @@ -0,0 +1,56 @@ +/* Multiple versions of mul_1 + + Copyright (C) 2013 Free Software Foundation, Inc. + Contributed by David S. Miller (davem@davemloft.net) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +/* IFUNC resolver: tests the HWCAP_SPARC_VIS3 bit of the mask passed in %o0 and returns in %o0 the address of __mpn_mul_1_vis3 or of __mpn_mul_1_generic. */ +ENTRY(__mpn_mul_1) + .type __mpn_mul_1, @gnu_indirect_function +# ifdef SHARED + SETUP_PIC_REG_LEAF(o3, o5) +# endif + set HWCAP_SPARC_VIS3, %o1 + andcc %o0, %o1, %g0 + be 1f /* VIS3 bit clear: take the generic path. */ + nop +# ifdef SHARED + sethi %gdop_hix22(__mpn_mul_1_vis3), %o1 + xor %o1, %gdop_lox10(__mpn_mul_1_vis3), %o1 +# else + set __mpn_mul_1_vis3, %o1 +# endif + ba 10f + nop +1: +# ifdef SHARED + sethi %gdop_hix22(__mpn_mul_1_generic), %o1 + xor %o1, %gdop_lox10(__mpn_mul_1_generic), %o1 +# else + set __mpn_mul_1_generic, %o1 /* Static build without VIS3: must select the generic routine, as the SHARED branch does. */ +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 /* Delay slot: return the selected address. */ +END(__mpn_mul_1) + +#define __mpn_mul_1 __mpn_mul_1_generic +#include "../mul_1.S" diff --git a/libc/sysdeps/sparc/sparc64/multiarch/sub_n-vis3.S b/libc/sysdeps/sparc/sparc64/multiarch/sub_n-vis3.S new file mode 100644 index 000000000..4e9a786d3 --- /dev/null +++ b/libc/sysdeps/sparc/sparc64/multiarch/sub_n-vis3.S @@ -0,0 +1,71 @@ +! SPARC v9 64-bit VIS3 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 +! and store difference in a third limb vector. +! 
+! Copyright (C) 2013 Free Software Foundation, Inc. +! This file is part of the GNU C Library. +! Contributed by David S. Miller <davem@davemloft.net> +! +! The GNU C Library is free software; you can redistribute it and/or +! modify it under the terms of the GNU Lesser General Public +! License as published by the Free Software Foundation; either +! version 2.1 of the License, or (at your option) any later version. +! +! The GNU C Library is distributed in the hope that it will be useful, +! but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +! Lesser General Public License for more details. +! +! You should have received a copy of the GNU Lesser General Public +! License along with the GNU C Library; if not, see +! <http://www.gnu.org/licenses/>. + +#include <sysdep.h> + +#define res_ptr %o0 +#define s1_ptr %o1 +#define s2_ptr %o2 +#define sz %o3 +#define tmp1 %g1 +#define tmp2 %g2 +#define tmp3 %g3 +#define tmp4 %o4 +/* Subtraction is done as s1 + ~s2 + carry, so a set carry flag means "no borrow". The loop handles two limbs per iteration; a left-over limb is handled at .Lfinal_limb. The borrow out is returned in %o0. */ + .register %g2,#scratch + .register %g3,#scratch +ENTRY(__mpn_sub_n_vis3) + subcc sz, 1, sz + be .Lfinal_limb /* Exactly one limb: skip the two-limb loop. */ + cmp %g0, 1 /* Delay slot: 0 - 1 sets the carry flag, the initial "no borrow" state. */ + +.Lloop: + ldx [s2_ptr + 0x00], tmp1 + add s2_ptr, 0x10, s2_ptr + ldx [s1_ptr + 0x00], tmp2 + add s1_ptr, 0x10, s1_ptr + ldx [s2_ptr - 0x08], tmp3 + add res_ptr, 0x10, res_ptr + ldx [s1_ptr - 0x08], tmp4 + sub sz, 2, sz /* Plain sub: the condition codes are not written. */ + xnor tmp1, %g0, tmp1 /* tmp1 = ~tmp1. */ + addxccc tmp1, tmp2, tmp1 /* s1 + ~s2 + carry, and update the carry. */ + stx tmp1, [res_ptr - 0x10] + xnor tmp3, %g0, tmp3 + addxccc tmp3, tmp4, tmp3 + brgz sz, .Lloop + stx tmp3, [res_ptr - 0x08] + + brlz,pt sz, .Lfinish /* sz < 0: no limb is left over. */ + nop + +.Lfinal_limb: + ldx [s2_ptr + 0x00], tmp1 + ldx [s1_ptr + 0x00], tmp2 + xnor tmp1, %g0, tmp1 + addxccc tmp1, tmp2, tmp1 + stx tmp1, [res_ptr + 0x00] + +.Lfinish: + clr %o0 + retl + movcc %xcc, 1, %o0 /* Delay slot: carry clear means a borrow occurred, so return 1. */ +END(__mpn_sub_n_vis3) diff --git a/libc/sysdeps/sparc/sparc64/multiarch/sub_n.S b/libc/sysdeps/sparc/sparc64/multiarch/sub_n.S new file mode 100644 index 000000000..5e15bea10 --- /dev/null +++ 
b/libc/sysdeps/sparc/sparc64/multiarch/sub_n.S @@ -0,0 +1,56 @@ +/* Multiple versions of sub_n + + Copyright (C) 2013 Free Software Foundation, Inc. + Contributed by David S. Miller (davem@davemloft.net) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +/* IFUNC resolver: tests the HWCAP_SPARC_VIS3 bit of the mask passed in %o0 and returns in %o0 the address of __mpn_sub_n_vis3 or of __mpn_sub_n_generic. */ +ENTRY(__mpn_sub_n) + .type __mpn_sub_n, @gnu_indirect_function +# ifdef SHARED + SETUP_PIC_REG_LEAF(o3, o5) +# endif + set HWCAP_SPARC_VIS3, %o1 + andcc %o0, %o1, %g0 + be 1f /* VIS3 bit clear: take the generic path. */ + nop +# ifdef SHARED + sethi %gdop_hix22(__mpn_sub_n_vis3), %o1 + xor %o1, %gdop_lox10(__mpn_sub_n_vis3), %o1 +# else + set __mpn_sub_n_vis3, %o1 +# endif + ba 10f + nop +1: +# ifdef SHARED + sethi %gdop_hix22(__mpn_sub_n_generic), %o1 + xor %o1, %gdop_lox10(__mpn_sub_n_generic), %o1 +# else + set __mpn_sub_n_generic, %o1 /* Static build without VIS3: must select the generic routine, as the SHARED branch does. */ +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 /* Delay slot: return the selected address. */ +END(__mpn_sub_n) + +#define __mpn_sub_n __mpn_sub_n_generic +#include "../sub_n.S" diff --git a/libc/sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S b/libc/sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S new file mode 100644 index 000000000..8f10f918a --- /dev/null +++ b/libc/sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S @@ -0,0 +1,87 @@ +! SPARC v9 64-bit VIS3 __mpn_submul_1 -- Multiply a limb vector with a +! 
limb and subtract the result from a second limb vector. +! +! Copyright (C) 2013 Free Software Foundation, Inc. +! This file is part of the GNU C Library. +! Contributed by David S. Miller <davem@davemloft.net> +! +! The GNU C Library is free software; you can redistribute it and/or +! modify it under the terms of the GNU Lesser General Public +! License as published by the Free Software Foundation; either +! version 2.1 of the License, or (at your option) any later version. +! +! The GNU C Library is distributed in the hope that it will be useful, +! but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +! Lesser General Public License for more details. +! +! You should have received a copy of the GNU Lesser General Public +! License along with the GNU C Library; if not, see +! <http://www.gnu.org/licenses/>. + +#include <sysdep.h> + +#define res_ptr %i0 +#define s1_ptr %i1 +#define sz %i2 +#define s2_limb %i3 +#define carry %o5 +#define tmp1 %g1 +#define tmp2 %g2 +#define tmp3 %g3 +#define tmp4 %o4 +#define tmp5 %l0 +#define tmp6 %l1 +#define tmp7 %l2 +#define tmp8 %l3 +/* The loop handles two limbs per iteration; a left-over limb is handled at .Lfinal_limb. Runs in its own register window and hands the final carry (borrow) limb to the caller's %o0 through restore. */ + .register %g2,#scratch + .register %g3,#scratch +ENTRY(__mpn_submul_1_vis3) + save %sp, -176, %sp + subcc sz, 1, sz + be .Lfinal_limb /* Exactly one limb: skip the two-limb loop. */ + clr carry /* Delay slot: start with a zero carry limb. */ + +.Lloop: + ldx [s1_ptr + 0x00], tmp1 + ldx [res_ptr + 0x00], tmp3 + ldx [s1_ptr + 0x08], tmp2 + ldx [res_ptr + 0x08], tmp4 + mulx tmp1, s2_limb, tmp5 /* Low 64 bits of the product. */ + add s1_ptr, 0x10, s1_ptr + umulxhi tmp1, s2_limb, tmp6 /* High 64 bits of the unsigned product. */ + add res_ptr, 0x10, res_ptr + mulx tmp2, s2_limb, tmp7 + sub sz, 2, sz + umulxhi tmp2, s2_limb, tmp8 + addcc carry, tmp5, tmp5 /* Low product + incoming carry limb. */ + addxc %g0, tmp6, carry /* New carry limb = high product + carry flag. */ + subcc tmp3, tmp5, tmp5 /* Destination limb minus that sum; the carry flag now holds the borrow. */ + addxc %g0, carry, carry /* Fold the borrow into the carry limb. */ + stx tmp5, [res_ptr - 0x10] + addcc carry, tmp7, tmp7 + addxc %g0, tmp8, carry + subcc tmp4, tmp7, tmp7 + addxc %g0, carry, carry + brgz sz, .Lloop + stx tmp7, [res_ptr - 0x08] + + brlz,pt sz, .Lfinish /* sz < 0: no limb is left over. */ + nop + +.Lfinal_limb: + ldx [s1_ptr + 0x00], tmp1 + ldx [res_ptr + 
0x00], tmp3 + mulx tmp1, s2_limb, tmp5 + umulxhi tmp1, s2_limb, tmp6 + addcc carry, tmp5, tmp5 + addxc %g0, tmp6, carry + subcc tmp3, tmp5, tmp5 + addxc %g0, carry, carry + stx tmp5, [res_ptr + 0x00] + +.Lfinish: + jmpl %i7 + 8, %g0 + restore carry, 0, %o0 /* Delay slot: pop the window and return the carry limb. */ +END(__mpn_submul_1_vis3) diff --git a/libc/sysdeps/sparc/sparc64/multiarch/submul_1.S b/libc/sysdeps/sparc/sparc64/multiarch/submul_1.S new file mode 100644 index 000000000..68552e9af --- /dev/null +++ b/libc/sysdeps/sparc/sparc64/multiarch/submul_1.S @@ -0,0 +1,56 @@ +/* Multiple versions of submul_1 + + Copyright (C) 2013 Free Software Foundation, Inc. + Contributed by David S. Miller (davem@davemloft.net) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +/* IFUNC resolver: tests the HWCAP_SPARC_VIS3 bit of the mask passed in %o0 and returns in %o0 the address of __mpn_submul_1_vis3 or of __mpn_submul_1_generic. */ +ENTRY(__mpn_submul_1) + .type __mpn_submul_1, @gnu_indirect_function +# ifdef SHARED + SETUP_PIC_REG_LEAF(o3, o5) +# endif + set HWCAP_SPARC_VIS3, %o1 + andcc %o0, %o1, %g0 + be 1f /* VIS3 bit clear: take the generic path. */ + nop +# ifdef SHARED + sethi %gdop_hix22(__mpn_submul_1_vis3), %o1 + xor %o1, %gdop_lox10(__mpn_submul_1_vis3), %o1 +# else + set __mpn_submul_1_vis3, %o1 +# endif + ba 10f + nop +1: +# ifdef SHARED + sethi %gdop_hix22(__mpn_submul_1_generic), %o1 + xor %o1, %gdop_lox10(__mpn_submul_1_generic), %o1 +# else + set __mpn_submul_1_generic, %o1 /* Static build without VIS3: must select the generic routine, as the SHARED branch does. */ +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 /* Delay slot: return the selected address. */ +END(__mpn_submul_1) + +#define __mpn_submul_1 __mpn_submul_1_generic +#include "../submul_1.S" diff --git a/libc/sysdeps/unix/sysv/linux/bits/fcntl-linux.h b/libc/sysdeps/unix/sysv/linux/bits/fcntl-linux.h index a8bb09075..b5929bd29 100644 --- a/libc/sysdeps/unix/sysv/linux/bits/fcntl-linux.h +++ b/libc/sysdeps/unix/sysv/linux/bits/fcntl-linux.h @@ -296,6 +296,13 @@ struct f_owner_ex # define SPLICE_F_GIFT 8 /* Pages passed in are a gift. */ +/* Flags for fallocate. */ +# define FALLOC_FL_KEEP_SIZE 1 /* Don't extend size of file + even if offset + len is + greater than file size. */ +# define FALLOC_FL_PUNCH_HOLE 2 /* Create a hole in the file. */ + + /* File handle structure. */ struct file_handle { diff --git a/libc/sysdeps/unix/sysv/linux/bits/socket.h b/libc/sysdeps/unix/sysv/linux/bits/socket.h index 25b115e50..eadd7d932 100644 --- a/libc/sysdeps/unix/sysv/linux/bits/socket.h +++ b/libc/sysdeps/unix/sysv/linux/bits/socket.h @@ -207,6 +207,8 @@ enum #define MSG_MORE MSG_MORE MSG_WAITFORONE = 0x10000, /* Wait for at least one packet to return.*/ #define MSG_WAITFORONE MSG_WAITFORONE + MSG_FASTOPEN = 0x20000000, /* Send data in TCP SYN. 
*/ +#define MSG_FASTOPEN MSG_FASTOPEN MSG_CMSG_CLOEXEC = 0x40000000 /* Set close_on_exit for file descriptor received through diff --git a/libc/sysdeps/unix/sysv/linux/malloc-sysdep.h b/libc/sysdeps/unix/sysv/linux/malloc-sysdep.h index 0a876dfa6..737ca0e65 100644 --- a/libc/sysdeps/unix/sysv/linux/malloc-sysdep.h +++ b/libc/sysdeps/unix/sysv/linux/malloc-sysdep.h @@ -55,3 +55,5 @@ check_may_shrink_heap (void) return may_shrink_heap; } + +#define HAVE_MREMAP 1 diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/Versions b/libc/sysdeps/unix/sysv/linux/powerpc/Versions index 1ef53b9e9..396a4236c 100644 --- a/libc/sysdeps/unix/sysv/linux/powerpc/Versions +++ b/libc/sysdeps/unix/sysv/linux/powerpc/Versions @@ -3,5 +3,6 @@ libc { __vdso_get_tbfreq; __vdso_clock_gettime; __vdso_clock_getres; + __vdso_getcpu; } } diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/bits/libc-vdso.h b/libc/sysdeps/unix/sysv/linux/powerpc/bits/libc-vdso.h index 6f79841ce..545fda462 100644 --- a/libc/sysdeps/unix/sysv/linux/powerpc/bits/libc-vdso.h +++ b/libc/sysdeps/unix/sysv/linux/powerpc/bits/libc-vdso.h @@ -30,6 +30,8 @@ extern void *__vdso_clock_getres; extern void *__vdso_get_tbfreq; +extern void *__vdso_getcpu; + #endif #endif /* _LIBC_VDSO_H */ diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c b/libc/sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c index 616342aec..5e88b83b5 100644 --- a/libc/sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c +++ b/libc/sysdeps/unix/sysv/linux/powerpc/get_clockfreq.c @@ -41,7 +41,8 @@ __get_clockfreq (void) /* If we can use the vDSO to obtain the timebase even better. 
*/ #ifdef SHARED INTERNAL_SYSCALL_DECL (err); - timebase_freq = INTERNAL_VSYSCALL_NO_SYSCALL_FALLBACK (get_tbfreq, err, 0); + timebase_freq = + INTERNAL_VSYSCALL_NO_SYSCALL_FALLBACK (get_tbfreq, err, hp_timing_t, 0); if (INTERNAL_SYSCALL_ERROR_P (timebase_freq, err) && INTERNAL_SYSCALL_ERRNO (timebase_freq, err) == ENOSYS) #endif diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/init-first.c b/libc/sysdeps/unix/sysv/linux/powerpc/init-first.c index 5202e7d2f..204c0c60a 100644 --- a/libc/sysdeps/unix/sysv/linux/powerpc/init-first.c +++ b/libc/sysdeps/unix/sysv/linux/powerpc/init-first.c @@ -27,6 +27,7 @@ void *__vdso_gettimeofday attribute_hidden; void *__vdso_clock_gettime; void *__vdso_clock_getres; void *__vdso_get_tbfreq; +void *__vdso_getcpu; static inline void @@ -40,7 +41,9 @@ _libc_vdso_platform_setup (void) __vdso_clock_getres = _dl_vdso_vsym ("__kernel_clock_getres", &linux2615); - __vdso_get_tbfreq = _dl_vdso_vsym ("__kernel_vdso_get_tbfreq", &linux2615); + __vdso_get_tbfreq = _dl_vdso_vsym ("__kernel_get_tbfreq", &linux2615); + + __vdso_getcpu = _dl_vdso_vsym ("__kernel_getcpu", &linux2615); } # define VDSO_SETUP _libc_vdso_platform_setup diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h index e047bf7bd..250f4fc8c 100644 --- a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h +++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h @@ -60,7 +60,8 @@ \ if (__vdso_##name != NULL) \ { \ - sc_ret = INTERNAL_VSYSCALL_NCS (__vdso_##name, sc_err, nr, ##args); \ + sc_ret = \ + INTERNAL_VSYSCALL_NCS (__vdso_##name, sc_err, long int, nr, ##args);\ if (!INTERNAL_SYSCALL_ERROR_P (sc_ret, sc_err)) \ goto out; \ if (INTERNAL_SYSCALL_ERRNO (sc_ret, sc_err) != ENOSYS) \ @@ -90,7 +91,8 @@ \ if (__vdso_##name != NULL) \ { \ - v_ret = INTERNAL_VSYSCALL_NCS (__vdso_##name, err, nr, ##args); \ + v_ret = \ + INTERNAL_VSYSCALL_NCS (__vdso_##name, err, long int, nr, ##args); \ if 
(!INTERNAL_SYSCALL_ERROR_P (v_ret, err) \ || INTERNAL_SYSCALL_ERRNO (v_ret, err) != ENOSYS) \ goto out; \ @@ -104,12 +106,12 @@ INTERNAL_SYSCALL (name, err, nr, ##args) # endif -# define INTERNAL_VSYSCALL_NO_SYSCALL_FALLBACK(name, err, nr, args...) \ +# define INTERNAL_VSYSCALL_NO_SYSCALL_FALLBACK(name, err, type, nr, args...) \ ({ \ - long int sc_ret = ENOSYS; \ + type sc_ret = ENOSYS; \ \ if (__vdso_##name != NULL) \ - sc_ret = INTERNAL_VSYSCALL_NCS (__vdso_##name, err, nr, ##args); \ + sc_ret = INTERNAL_VSYSCALL_NCS (__vdso_##name, err, type, nr, ##args); \ else \ err = 1 << 28; \ sc_ret; \ @@ -126,7 +128,7 @@ function call, with the exception of LR (which is needed for the "sc; bnslr+" sequence) and CR (where only CR0.SO is clobbered to signal an error return status). */ -# define INTERNAL_VSYSCALL_NCS(funcptr, err, nr, args...) \ +# define INTERNAL_VSYSCALL_NCS(funcptr, err, type, nr, args...) \ ({ \ register void *r0 __asm__ ("r0"); \ register long int r3 __asm__ ("r3"); \ @@ -139,18 +141,18 @@ register long int r10 __asm__ ("r10"); \ register long int r11 __asm__ ("r11"); \ register long int r12 __asm__ ("r12"); \ + register type rval __asm__ ("r3"); \ LOADARGS_##nr (funcptr, args); \ __asm__ __volatile__ \ ("mtctr %0\n\t" \ "bctrl\n\t" \ "mfcr %0" \ - : "=&r" (r0), \ - "=&r" (r3), "=&r" (r4), "=&r" (r5), "=&r" (r6), "=&r" (r7), \ - "=&r" (r8), "=&r" (r9), "=&r" (r10), "=&r" (r11), "=&r" (r12) \ - : ASM_INPUT_##nr \ - : "cr0", "ctr", "lr", "memory"); \ + : "+r" (r0), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7), \ + "+r" (r8), "+r" (r9), "+r" (r10), "+r" (r11), "+r" (r12) \ + : : "cr0", "ctr", "lr", "memory"); \ err = (long int) r0; \ - (int) r3; \ + __asm__ __volatile__ ("" : "=r" (rval) : "r" (r3), "r" (r4)); \ + rval; \ }) # undef INLINE_SYSCALL @@ -191,7 +193,7 @@ register long int r10 __asm__ ("r10"); \ register long int r11 __asm__ ("r11"); \ register long int r12 __asm__ ("r12"); \ - LOADARGS_##nr(name, args); \ + LOADARGS_##nr(name, args); \ 
__asm__ __volatile__ \ ("sc \n\t" \ "mfcr %0" \ diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h index 1f0c3a225..6ebab742c 100644 --- a/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h +++ b/libc/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h @@ -75,7 +75,8 @@ \ if (__vdso_##name != NULL) \ { \ - sc_ret = INTERNAL_VSYSCALL_NCS (__vdso_##name, sc_err, nr, ##args); \ + sc_ret = \ + INTERNAL_VSYSCALL_NCS (__vdso_##name, sc_err, long int, nr, ##args);\ if (!INTERNAL_SYSCALL_ERROR_P (sc_ret, sc_err)) \ goto out; \ if (INTERNAL_SYSCALL_ERRNO (sc_ret, sc_err) != ENOSYS) \ @@ -105,7 +106,8 @@ \ if (__vdso_##name != NULL) \ { \ - v_ret = INTERNAL_VSYSCALL_NCS (__vdso_##name, err, nr, ##args); \ + v_ret = \ + INTERNAL_VSYSCALL_NCS (__vdso_##name, err, long int, nr, ##args); \ if (!INTERNAL_SYSCALL_ERROR_P (v_ret, err) \ || INTERNAL_SYSCALL_ERRNO (v_ret, err) != ENOSYS) \ goto out; \ @@ -121,12 +123,12 @@ /* This version is for internal uses when there is no desire to set errno */ -#define INTERNAL_VSYSCALL_NO_SYSCALL_FALLBACK(name, err, nr, args...) \ +#define INTERNAL_VSYSCALL_NO_SYSCALL_FALLBACK(name, err, type, nr, args...) \ ({ \ - long int sc_ret = ENOSYS; \ + type sc_ret = ENOSYS; \ \ if (__vdso_##name != NULL) \ - sc_ret = INTERNAL_VSYSCALL_NCS (__vdso_##name, err, nr, ##args); \ + sc_ret = INTERNAL_VSYSCALL_NCS (__vdso_##name, err, type, nr, ##args); \ else \ err = 1 << 28; \ sc_ret; \ @@ -142,7 +144,7 @@ gave back in the non-error (CR0.SO cleared) case, otherwise (CR0.SO set) the negation of the return value in the kernel gets reverted. */ -#define INTERNAL_VSYSCALL_NCS(funcptr, err, nr, args...) \ +#define INTERNAL_VSYSCALL_NCS(funcptr, err, type, nr, args...) 
\ ({ \ register void *r0 __asm__ ("r0"); \ register long int r3 __asm__ ("r3"); \ @@ -151,20 +153,19 @@ register long int r6 __asm__ ("r6"); \ register long int r7 __asm__ ("r7"); \ register long int r8 __asm__ ("r8"); \ + register type rval __asm__ ("r3"); \ LOADARGS_##nr (funcptr, args); \ __asm__ __volatile__ \ ("mtctr %0\n\t" \ "bctrl\n\t" \ "mfcr %0\n\t" \ "0:" \ - : "=&r" (r0), \ - "=&r" (r3), "=&r" (r4), "=&r" (r5), \ - "=&r" (r6), "=&r" (r7), "=&r" (r8) \ - : ASM_INPUT_##nr \ - : "r9", "r10", "r11", "r12", \ - "cr0", "ctr", "lr", "memory"); \ - err = (long int) r0; \ - r3; \ + : "+r" (r0), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), \ + "+r" (r7), "+r" (r8) \ + : : "r9", "r10", "r11", "r12", "cr0", "ctr", "lr", "memory"); \ + err = (long int) r0; \ + __asm__ __volatile__ ("" : "=r" (rval) : "r" (r3)); \ + rval; \ }) #undef INLINE_SYSCALL diff --git a/libc/sysdeps/unix/sysv/linux/powerpc/sched_getcpu.c b/libc/sysdeps/unix/sysv/linux/powerpc/sched_getcpu.c new file mode 100644 index 000000000..617e6f121 --- /dev/null +++ b/libc/sysdeps/unix/sysv/linux/powerpc/sched_getcpu.c @@ -0,0 +1,30 @@ +/* Copyright (C) 2013 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sched.h> +#include <sysdep.h> +#include <bits/libc-vdso.h> + + +int +sched_getcpu (void) +{ + unsigned int cpu; + int r = INLINE_VSYSCALL (getcpu, 3, &cpu, NULL, NULL); + + return r == -1 ? r : cpu; +} diff --git a/libc/sysdeps/unix/sysv/linux/scsi/sg.h b/libc/sysdeps/unix/sysv/linux/scsi/sg.h index 9cad76ebf..68f57f29f 100644 --- a/libc/sysdeps/unix/sysv/linux/scsi/sg.h +++ b/libc/sysdeps/unix/sysv/linux/scsi/sg.h @@ -26,6 +26,8 @@ #define _SCSI_SG_H 1 #include <features.h> +#define __need_size_t +#include <stddef.h> /* New interface introduced in the 3.x SG drivers follows */ diff --git a/libc/sysdeps/unix/sysv/linux/sparc/bits/ipc.h b/libc/sysdeps/unix/sysv/linux/sparc/bits/ipc.h index 757d0472b..e59f96abc 100644 --- a/libc/sysdeps/unix/sysv/linux/sparc/bits/ipc.h +++ b/libc/sysdeps/unix/sysv/linux/sparc/bits/ipc.h @@ -56,6 +56,6 @@ struct ipc_perm unsigned short int __pad1; #endif unsigned short int __seq; /* Sequence number. */ - unsigned long long int __unused1; - unsigned long long int __unused2; + __extension__ unsigned long long int __unused1; + __extension__ unsigned long long int __unused2; }; diff --git a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/unix/sysv/linux/multiarch/Implies b/libc/sysdeps/unix/sysv/linux/sparc/sparc32/sparcv9/fpu/multiarch/Implies index a380d8a73..a380d8a73 100644 --- a/libc/sysdeps/sparc/sparc32/sparcv9/fpu/unix/sysv/linux/multiarch/Implies +++ b/libc/sysdeps/unix/sysv/linux/sparc/sparc32/sparcv9/fpu/multiarch/Implies diff --git a/libc/sysdeps/unix/sysv/linux/sparc/sys/ucontext.h b/libc/sysdeps/unix/sysv/linux/sparc/sys/ucontext.h index 74c729166..1a3d4b54a 100644 --- a/libc/sysdeps/unix/sysv/linux/sparc/sys/ucontext.h +++ b/libc/sysdeps/unix/sysv/linux/sparc/sys/ucontext.h @@ -217,7 +217,7 @@ typedef struct fpu typedef struct fpu { union { /* FPU floating point regs */ - unsigned long long fpu_regs[32]; /* 32 singles */ + __extension__ unsigned long long fpu_regs[32]; /* 32 singles */ double fpu_dregs[16]; /* 16 
doubles */ } fpu_fr; struct fq *fpu_q; /* ptr to array of FQ entries */ diff --git a/libc/sysdeps/unix/sysv/linux/sys/sysmacros.h b/libc/sysdeps/unix/sysv/linux/sys/sysmacros.h index 76eaf9f9d..a1f1b2697 100644 --- a/libc/sysdeps/unix/sysv/linux/sys/sysmacros.h +++ b/libc/sysdeps/unix/sysv/linux/sys/sysmacros.h @@ -21,10 +21,6 @@ #include <features.h> -/* If the compiler does not know long long it is out of luck. We are - not going to hack weird hacks to support the dev_t representation - they need. */ -#ifdef __GLIBC_HAVE_LONG_LONG __BEGIN_DECLS __extension__ @@ -38,7 +34,7 @@ extern unsigned long long int gnu_dev_makedev (unsigned int __major, unsigned int __minor) __THROW __attribute_const__; -# ifdef __USE_EXTERN_INLINES +#ifdef __USE_EXTERN_INLINES __extension__ __extern_inline __attribute_const__ unsigned int __NTH (gnu_dev_major (unsigned long long int __dev)) { @@ -58,13 +54,12 @@ __NTH (gnu_dev_makedev (unsigned int __major, unsigned int __minor)) | (((unsigned long long int) (__minor & ~0xff)) << 12) | (((unsigned long long int) (__major & ~0xfff)) << 32)); } -# endif +#endif __END_DECLS /* Access the functions with their traditional names. */ -# define major(dev) gnu_dev_major (dev) -# define minor(dev) gnu_dev_minor (dev) -# define makedev(maj, min) gnu_dev_makedev (maj, min) -#endif +#define major(dev) gnu_dev_major (dev) +#define minor(dev) gnu_dev_minor (dev) +#define makedev(maj, min) gnu_dev_makedev (maj, min) #endif /* sys/sysmacros.h */ diff --git a/libc/sysdeps/unix/sysv/linux/x86/bits/environments.h b/libc/sysdeps/unix/sysv/linux/x86/bits/environments.h index 0fe1e3f5f..27b37b30e 100644 --- a/libc/sysdeps/unix/sysv/linux/x86/bits/environments.h +++ b/libc/sysdeps/unix/sysv/linux/x86/bits/environments.h @@ -64,15 +64,19 @@ #else /* __WORDSIZE == 32 */ -/* By default we have 32-bit wide `int', `long int', pointers and `off_t' - and all platforms support LFS. 
*/ -# define _POSIX_V7_ILP32_OFF32 1 +/* We have 32-bit wide `int', `long int' and pointers and all platforms + support LFS. -mx32 has 64-bit wide `off_t'. */ # define _POSIX_V7_ILP32_OFFBIG 1 -# define _POSIX_V6_ILP32_OFF32 1 -# define _POSIX_V6_ILP32_OFFBIG 1 -# define _XBS5_ILP32_OFF32 1 +# define _POSIX_V6_ILP32_OFFBIG 1 # define _XBS5_ILP32_OFFBIG 1 +# ifndef __x86_64__ +/* -m32 has 32-bit wide `off_t'. */ +# define _POSIX_V7_ILP32_OFF32 1 +# define _POSIX_V6_ILP32_OFF32 1 +# define _XBS5_ILP32_OFF32 1 +# endif + /* We optionally provide an environment with the above size but an 64-bit side `off_t'. Therefore we don't define _POSIX_V7_ILP32_OFFBIG. */ @@ -89,8 +93,13 @@ #endif /* __WORDSIZE == 32 */ #define __ILP32_OFF32_CFLAGS "-m32" -#define __ILP32_OFFBIG_CFLAGS "-m32 -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64" #define __ILP32_OFF32_LDFLAGS "-m32" -#define __ILP32_OFFBIG_LDFLAGS "-m32" +#if defined __x86_64__ && defined __ILP32__ +# define __ILP32_OFFBIG_CFLAGS "-mx32" +# define __ILP32_OFFBIG_LDFLAGS "-mx32" +#else +# define __ILP32_OFFBIG_CFLAGS "-m32 -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64" +# define __ILP32_OFFBIG_LDFLAGS "-m32" +#endif #define __LP64_OFF64_CFLAGS "-m64" #define __LP64_OFF64_LDFLAGS "-m64" diff --git a/libc/sysdeps/unix/sysv/linux/x86/sys/procfs.h b/libc/sysdeps/unix/sysv/linux/x86/sys/procfs.h index dddbced57..ec318ad93 100644 --- a/libc/sysdeps/unix/sysv/linux/x86/sys/procfs.h +++ b/libc/sysdeps/unix/sysv/linux/x86/sys/procfs.h @@ -36,7 +36,7 @@ __BEGIN_DECLS /* Type for a general-purpose register. 
*/ #ifdef __x86_64__ -typedef unsigned long long elf_greg_t; +__extension__ typedef unsigned long long elf_greg_t; #else typedef unsigned long elf_greg_t; #endif diff --git a/libc/sysdeps/x86/bits/byteswap.h b/libc/sysdeps/x86/bits/byteswap.h index 0f96ba302..9e2effc1a 100644 --- a/libc/sysdeps/x86/bits/byteswap.h +++ b/libc/sysdeps/x86/bits/byteswap.h @@ -134,7 +134,7 @@ __bswap_64 (__uint64_t __bsx) } \ __r.__ll; })) # endif -#elif __GLIBC_HAVE_LONG_LONG +#else # define __bswap_constant_64(x) \ ((((x) & 0xff00000000000000ull) >> 56) \ | (((x) & 0x00ff000000000000ull) >> 40) \ diff --git a/libc/sysdeps/x86/bits/setjmp.h b/libc/sysdeps/x86/bits/setjmp.h index 8a7e5dcc9..7c666e20d 100644 --- a/libc/sysdeps/x86/bits/setjmp.h +++ b/libc/sysdeps/x86/bits/setjmp.h @@ -30,7 +30,7 @@ # if __WORDSIZE == 64 typedef long int __jmp_buf[8]; # elif defined __x86_64__ -typedef long long int __jmp_buf[8]; +__extension__ typedef long long int __jmp_buf[8]; # else typedef int __jmp_buf[6]; # endif diff --git a/libc/sysdeps/x86/fpu/bits/mathinline.h b/libc/sysdeps/x86/fpu/bits/mathinline.h index 838c13cd8..fed64149f 100644 --- a/libc/sysdeps/x86/fpu/bits/mathinline.h +++ b/libc/sysdeps/x86/fpu/bits/mathinline.h @@ -198,6 +198,7 @@ __NTH (lrint (double __x)) } # endif # ifdef __x86_64__ +__extension__ __MATH_INLINE long long int __NTH (llrintf (float __x)) { @@ -209,6 +210,7 @@ __NTH (llrintf (float __x)) __asm __volatile__ ("cvtss2si %1, %0" : "=r" (__res) : "xm" (__x)); return __res; } +__extension__ __MATH_INLINE long long int __NTH (llrint (double __x)) { @@ -896,16 +898,19 @@ __NTH (lrintl (long double __x)) ("fistpll %0" \ : "=m" (__llrintres) : "t" (__x) : "st"); \ return __llrintres +__extension__ __MATH_INLINE long long int __NTH (llrintf (float __x)) { __llrint_code; } +__extension__ __MATH_INLINE long long int __NTH (llrint (double __x)) { __llrint_code; } +__extension__ __MATH_INLINE long long int __NTH (llrintl (long double __x)) { diff --git 
a/libc/sysdeps/x86_64/dl-machine.h b/libc/sysdeps/x86_64/dl-machine.h index 660f1aa1e..4768c6954 100644 --- a/libc/sysdeps/x86_64/dl-machine.h +++ b/libc/sysdeps/x86_64/dl-machine.h @@ -286,6 +286,21 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, switch (r_type) { +# ifndef RTLD_BOOTSTRAP +# ifdef __ILP32__ + case R_X86_64_SIZE64: + /* Set to symbol size plus addend. */ + *(Elf64_Addr *) (uintptr_t) reloc_addr + = (Elf64_Addr) sym->st_size + reloc->r_addend; + break; + + case R_X86_64_SIZE32: +# else + case R_X86_64_SIZE64: +# endif + /* Set to symbol size plus addend. */ + value = sym->st_size; +# endif case R_X86_64_GLOB_DAT: case R_X86_64_JUMP_SLOT: *reloc_addr = value + reloc->r_addend; @@ -394,6 +409,11 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, relocation updates the whole 64-bit entry. */ *(Elf64_Addr *) reloc_addr = (Elf64_Addr) value + reloc->r_addend; break; +# ifndef __ILP32__ + case R_X86_64_SIZE32: + /* Set to symbol size plus addend. 
*/ + value = sym->st_size; +# endif case R_X86_64_32: value += reloc->r_addend; *(unsigned int *) reloc_addr = value; diff --git a/libc/sysdeps/x86_64/fpu/libm-test-ulps b/libc/sysdeps/x86_64/fpu/libm-test-ulps index 95b6aec81..63c6aed2a 100644 --- a/libc/sysdeps/x86_64/fpu/libm-test-ulps +++ b/libc/sysdeps/x86_64/fpu/libm-test-ulps @@ -244,6 +244,12 @@ ifloat: 1 Test "Imaginary part of: cacos (-0 - 1.5 i) == pi/2 + 1.194763217287109304111930828519090523536 i": double: 1 idouble: 1 +Test "Real part of: cacos (-1.0 + 0x1p50 i) == 1.570796326794897507409741391764983781004 - 3.535050620855721078027883819436759661753e1 i": +float: 1 +ifloat: 1 +Test "Real part of: cacos (-1.0 - 0x1p50 i) == 1.570796326794897507409741391764983781004 + 3.535050620855721078027883819436759661753e1 i": +float: 1 +ifloat: 1 Test "Imaginary part of: cacos (-1.5 + +0 i) == pi - 0.9624236501192068949955178268487368462704 i": double: 1 float: 1 @@ -254,6 +260,9 @@ ldouble: 1 Test "Imaginary part of: cacos (-1.5 - 0 i) == pi + 0.9624236501192068949955178268487368462704 i": ildouble: 1 ldouble: 1 +Test "Real part of: cacos (-2 - 3 i) == 2.1414491111159960199416055713254211 + 1.9833870299165354323470769028940395 i": +float: 1 +ifloat: 1 Test "Real part of: cacos (0.5 + +0 i) == 1.047197551196597746154214461093167628066 - 0 i": double: 1 idouble: 1 @@ -272,6 +281,12 @@ float: 1 ifloat: 1 ildouble: 2 ldouble: 2 +Test "Imaginary part of: cacos (0x1.fp1023 + 0x1.fp1023 i) == 7.853981633974483096156608458198757210493e-1 - 7.107906849659093345062145442726115449315e2 i": +double: 1 +idouble: 1 +Test "Imaginary part of: cacos (0x1.fp127 + 0x1.fp127 i) == 7.853981633974483096156608458198757210493e-1 - 8.973081118419833726837456344608533993585e1 i": +double: 1 +idouble: 1 Test "Imaginary part of: cacos (1.5 + +0 i) == +0 - 0.9624236501192068949955178268487368462704 i": double: 1 float: 1 |