summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Bert Tenjy <bert.tenjy@gmail.com> 2019-03-07 06:36:48 +0000
committer Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com> 2020-02-19 17:27:23 -0300
commit dbe5898b5f3b76c205fea9094afb7f1defd49b66 (patch)
tree d189d797cb5f233fd227c679d429a6c3921509ab
parent c5e277ed112bda78d2e93bb7392d463d2b6ebf57 (diff)
download glibc-dbe5898b5f3b76c205fea9094afb7f1defd49b66.tar.gz
PPC64: Add libmvec SIMD double-precision sine function [BZ #24206]
Implements double-precision vector sine function. The polynomial sine-approximating algorithm is adapted for PPC64 from x86_64 [commit #4b9c2b707b]. The patch has been tested on PPC64/POWER8 Little Endian and Big Endian. Testing uses the framework created for libmvec on x86_64 which runs tests on issuing 'make check'. Tests of the new vector sine function all pass. Reviewed-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
-rw-r--r-- NEWS 1
-rw-r--r-- sysdeps/powerpc/fpu/libm-test-ulps 3
-rw-r--r-- sysdeps/powerpc/powerpc64/fpu/Versions 2
-rw-r--r-- sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile 6
-rw-r--r-- sysdeps/powerpc/powerpc64/fpu/multiarch/test-double-vlen2-wrappers.c 1
-rw-r--r-- sysdeps/powerpc/powerpc64/fpu/multiarch/vec_d_sin2_vsx.c 101
-rw-r--r-- sysdeps/powerpc/powerpc64/fpu/multiarch/vec_d_trig_data.h 44
-rw-r--r-- sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist 1
8 files changed, 140 insertions, 19 deletions
diff --git a/NEWS b/NEWS
index b7e4fe4eb7..d17067cba3 100644
--- a/NEWS
+++ b/NEWS
@@ -262,6 +262,7 @@ Major new features:
The following functions now have vector versions.
- double-precision cosine: cos
- single-precision cosine: cosf
+ - double-precision sine: sin
GCC support for auto-vectorization of functions on PPC64 is not yet
available. Until that is done, the new vector math functions are
diff --git a/sysdeps/powerpc/fpu/libm-test-ulps b/sysdeps/powerpc/fpu/libm-test-ulps
index 3bd9e67096..e72452e757 100644
--- a/sysdeps/powerpc/fpu/libm-test-ulps
+++ b/sysdeps/powerpc/fpu/libm-test-ulps
@@ -2570,6 +2570,9 @@ ifloat128: 3
ildouble: 5
ldouble: 5
+Function: "sin_vlen2":
+double: 2
+
Function: "sincos":
double: 1
float: 1
diff --git a/sysdeps/powerpc/powerpc64/fpu/Versions b/sysdeps/powerpc/powerpc64/fpu/Versions
index bdd4b657c4..4852a2791f 100644
--- a/sysdeps/powerpc/powerpc64/fpu/Versions
+++ b/sysdeps/powerpc/powerpc64/fpu/Versions
@@ -1,5 +1,5 @@
libmvec {
GLIBC_2.30 {
- _ZGVbN2v_cos; _ZGVbN4v_cosf;
+ _ZGVbN2v_cos; _ZGVbN4v_cosf; _ZGVbN2v_sin;
}
}
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
index 0f43cf5e89..fe4cef9ce2 100644
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/Makefile
@@ -1,7 +1,9 @@
ifeq ($(subdir),mathvec)
-libmvec-sysdep_routines += vec_d_cos2_vsx vec_s_cosf4_vsx
+libmvec-sysdep_routines += vec_d_cos2_vsx vec_s_cosf4_vsx \
+ vec_d_sin2_vsx
CFLAGS-vec_d_cos2_vsx.c += -mabi=altivec -maltivec -mvsx
CFLAGS-vec_s_cosf4_vsx.c += -mabi=altivec -maltivec -mvsx
+CFLAGS-vec_d_sin2_vsx.c += -mabi=altivec -maltivec -mvsx
endif
# Variables for libmvec tests.
@@ -9,7 +11,7 @@ ifeq ($(subdir),math)
ifeq ($(build-mathvec),yes)
libmvec-tests += double-vlen2 float-vlen4
-double-vlen2-funcs = cos
+double-vlen2-funcs = cos sin
float-vlen4-funcs = cos
double-vlen2-arch-ext-cflags = -mabi=altivec -maltivec -mvsx -DREQUIRE_VSX
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/test-double-vlen2-wrappers.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/test-double-vlen2-wrappers.c
index 17e2cc0724..10a1ec281b 100644
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/test-double-vlen2-wrappers.c
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/test-double-vlen2-wrappers.c
@@ -22,3 +22,4 @@
#define VEC_TYPE vector double
VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVbN2v_cos)
+VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVbN2v_sin)
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_d_sin2_vsx.c b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_d_sin2_vsx.c
new file mode 100644
index 0000000000..d6c1e25673
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_d_sin2_vsx.c
@@ -0,0 +1,101 @@
+/* Function sin vectorized with VSX.
+ Copyright (C) 2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <math.h>
+#include "vec_d_trig_data.h"
+
+/* ALGORITHM DESCRIPTION:
+
+ ( low accuracy ( < 4ulp ) or enhanced performance
+ ( half of correct mantissa ) implementation )
+
+ Argument representation:
+ arg = N*Pi + R
+
+ Result calculation:
+ sin(arg) = sin(N*Pi + R) = (-1)^N * sin(R)
+ sin(R) is approximated by corresponding polynomial.
+
+ _ZGVbN2v_sin takes a vector X of two doubles and returns a vector
+ whose lanes hold the sine of the corresponding lanes of X.
+ The vector path works on |X| and restores the input sign at the end.
+ Any lane with |X| greater than __d_rangeval is recomputed with the
+ scalar sin function before returning. */
+
+vector double
+_ZGVbN2v_sin (vector double x)
+{
+
+ /* ARGUMENT RANGE REDUCTION: X' = |X|. */
+ vector double abs_x_prime = vec_abs (x);
+
+ /* Y = X'*InvPi + RS : right shifter add.
+ Adding the right shifter RS (__d_rshifter) pushes the fractional
+ bits of X'/Pi out of the mantissa, so the rounded integer N ends up
+ in the low mantissa bits of Y. */
+ vector double y = (abs_x_prime * __d_inv_pi) + __d_rshifter;
+
+ /* N = Y - RS : right shifter sub.
+ Removing RS again leaves N as a floating-point integer value. */
+ vector double n = y - __d_rshifter;
+
+ /* SignRes = Y<<63 : shift LSB to MSB place for result sign.
+ The LSB of Y's bit pattern is the parity of N, so SignRes carries
+ the (-1)^N factor as a sign bit.
+ NOTE(review): vec_splats (63) is reinterpreted as 64-bit shift
+ counts by the cast; presumably vec_sl only uses the low bits of
+ each count - confirm. */
+ vector double sign_res = (vector double) vec_sl ((vector long long) y,
+ (vector unsigned long long)
+ vec_splats (63));
+
+ /* Check for large arguments path.
+ Lanes flagged here are overwritten with the scalar sin result at
+ the end of the function. */
+ vector bool long long large_in = vec_cmpgt (abs_x_prime, __d_rangeval);
+
+ /* R = X' - N*Pi1.
+ Pi is split into high, mid and low parts (Pi1, Pi2, Pi3) that are
+ subtracted one after another. */
+ vector double r = abs_x_prime - (n * __d_pi1_fma);
+
+ /* R = R - N*Pi2. */
+ r = r - (n * __d_pi2_fma);
+
+ /* R = R - N*Pi3. */
+ r = r - (n * __d_pi3_fma);
+
+ /* POLYNOMIAL APPROXIMATION: R2 = R*R. */
+ vector double r2 = r * r;
+
+ /* R = R^SignRes : update sign of reduced argument.
+ The XOR on the bit patterns flips the sign of R when N is odd. */
+ vector double r_sign
+ = (vector double) ((vector long long) r ^ (vector long long) sign_res);
+
+ /* Poly = C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))).
+ Horner evaluation starting from the highest coefficient. */
+ vector double poly = r2 * __d_coeff7_sin + __d_coeff6_sin;
+ poly = poly * r2 + __d_coeff5_sin;
+ poly = poly * r2 + __d_coeff4_sin;
+ poly = poly * r2 + __d_coeff3_sin;
+
+ /* Poly = R2*(C1+R2*(C2+R2*Poly)). */
+ poly = poly * r2 + __d_coeff2_sin;
+ poly = poly * r2 + __d_coeff1_sin;
+ poly = poly * r2;
+
+ /* Poly = Poly*R + R.
+ R is the sign-adjusted reduced argument here, so the result already
+ includes the (-1)^N factor. */
+ poly = poly * r_sign + r_sign;
+
+ /* SignX: -ve sign bit of X.
+ X and-complement AbsMask keeps only the sign bit of each lane. */
+ vector double neg_sign
+ = (vector double) vec_andc ((vector bool long long) x, __d_abs_mask);
+
+ /* RECONSTRUCTION: Final sign setting: Res = Poly^SignX.
+ The computation above used |X|; sin is odd, so the sign of the
+ result is flipped for lanes where X is negative.
+ The two checks that follow replace any lane flagged in large_in
+ with the scalar sin of the original input. */
+ vector double out
+ = (vector double) ((vector long long) poly ^ (vector long long) neg_sign);
+
+ if (large_in[0] != 0)
+ out[0] = sin (x[0]);
+
+ if (large_in[1] != 0)
+ out[1] = sin (x[1]);
+
+ return out;
+
+} /* _ZGVbN2v_sin function. */
diff --git a/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_d_trig_data.h b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_d_trig_data.h
index ecd47d258f..bc823ef19c 100644
--- a/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_d_trig_data.h
+++ b/sysdeps/powerpc/powerpc64/fpu/multiarch/vec_d_trig_data.h
@@ -23,38 +23,50 @@
#include <altivec.h>
/* PI/2. */
-const vector double __d_half_pi = {0x1.921fb54442d18p+0, 0x1.921fb54442d18p+0};
+static const vector double __d_half_pi = {0x1.921fb54442d18p+0, 0x1.921fb54442d18p+0};
/* Inverse PI. */
-const vector double __d_inv_pi = {0x1.45f306dc9c883p-2, 0x1.45f306dc9c883p-2};
+static const vector double __d_inv_pi = {0x1.45f306dc9c883p-2, 0x1.45f306dc9c883p-2};
/* Right-shifter constant. */
-const vector double __d_rshifter = {0x1.8p+52, 0x1.8p+52};
+static const vector double __d_rshifter = {0x1.8p+52, 0x1.8p+52};
/* Working range threshold. */
-const vector double __d_rangeval = {0x1p+23, 0x1p+23};
+static const vector double __d_rangeval = {0x1p+23, 0x1p+23};
/* One-half. */
-const vector double __d_one_half = {0x1p-1, 0x1p-1};
+static const vector double __d_one_half = {0x1p-1, 0x1p-1};
/* Range reduction PI-based constants if FMA available:
PI high part (FMA available). */
-const vector double __d_pi1_fma = {0x1.921fb54442d18p+1, 0x1.921fb54442d18p+1};
+static const vector double __d_pi1_fma = {0x1.921fb54442d18p+1, 0x1.921fb54442d18p+1};
/* PI mid part (FMA available). */
-const vector double __d_pi2_fma = {0x1.1a62633145c06p-53, 0x1.1a62633145c06p-53};
+static const vector double __d_pi2_fma = {0x1.1a62633145c06p-53, 0x1.1a62633145c06p-53};
/* PI low part (FMA available). */
-const vector double __d_pi3_fma
+static const vector double __d_pi3_fma
= {0x1.c1cd129024e09p-106,0x1.c1cd129024e09p-106};
-/* Polynomial coefficients (relative error 2^(-52.115)). */
-const vector double __d_coeff7 = {-0x1.9f0d60811aac8p-41,-0x1.9f0d60811aac8p-41};
-const vector double __d_coeff6 = {0x1.60e6857a2f22p-33,0x1.60e6857a2f22p-33};
-const vector double __d_coeff5 = {-0x1.ae63546002231p-26,-0x1.ae63546002231p-26};
-const vector double __d_coeff4 = {0x1.71de38030feap-19,0x1.71de38030feap-19};
-const vector double __d_coeff3 = {-0x1.a01a019a5b86dp-13,-0x1.a01a019a5b86dp-13};
-const vector double __d_coeff2 = {0x1.111111110a4a8p-7,0x1.111111110a4a8p-7};
-const vector double __d_coeff1 = {-0x1.55555555554a7p-3,-0x1.55555555554a7p-3};
+/* Polynomial coefficients for cosine (relative error 2^(-52.115)). */
+static const vector double __d_coeff7 = {-0x1.9f0d60811aac8p-41,-0x1.9f0d60811aac8p-41};
+static const vector double __d_coeff6 = {0x1.60e6857a2f22p-33,0x1.60e6857a2f22p-33};
+static const vector double __d_coeff5 = {-0x1.ae63546002231p-26,-0x1.ae63546002231p-26};
+static const vector double __d_coeff4 = {0x1.71de38030feap-19,0x1.71de38030feap-19};
+static const vector double __d_coeff3 = {-0x1.a01a019a5b86dp-13,-0x1.a01a019a5b86dp-13};
+static const vector double __d_coeff2 = {0x1.111111110a4a8p-7,0x1.111111110a4a8p-7};
+static const vector double __d_coeff1 = {-0x1.55555555554a7p-3,-0x1.55555555554a7p-3};
+
+/* Absolute value mask: every bit set except the sign bit of each
+ 64-bit lane. _ZGVbN2v_sin and-complements the input with it to
+ isolate the sign bit. */
+static const vector bool long long __d_abs_mask = { 0x7fffffffffffffff, 0x7fffffffffffffff };
+
+/* Polynomial coefficients for sine (relative error 2^(-52.115)).
+ _ZGVbN2v_sin evaluates them as
+ sin(R) ~= R + R*R2*(C1+R2*(C2+R2*(C3+R2*(C4+R2*(C5+R2*(C6+R2*C7))))))
+ with R2 = R*R, where Cn is __d_coeffn_sin. Both lanes of each
+ constant hold the same value. */
+static const vector double __d_coeff7_sin = { -0x1.9f1517e9f65fp-41, -0x1.9f1517e9f65fp-41 };
+static const vector double __d_coeff6_sin = { 0x1.60e6bee01d83ep-33, 0x1.60e6bee01d83ep-33 };
+static const vector double __d_coeff5_sin = { -0x1.ae6355aaa4a53p-26, -0x1.ae6355aaa4a53p-26 };
+static const vector double __d_coeff4_sin = { 0x1.71de3806add1ap-19, 0x1.71de3806add1ap-19 };
+static const vector double __d_coeff3_sin = { -0x1.a01a019a659ddp-13, -0x1.a01a019a659ddp-13 };
+static const vector double __d_coeff2_sin = { 0x1.111111110a573p-7, 0x1.111111110a573p-7 };
+static const vector double __d_coeff1_sin = { -0x1.55555555554a8p-3, -0x1.55555555554a8p-3 };
#endif /* D_TRIG_DATA_H. */
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist
index 8eef5e1e72..a1a7f69d4c 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/libmvec.abilist
@@ -1,2 +1,3 @@
GLIBC_2.30 _ZGVbN2v_cos F
+GLIBC_2.30 _ZGVbN2v_sin F
GLIBC_2.30 _ZGVbN4v_cosf F