diff options
author | Ulrich Drepper <drepper@gmail.com> | 2011-10-24 20:19:17 -0400 |
---|---|---|
committer | Ulrich Drepper <drepper@gmail.com> | 2011-10-24 20:19:17 -0400 |
commit | af968f62f24c5c0ef4e7e5ab41acae946908c112 (patch) | |
tree | e1e0570eeb00c434cc751cbadfbeae150eeea11a /sysdeps/x86_64/fpu/multiarch | |
parent | 58985aa92f57ff46e96b32388ce65e7fdd8c8b9e (diff) | |
download | glibc-af968f62f24c5c0ef4e7e5ab41acae946908c112.tar.gz |
Optimize accurate 64-bit routines for FMA4 on x86-64
Diffstat (limited to 'sysdeps/x86_64/fpu/multiarch')
31 files changed, 328 insertions, 0 deletions
diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile index bd07e98e21..70cb740aac 100644 --- a/sysdeps/x86_64/fpu/multiarch/Makefile +++ b/sysdeps/x86_64/fpu/multiarch/Makefile @@ -1,4 +1,36 @@ ifeq ($(subdir),math) libm-sysdep_routines += s_floor-c s_ceil-c s_floorf-c s_ceilf-c \ s_rint-c s_rintf-c s_nearbyint-c s_nearbyintf-c + +ifeq ($(have-mfma4),yes) +libm-sysdep_routines += e_exp-fma4 e_log-fma4 e_pow-fma4 s_atan-fma4 \ + e_asin-fma4 e_atan2-fma4 s_sin-fma4 s_tan-fma4 \ + mplog-fma4 mpa-fma4 slowexp-fma4 slowpow-fma4 \ + sincos32-fma4 doasin-fma4 dosincos-fma4 \ + brandred-fma4 halfulp-fma4 mpexp-fma4 \ + mpatan2-fma4 mpatan-fma4 mpsqrt-fma4 mptan-fma4 + +CFLAGS-brandred-fma4.c = -mfma4 +CFLAGS-doasin-fma4.c = -mfma4 +CFLAGS-dosincos-fma4.c = -mfma4 +CFLAGS-e_asin-fma4.c = -mfma4 +CFLAGS-e_atan2-fma4.c = -mfma4 +CFLAGS-e_exp-fma4.c = -mfma4 +CFLAGS-e_log-fma4.c = -mfma4 +CFLAGS-e_pow-fma4.c = -mfma4 +CFLAGS-halfulp-fma4.c = -mfma4 +CFLAGS-mpa-fma4.c = -mfma4 +CFLAGS-mpatan-fma4.c = -mfma4 +CFLAGS-mpatan2-fma4.c = -mfma4 +CFLAGS-mpexp-fma4.c = -mfma4 +CFLAGS-mplog-fma4.c = -mfma4 +CFLAGS-mpsqrt-fma4.c = -mfma4 +CFLAGS-mptan-fma4.c = -mfma4 +CFLAGS-s_atan-fma4.c = -mfma4 +CFLAGS-sincos32-fma4.c = -mfma4 +CFLAGS-slowexp-fma4.c = -mfma4 +CFLAGS-slowpow-fma4.c = -mfma4 +CFLAGS-s_sin-fma4.c = -mfma4 +CFLAGS-s_tan-fma4.c = -mfma4 +endif endif diff --git a/sysdeps/x86_64/fpu/multiarch/brandred-fma4.c b/sysdeps/x86_64/fpu/multiarch/brandred-fma4.c new file mode 100644 index 0000000000..93fb5a11a2 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/brandred-fma4.c @@ -0,0 +1,3 @@ +#define __branred __branred_fma4 + +#include <sysdeps/ieee754/dbl-64/branred.c> diff --git a/sysdeps/x86_64/fpu/multiarch/doasin-fma4.c b/sysdeps/x86_64/fpu/multiarch/doasin-fma4.c new file mode 100644 index 0000000000..d7ba67e60b --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/doasin-fma4.c @@ -0,0 +1,3 @@ +#define __doasin __doasin_fma4 + +#include 
<sysdeps/ieee754/dbl-64/doasin.c> diff --git a/sysdeps/x86_64/fpu/multiarch/dosincos-fma4.c b/sysdeps/x86_64/fpu/multiarch/dosincos-fma4.c new file mode 100644 index 0000000000..02b420bbd5 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/dosincos-fma4.c @@ -0,0 +1,5 @@ +#define __docos __docos_fma4 +#define __dubcos __dubcos_fma4 +#define __dubsin __dubsin_fma4 + +#include <sysdeps/ieee754/dbl-64/dosincos.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_asin-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_asin-fma4.c new file mode 100644 index 0000000000..938bc84187 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_asin-fma4.c @@ -0,0 +1,10 @@ +#define __ieee754_acos __ieee754_acos_fma4 +#define __ieee754_asin __ieee754_asin_fma4 +#define __cos32 __cos32_fma4 +#define __doasin __doasin_fma4 +#define __docos __docos_fma4 +#define __dubcos __dubcos_fma4 +#define __dubsin __dubsin_fma4 +#define __sin32 __sin32_fma4 + +#include <sysdeps/ieee754/dbl-64/e_asin.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_asin.c b/sysdeps/x86_64/fpu/multiarch/e_asin.c new file mode 100644 index 0000000000..8882cead9d --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_asin.c @@ -0,0 +1,23 @@ +#ifdef HAVE_FMA4_SUPPORT +# include <init-arch.h> +# include <math_private.h> + +extern double __ieee754_acos_sse2 (double); +extern double __ieee754_acos_fma4 (double); +extern double __ieee754_asin_sse2 (double); +extern double __ieee754_asin_fma4 (double); + +libm_ifunc (__ieee754_acos, + HAS_FMA4 ? __ieee754_acos_fma4 : __ieee754_acos_sse2); +strong_alias (__ieee754_acos, __acos_finite) + +libm_ifunc (__ieee754_asin, + HAS_FMA4 ? 
__ieee754_asin_fma4 : __ieee754_asin_sse2); +strong_alias (__ieee754_asin, __asin_finite) + +# define __ieee754_acos __ieee754_acos_sse2 +# define __ieee754_asin __ieee754_asin_sse2 +#endif + + +#include <sysdeps/ieee754/dbl-64/e_asin.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_atan2-fma4.c new file mode 100644 index 0000000000..84a6f86349 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_atan2-fma4.c @@ -0,0 +1,9 @@ +#define __ieee754_atan2 __ieee754_atan2_fma4 +#define __add __add_fma4 +#define __dbl_mp __dbl_mp_fma4 +#define __dvd __dvd_fma4 +#define __mpatan2 __mpatan2_fma4 +#define __mul __mul_fma4 +#define __sub __sub_fma4 + +#include <sysdeps/ieee754/dbl-64/e_atan2.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c new file mode 100644 index 0000000000..12fc929068 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c @@ -0,0 +1,16 @@ +#ifdef HAVE_FMA4_SUPPORT +# include <init-arch.h> +# include <math_private.h> + +extern double __ieee754_atan2_sse2 (double, double); +extern double __ieee754_atan2_fma4 (double, double); + +libm_ifunc (__ieee754_atan2, + HAS_FMA4 ? 
__ieee754_atan2_fma4 : __ieee754_atan2_sse2); +strong_alias (__ieee754_atan2, __atan2_finite) + +# define __ieee754_atan2 __ieee754_atan2_sse2 +#endif + + +#include <sysdeps/ieee754/dbl-64/e_atan2.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c new file mode 100644 index 0000000000..942dfffd78 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c @@ -0,0 +1,5 @@ +#define __ieee754_exp __ieee754_exp_fma4 +#define __exp1 __exp1_fma4 +#define __slowexp __slowexp_fma4 + +#include <sysdeps/ieee754/dbl-64/e_exp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c new file mode 100644 index 0000000000..fc1096b54c --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c @@ -0,0 +1,15 @@ +#ifdef HAVE_FMA4_SUPPORT +# include <init-arch.h> +# include <math_private.h> + +extern double __ieee754_exp_sse2 (double); +extern double __ieee754_exp_fma4 (double); + +libm_ifunc (__ieee754_exp, HAS_FMA4 ? __ieee754_exp_fma4 : __ieee754_exp_sse2); +strong_alias (__ieee754_exp, __exp_finite) + +# define __ieee754_exp __ieee754_exp_sse2 +#endif + + +#include <sysdeps/ieee754/dbl-64/e_exp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c new file mode 100644 index 0000000000..0be66d160c --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c @@ -0,0 +1,7 @@ +#define __ieee754_log __ieee754_log_fma4 +#define __mplog __mplog_fma4 +#define __add __add_fma4 +#define __dbl_mp __dbl_mp_fma4 +#define __sub __sub_fma4 + +#include <sysdeps/ieee754/dbl-64/e_log.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c new file mode 100644 index 0000000000..c54264609c --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_log.c @@ -0,0 +1,15 @@ +#ifdef HAVE_FMA4_SUPPORT +# include <init-arch.h> +# include <math_private.h> + +extern double __ieee754_log_sse2 (double); +extern double __ieee754_log_fma4 (double); + 
+libm_ifunc (__ieee754_log, HAS_FMA4 ? __ieee754_log_fma4 : __ieee754_log_sse2); +strong_alias (__ieee754_log, __log_finite) + +# define __ieee754_log __ieee754_log_sse2 +#endif + + +#include <sysdeps/ieee754/dbl-64/e_log.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c new file mode 100644 index 0000000000..20313be389 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c @@ -0,0 +1,5 @@ +#define __ieee754_pow __ieee754_pow_fma4 +#define __exp1 __exp1_fma4 +#define __slowpow __slowpow_fma4 + +#include <sysdeps/ieee754/dbl-64/e_pow.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow.c b/sysdeps/x86_64/fpu/multiarch/e_pow.c new file mode 100644 index 0000000000..a740b6c447 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/e_pow.c @@ -0,0 +1,15 @@ +#ifdef HAVE_FMA4_SUPPORT +# include <init-arch.h> +# include <math_private.h> + +extern double __ieee754_pow_sse2 (double, double); +extern double __ieee754_pow_fma4 (double, double); + +libm_ifunc (__ieee754_pow, HAS_FMA4 ? 
__ieee754_pow_fma4 : __ieee754_pow_sse2); +strong_alias (__ieee754_pow, __pow_finite) + +# define __ieee754_pow __ieee754_pow_sse2 +#endif + + +#include <sysdeps/ieee754/dbl-64/e_pow.c> diff --git a/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c b/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c new file mode 100644 index 0000000000..3fc223e613 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c @@ -0,0 +1,3 @@ +#define __halfulp __halfulp_fma4 + +#include <sysdeps/ieee754/dbl-64/halfulp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/mpa-fma4.c b/sysdeps/x86_64/fpu/multiarch/mpa-fma4.c new file mode 100644 index 0000000000..7b9e2ef8d1 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/mpa-fma4.c @@ -0,0 +1,10 @@ +#define __add __add_fma4 +#define __mul __mul_fma4 +#define __sub __sub_fma4 +#define __dbl_mp __dbl_mp_fma4 +#define __dvd __dvd_fma4 + +#define NO___CPY 1 +#define NO___MP_DBL 1 + +#include <sysdeps/ieee754/dbl-64/mpa.c> diff --git a/sysdeps/x86_64/fpu/multiarch/mpatan-fma4.c b/sysdeps/x86_64/fpu/multiarch/mpatan-fma4.c new file mode 100644 index 0000000000..942974b26e --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/mpatan-fma4.c @@ -0,0 +1,8 @@ +#define __mpatan __mpatan_fma4 +#define __add __add_fma4 +#define __dvd __dvd_fma4 +#define __mpsqrt __mpsqrt_fma4 +#define __mul __mul_fma4 +#define __sub __sub_fma4 + +#include <sysdeps/ieee754/dbl-64/mpatan.c> diff --git a/sysdeps/x86_64/fpu/multiarch/mpatan2-fma4.c b/sysdeps/x86_64/fpu/multiarch/mpatan2-fma4.c new file mode 100644 index 0000000000..e7c469e4ca --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/mpatan2-fma4.c @@ -0,0 +1,8 @@ +#define __mpatan2 __mpatan2_fma4 +#define __add __add_fma4 +#define __dvd __dvd_fma4 +#define __mpatan __mpatan_fma4 +#define __mpsqrt __mpsqrt_fma4 +#define __mul __mul_fma4 + +#include <sysdeps/ieee754/dbl-64/mpatan2.c> diff --git a/sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c b/sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c new file mode 100644 index 0000000000..021970c670 --- 
/dev/null +++ b/sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c @@ -0,0 +1,7 @@ +#define __mpexp __mpexp_fma4 +#define __add __add_fma4 +#define __dbl_mp __dbl_mp_fma4 +#define __dvd __dvd_fma4 +#define __mul __mul_fma4 + +#include <sysdeps/ieee754/dbl-64/mpexp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/mplog-fma4.c b/sysdeps/x86_64/fpu/multiarch/mplog-fma4.c new file mode 100644 index 0000000000..9581eaf5c8 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/mplog-fma4.c @@ -0,0 +1,7 @@ +#define __mplog __mplog_fma4 +#define __add __add_fma4 +#define __mpexp __mpexp_fma4 +#define __mul __mul_fma4 +#define __sub __sub_fma4 + +#include <sysdeps/ieee754/dbl-64/mplog.c> diff --git a/sysdeps/x86_64/fpu/multiarch/mpsqrt-fma4.c b/sysdeps/x86_64/fpu/multiarch/mpsqrt-fma4.c new file mode 100644 index 0000000000..43b6493db2 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/mpsqrt-fma4.c @@ -0,0 +1,6 @@ +#define __mpsqrt __mpsqrt_fma4 +#define __dbl_mp __dbl_mp_fma4 +#define __mul __mul_fma4 +#define __sub __sub_fma4 + +#include <sysdeps/ieee754/dbl-64/mpsqrt.c> diff --git a/sysdeps/x86_64/fpu/multiarch/mptan-fma4.c b/sysdeps/x86_64/fpu/multiarch/mptan-fma4.c new file mode 100644 index 0000000000..767924edf0 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/mptan-fma4.c @@ -0,0 +1,6 @@ +#define __mptan __mptan_fma4 +#define __c32 __c32_fma4 +#define __dvd __dvd_fma4 +#define __mpranred __mpranred_fma4 + +#include <sysdeps/ieee754/dbl-64/mptan.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c b/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c new file mode 100644 index 0000000000..a8f0977649 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c @@ -0,0 +1,8 @@ +#define atan __atan_fma4 +#define __add __add_fma4 +#define __dbl_mp __dbl_mp_fma4 +#define __mpatan __mpatan_fma4 +#define __mul __mul_fma4 +#define __sub __sub_fma4 + +#include <sysdeps/ieee754/dbl-64/s_atan.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c new file mode 
100644 index 0000000000..ffc4a56fa8 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c @@ -0,0 +1,14 @@ +#ifdef HAVE_FMA4_SUPPORT +# include <init-arch.h> +# include <math.h> + +extern double __atan_sse2 (double); +extern double __atan_fma4 (double); + +libm_ifunc (atan, HAS_FMA4 ? __atan_fma4 : __atan_sse2); + +# define atan __atan_sse2 +#endif + + +#include <sysdeps/ieee754/dbl-64/s_atan.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin-fma4.c b/sysdeps/x86_64/fpu/multiarch/s_sin-fma4.c new file mode 100644 index 0000000000..97cef8bd42 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_sin-fma4.c @@ -0,0 +1,11 @@ +#define __cos __cos_fma4 +#define __sin __sin_fma4 +#define __branred __branred_fma4 +#define __docos __docos_fma4 +#define __dubsin __dubsin_fma4 +#define __mpcos __mpcos_fma4 +#define __mpcos1 __mpcos1_fma4 +#define __mpsin __mpsin_fma4 +#define __mpsin1 __mpsin1_fma4 + +#include <sysdeps/ieee754/dbl-64/s_sin.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c new file mode 100644 index 0000000000..a7c35dc858 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c @@ -0,0 +1,22 @@ +#ifdef HAVE_FMA4_SUPPORT +# include <init-arch.h> +# include <math.h> +# undef NAN + +extern double __cos_sse2 (double); +extern double __cos_fma4 (double); +extern double __sin_sse2 (double); +extern double __sin_fma4 (double); + +libm_ifunc (__cos, HAS_FMA4 ? __cos_fma4 : __cos_sse2); +weak_alias (__cos, cos) + +libm_ifunc (__sin, HAS_FMA4 ? 
__sin_fma4 : __sin_sse2); +weak_alias (__sin, sin) + +# define __cos __cos_sse2 +# define __sin __sin_sse2 +#endif + + +#include <sysdeps/ieee754/dbl-64/s_sin.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c b/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c new file mode 100644 index 0000000000..c3cefc2e66 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c @@ -0,0 +1,9 @@ +#define tan __tan_fma4 +#define __branred __branred_fma4 +#define __dbl_mp __dbl_mp_fma4 +#define __mpranred __mpranred_fma4 +#define __mptan __mptan_fma4 +#define __sub __sub_fma4 + + +#include <sysdeps/ieee754/dbl-64/s_tan.c> diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c new file mode 100644 index 0000000000..cca02b54de --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c @@ -0,0 +1,14 @@ +#ifdef HAVE_FMA4_SUPPORT +# include <init-arch.h> +# include <math.h> + +extern double __tan_sse2 (double); +extern double __tan_fma4 (double); + +libm_ifunc (tan, HAS_FMA4 ? 
__tan_fma4 : __tan_sse2); + +# define tan __tan_sse2 +#endif + + +#include <sysdeps/ieee754/dbl-64/s_tan.c> diff --git a/sysdeps/x86_64/fpu/multiarch/sincos32-fma4.c b/sysdeps/x86_64/fpu/multiarch/sincos32-fma4.c new file mode 100644 index 0000000000..f0d2d27575 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/sincos32-fma4.c @@ -0,0 +1,14 @@ +#define __cos32 __cos32_fma4 +#define __sin32 __sin32_fma4 +#define __c32 __c32_fma4 +#define __mpsin __mpsin_fma4 +#define __mpsin1 __mpsin1_fma4 +#define __mpcos __mpcos_fma4 +#define __mpcos1 __mpcos1_fma4 +#define __mpranred __mpranred_fma4 +#define __add __add_fma4 +#define __dbl_mp __dbl_mp_fma4 +#define __mul __mul_fma4 +#define __sub __sub_fma4 + +#include <sysdeps/ieee754/dbl-64/sincos32.c> diff --git a/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c b/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c new file mode 100644 index 0000000000..83cb359901 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c @@ -0,0 +1,8 @@ +#define __slowexp __slowexp_fma4 +#define __add __add_fma4 +#define __dbl_mp __dbl_mp_fma4 +#define __mpexp __mpexp_fma4 +#define __mul __mul_fma4 +#define __sub __sub_fma4 + +#include <sysdeps/ieee754/dbl-64/slowexp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c b/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c new file mode 100644 index 0000000000..744f3f6e53 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c @@ -0,0 +1,10 @@ +#define __slowpow __slowpow_fma4 +#define __add __add_fma4 +#define __dbl_mp __dbl_mp_fma4 +#define __mpexp __mpexp_fma4 +#define __mplog __mplog_fma4 +#define __mul __mul_fma4 +#define __sub __sub_fma4 +#define __halfulp __halfulp_fma4 + +#include <sysdeps/ieee754/dbl-64/slowpow.c> |