summaryrefslogtreecommitdiff
path: root/sysdeps/x86_64/fpu
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@gmail.com>2011-10-12 11:27:51 -0400
committerUlrich Drepper <drepper@gmail.com>2011-10-12 11:27:51 -0400
commit0ac5ae2335292908f39031b1ea9fe8edce433c0f (patch)
treef9d26c8abc0de39d18d4c13e70f6022cdc6b461f /sysdeps/x86_64/fpu
parenta843a204a3e8a0dd53584dad3668771abaec84ac (diff)
downloadglibc-0ac5ae2335292908f39031b1ea9fe8edce433c0f.tar.gz
Optimize libm
libm is now somewhat integrated with gcc's -ffinite-math-only option and lots of the wrapper functions have been optimized.
Diffstat (limited to 'sysdeps/x86_64/fpu')
-rw-r--r--sysdeps/x86_64/fpu/bits/fenv.h35
-rw-r--r--sysdeps/x86_64/fpu/bits/mathinline.h3
-rw-r--r--sysdeps/x86_64/fpu/e_exp2l.S1
-rw-r--r--sysdeps/x86_64/fpu/e_fmodl.S1
-rw-r--r--sysdeps/x86_64/fpu/e_log10l.S27
-rw-r--r--sysdeps/x86_64/fpu/e_log2l.S25
-rw-r--r--sysdeps/x86_64/fpu/e_logl.S27
-rw-r--r--sysdeps/x86_64/fpu/e_powl.S34
-rw-r--r--sysdeps/x86_64/fpu/e_remainderl.S1
-rw-r--r--sysdeps/x86_64/fpu/e_scalbl.S8
-rw-r--r--sysdeps/x86_64/fpu/e_sqrt.c6
-rw-r--r--sysdeps/x86_64/fpu/e_sqrtf.c6
-rw-r--r--sysdeps/x86_64/fpu/math_private.h25
13 files changed, 152 insertions, 47 deletions
diff --git a/sysdeps/x86_64/fpu/bits/fenv.h b/sysdeps/x86_64/fpu/bits/fenv.h
index 11859f00c8..be2518dac3 100644
--- a/sysdeps/x86_64/fpu/bits/fenv.h
+++ b/sysdeps/x86_64/fpu/bits/fenv.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 1997,1998,1999,2000,2001,2004 Free Software Foundation, Inc.
+/* Copyright (C) 1997-2001,2004,2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -95,3 +95,36 @@ fenv_t;
/* Floating-point environment where none of the exception is masked. */
# define FE_NOMASK_ENV ((__const fenv_t *) -2)
#endif
+
+
+#ifdef __OPTIMIZE__
+/* Optimized versions. */
+extern int __feraiseexcept_renamed (int) __asm__ ("feraiseexcept");
+__extern_inline int feraiseexcept (int __excepts)
+{
+  if (__builtin_constant_p (__excepts)
+      && (__excepts & ~(FE_INVALID | FE_DIVBYZERO)) == 0)
+    {
+      if ((FE_INVALID & __excepts) != 0)
+        {
+          /* One example of an invalid operation is 0.0 / 0.0.  */
+          float __f = 0.0;
+          /* divss overwrites %0, so it has to be a read-write operand.  */
+          __asm__ __volatile__ ("divss %0, %0 " : "+x" (__f));
+          (void) &__f;
+        }
+      if ((FE_DIVBYZERO & __excepts) != 0)
+        {
+          float __f = 1.0;
+          float __g = 0.0;
+          /* 1.0 / 0.0; reserved names keep user macros out of the header.  */
+          __asm__ __volatile__ ("divss %1, %0" : "+x" (__f) : "x" (__g));
+          (void) &__f;
+        }
+
+      return 0;
+    }
+  /* Anything else is handled by the real feraiseexcept symbol.  */
+  return __feraiseexcept_renamed (__excepts);
+}
+#endif
diff --git a/sysdeps/x86_64/fpu/bits/mathinline.h b/sysdeps/x86_64/fpu/bits/mathinline.h
index 780f8786dd..5bdf47ecbf 100644
--- a/sysdeps/x86_64/fpu/bits/mathinline.h
+++ b/sysdeps/x86_64/fpu/bits/mathinline.h
@@ -103,7 +103,8 @@ __NTH (llrint (double __x))
}
# endif
-# if __FINITE_MATH_ONLY__ == 1 && (__WORDSIZE == 64 || defined __SSE2_MATH__)
+# if defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ > 0 \
+ && (__WORDSIZE == 64 || defined __SSE2_MATH__)
/* Determine maximum of two values. */
__MATH_INLINE float
__NTH (fmaxf (float __x, float __y))
diff --git a/sysdeps/x86_64/fpu/e_exp2l.S b/sysdeps/x86_64/fpu/e_exp2l.S
index 336b989098..7abf425c7b 100644
--- a/sysdeps/x86_64/fpu/e_exp2l.S
+++ b/sysdeps/x86_64/fpu/e_exp2l.S
@@ -36,3 +36,4 @@ ENTRY(__ieee754_exp2l)
fldz /* Set result to 0. */
2: ret
END (__ieee754_exp2l)
+strong_alias (__ieee754_exp2l, __exp2l_finite)
diff --git a/sysdeps/x86_64/fpu/e_fmodl.S b/sysdeps/x86_64/fpu/e_fmodl.S
index 2967bf224b..07c50df8d1 100644
--- a/sysdeps/x86_64/fpu/e_fmodl.S
+++ b/sysdeps/x86_64/fpu/e_fmodl.S
@@ -20,3 +20,4 @@ ENTRY(__ieee754_fmodl)
fstp %st(1)
ret
END (__ieee754_fmodl)
+strong_alias (__ieee754_fmodl, __fmodl_finite)
diff --git a/sysdeps/x86_64/fpu/e_log10l.S b/sysdeps/x86_64/fpu/e_log10l.S
index 633234b744..50c58757af 100644
--- a/sysdeps/x86_64/fpu/e_log10l.S
+++ b/sysdeps/x86_64/fpu/e_log10l.S
@@ -10,14 +10,12 @@
#include <machine/asm.h>
-RCSID("$NetBSD: $")
-
#ifdef __ELF__
- .section .rodata
+ .section .rodata.cst8,"aM",@progbits,8
#else
.text
#endif
- .align ALIGNARG(4)
+ .p2align 3
ASM_TYPE_DIRECTIVE(one,@object)
one: .double 1.0
ASM_SIZE_DIRECTIVE(one)
@@ -30,9 +28,9 @@ limit: .double 0.29
#ifdef PIC
-#define MO(op) op##(%rip)
+# define MO(op) op##(%rip)
#else
-#define MO(op) op
+# define MO(op) op
#endif
.text
@@ -65,3 +63,20 @@ ENTRY(__ieee754_log10l)
fstp %st(1)
ret
END(__ieee754_log10l)
+
+// Finite-math entry point (cf. -ffinite-math-only); no NaN/Inf checks are done here.
+ENTRY(__log10l_finite)
+ fldlg2 // log10(2)
+ fldt 8(%rsp) // x : log10(2)
+ fld %st // x : x : log10(2)
+ fsubl MO(one) // x-1 : x : log10(2)
+ fld %st // x-1 : x-1 : x : log10(2)
+ fabs // |x-1| : x-1 : x : log10(2)
+ fcompl MO(limit) // x-1 : x : log10(2)
+ fnstsw // x-1 : x : log10(2)
+ andb $0x45, %ah // keep C3, C2, C0 of the compare result
+ jz 2b // |x-1| > limit; 2b lies before this hunk (presumably the fyl2x path -- confirm)
+ fstp %st(1) // x-1 : log10(2)
+ fyl2xp1 // log10(x)
+ ret
+END(__log10l_finite)
diff --git a/sysdeps/x86_64/fpu/e_log2l.S b/sysdeps/x86_64/fpu/e_log2l.S
index f04d30a05a..78dc2d5c0e 100644
--- a/sysdeps/x86_64/fpu/e_log2l.S
+++ b/sysdeps/x86_64/fpu/e_log2l.S
@@ -10,11 +10,11 @@
#include <machine/asm.h>
#ifdef __ELF__
- .section .rodata
+ .section .rodata.cst8,"aM",@progbits,8
#else
.text
#endif
- .align ALIGNARG(4)
+ .p2align 3
ASM_TYPE_DIRECTIVE(one,@object)
one: .double 1.0
ASM_SIZE_DIRECTIVE(one)
@@ -27,9 +27,9 @@ limit: .double 0.29
#ifdef PIC
-#define MO(op) op##(%rip)
+# define MO(op) op##(%rip)
#else
-#define MO(op) op
+# define MO(op) op
#endif
.text
@@ -62,3 +62,20 @@ ENTRY(__ieee754_log2l)
fstp %st(1)
ret
END (__ieee754_log2l)
+
+// Finite-math entry point (cf. -ffinite-math-only); no NaN/Inf checks are done here.
+ENTRY(__log2l_finite)
+ fldl MO(one) // 1
+ fldt 8(%rsp) // x : 1
+ fld %st // x : x : 1
+ fsub %st(2), %st // x-1 : x : 1
+ fld %st // x-1 : x-1 : x : 1
+ fabs // |x-1| : x-1 : x : 1
+ fcompl MO(limit) // x-1 : x : 1
+ fnstsw // x-1 : x : 1
+ andb $0x45, %ah // keep C3, C2, C0 of the compare result
+ jz 2b // |x-1| > limit; 2b lies before this hunk (presumably the fyl2x path -- confirm)
+ fstp %st(1) // x-1 : 1
+ fyl2xp1 // log2(x)
+ ret
+END (__log2l_finite)
diff --git a/sysdeps/x86_64/fpu/e_logl.S b/sysdeps/x86_64/fpu/e_logl.S
index 2ba91eedfd..2503b9a013 100644
--- a/sysdeps/x86_64/fpu/e_logl.S
+++ b/sysdeps/x86_64/fpu/e_logl.S
@@ -8,15 +8,13 @@
#include <machine/asm.h>
-RCSID("$NetBSD: $")
-
#ifdef __ELF__
- .section .rodata
+ .section .rodata.cst8,"aM",@progbits,8
#else
.text
#endif
- .align ALIGNARG(4)
+ .p2align 3
ASM_TYPE_DIRECTIVE(one,@object)
one: .double 1.0
ASM_SIZE_DIRECTIVE(one)
@@ -29,9 +27,9 @@ limit: .double 0.29
#ifdef PIC
-#define MO(op) op##(%rip)
+# define MO(op) op##(%rip)
#else
-#define MO(op) op
+# define MO(op) op
#endif
.text
@@ -64,3 +62,20 @@ ENTRY(__ieee754_logl)
fstp %st(1)
ret
END (__ieee754_logl)
+
+// Finite-math entry point (cf. -ffinite-math-only); no NaN/Inf checks are done here.
+ENTRY(__logl_finite)
+ fldln2 // log(2)
+ fldt 8(%rsp) // x : log(2)
+ fld %st // x : x : log(2)
+ fsubl MO(one) // x-1 : x : log(2)
+ fld %st // x-1 : x-1 : x : log(2)
+ fabs // |x-1| : x-1 : x : log(2)
+ fcompl MO(limit) // x-1 : x : log(2)
+ fnstsw // x-1 : x : log(2)
+ andb $0x45, %ah // keep C3, C2, C0 of the compare result
+ jz 2b // |x-1| > limit; 2b lies before this hunk (presumably the fyl2x path -- confirm)
+ fstp %st(1) // x-1 : log(2)
+ fyl2xp1 // log(x)
+ ret
+END (__logl_finite)
diff --git a/sysdeps/x86_64/fpu/e_powl.S b/sysdeps/x86_64/fpu/e_powl.S
index a0b1b1df1e..a65c465ec7 100644
--- a/sysdeps/x86_64/fpu/e_powl.S
+++ b/sysdeps/x86_64/fpu/e_powl.S
@@ -22,12 +22,27 @@
#include <machine/asm.h>
#ifdef __ELF__
- .section .rodata
+ .section .rodata.cst8,"aM",@progbits,8
#else
.text
#endif
+ .p2align 3
+ ASM_TYPE_DIRECTIVE(one,@object)
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ ASM_TYPE_DIRECTIVE(limit,@object)
+limit: .double 0.29
+ ASM_SIZE_DIRECTIVE(limit)
+ ASM_TYPE_DIRECTIVE(p63,@object)
+p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
+ ASM_SIZE_DIRECTIVE(p63)
- .align ALIGNARG(4)
+#ifdef __ELF__
+ .section .rodata.cst16,"aM",@progbits,16
+#else
+ .text
+#endif
+ .p2align 3
ASM_TYPE_DIRECTIVE(infinity,@object)
inf_zero:
infinity:
@@ -43,21 +58,11 @@ minfinity:
mzero:
.byte 0, 0, 0, 0, 0, 0, 0, 0x80
ASM_SIZE_DIRECTIVE(minf_mzero)
- ASM_TYPE_DIRECTIVE(one,@object)
-one: .double 1.0
- ASM_SIZE_DIRECTIVE(one)
- ASM_TYPE_DIRECTIVE(limit,@object)
-limit: .double 0.29
- ASM_SIZE_DIRECTIVE(limit)
- ASM_TYPE_DIRECTIVE(p63,@object)
-p63:
- .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
- ASM_SIZE_DIRECTIVE(p63)
#ifdef PIC
-#define MO(op) op##(%rip)
+# define MO(op) op##(%rip)
#else
-#define MO(op) op
+# define MO(op) op
#endif
.text
@@ -339,3 +344,4 @@ ENTRY(__ieee754_powl)
ret
END(__ieee754_powl)
+strong_alias (__ieee754_powl, __powl_finite)
diff --git a/sysdeps/x86_64/fpu/e_remainderl.S b/sysdeps/x86_64/fpu/e_remainderl.S
index 480b1cad8b..4ee0910912 100644
--- a/sysdeps/x86_64/fpu/e_remainderl.S
+++ b/sysdeps/x86_64/fpu/e_remainderl.S
@@ -18,3 +18,4 @@ ENTRY(__ieee754_remainderl)
fstp %st(1)
ret
END (__ieee754_remainderl)
+strong_alias (__ieee754_remainderl, __remainderl_finite)
diff --git a/sysdeps/x86_64/fpu/e_scalbl.S b/sysdeps/x86_64/fpu/e_scalbl.S
index 6b229705ba..5833321a19 100644
--- a/sysdeps/x86_64/fpu/e_scalbl.S
+++ b/sysdeps/x86_64/fpu/e_scalbl.S
@@ -10,8 +10,6 @@
#include <machine/asm.h>
-RCSID("$NetBSD: $")
-
#ifdef __ELF__
.section .rodata
#else
@@ -23,16 +21,15 @@ RCSID("$NetBSD: $")
zero_nan:
.double 0.0
nan: .byte 0, 0, 0, 0, 0, 0, 0xff, 0x7f
-minus_zero:
.byte 0, 0, 0, 0, 0, 0, 0, 0x80
.byte 0, 0, 0, 0, 0, 0, 0xff, 0x7f
ASM_SIZE_DIRECTIVE(zero_nan)
#ifdef PIC
-#define MO(op) op##(%rip)
+# define MO(op) op##(%rip)
#else
-#define MO(op) op
+# define MO(op) op
#endif
.text
@@ -98,3 +95,4 @@ ENTRY(__ieee754_scalbl)
fdiv %st
ret
END(__ieee754_scalbl)
+strong_alias (__ieee754_scalbl, __scalbl_finite)
diff --git a/sysdeps/x86_64/fpu/e_sqrt.c b/sysdeps/x86_64/fpu/e_sqrt.c
index d588a8b10d..99120993fe 100644
--- a/sysdeps/x86_64/fpu/e_sqrt.c
+++ b/sysdeps/x86_64/fpu/e_sqrt.c
@@ -1,5 +1,5 @@
/* Square root of floating point number.
- Copyright (C) 2002 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -19,12 +19,14 @@
#include <math_private.h>
+#undef __ieee754_sqrt
double
__ieee754_sqrt (double x)
{
double res;
- asm ("sqrtsd %0, %1" : "=x" (res) : "x" (x));
+ asm ("sqrtsd %1, %0" : "=x" (res) : "xm" (x)); /* %1 (input x) is the source, %0 (res) the destination.  */
return res;
}
+strong_alias (__ieee754_sqrt, __sqrt_finite)
diff --git a/sysdeps/x86_64/fpu/e_sqrtf.c b/sysdeps/x86_64/fpu/e_sqrtf.c
index f7801f05d2..dade4f59f4 100644
--- a/sysdeps/x86_64/fpu/e_sqrtf.c
+++ b/sysdeps/x86_64/fpu/e_sqrtf.c
@@ -1,5 +1,5 @@
/* Square root of floating point number.
- Copyright (C) 2002 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -19,12 +19,14 @@
#include <math_private.h>
+#undef __ieee754_sqrtf
float
__ieee754_sqrtf (float x)
{
float res;
- asm ("sqrtss %0, %1" : "=x" (res) : "x" (x));
+ asm ("sqrtss %1, %0" : "=x" (res) : "xm" (x)); /* %1 (input x) is the source, %0 (res) the destination.  */
return res;
}
+strong_alias (__ieee754_sqrtf, __sqrtf_finite)
diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h
index 37357d317a..523ec549ac 100644
--- a/sysdeps/x86_64/fpu/math_private.h
+++ b/sysdeps/x86_64/fpu/math_private.h
@@ -58,22 +58,35 @@ do { \
#endif
#define __isnan(d) \
- ({ long int __di; EXTRACT_WORDS64 (__di, d); \
+ ({ long int __di; EXTRACT_WORDS64 (__di, (double) (d)); \
(__di & 0x7fffffffffffffffl) > 0x7ff0000000000000l; })
#define __isnanf(d) \
- ({ int __di; GET_FLOAT_WORD (__di, d); \
+ ({ int __di; GET_FLOAT_WORD (__di, (float) (d)); \
(__di & 0x7fffffff) > 0x7f800000; })
#define __isinf_ns(d) \
- ({ long int __di; EXTRACT_WORDS64 (__di, d); \
+ ({ long int __di; EXTRACT_WORDS64 (__di, (double) (d)); \
(__di & 0x7fffffffffffffffl) == 0x7ff0000000000000l; })
#define __isinf_nsf(d) \
- ({ int __di; GET_FLOAT_WORD (__di, d); \
+ ({ int __di; GET_FLOAT_WORD (__di, (float) (d)); \
(__di & 0x7fffffff) == 0x7f800000; })
#define __finite(d) \
- ({ long int __di; EXTRACT_WORDS64 (__di, d); \
+ ({ long int __di; EXTRACT_WORDS64 (__di, (double) (d)); \
(__di & 0x7fffffffffffffffl) < 0x7ff0000000000000l; })
#define __finitef(d) \
- ({ int __di; GET_FLOAT_WORD (__di, d); \
+ ({ int __di; GET_FLOAT_WORD (__di, (float) (d)); \
(__di & 0x7fffffff) < 0x7f800000; })
+/* Inline square roots; D is evaluated once and converted as a whole to the operand type.  */
+#define __ieee754_sqrt(d) \
+ ({ double __res; \
+ asm ("sqrtsd %1, %0" : "=x" (__res) : "xm" ((double) (d))); \
+ __res; })
+#define __ieee754_sqrtf(d) \
+ ({ float __res; \
+ asm ("sqrtss %1, %0" : "=x" (__res) : "xm" ((float) (d))); \
+ __res; })
+#define __ieee754_sqrtl(d) \
+ ({ long double __res; \
+ asm ("fsqrt" : "=t" (__res) : "0" ((long double) (d))); \
+ __res; })