diff options
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/core/setup.py | 3 | ||||
-rw-r--r-- | numpy/core/src/npymath/ieee754.c.src | 842 |
2 files changed, 844 insertions, 1 deletions
diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 41f702bd1..10c1c3f92 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -766,7 +766,8 @@ def configuration(parent_package='',top_path=None): npymath_sources = [join('src', 'npymath', 'npy_math_internal.h.src'), join('src', 'npymath', 'npy_math.c'), - join('src', 'npymath', 'ieee754.cpp'), + # join('src', 'npymath', 'ieee754.cpp'), + join('src', 'npymath', 'ieee754.c.src'), join('src', 'npymath', 'npy_math_complex.c.src'), join('src', 'npymath', 'halffloat.c') ] diff --git a/numpy/core/src/npymath/ieee754.c.src b/numpy/core/src/npymath/ieee754.c.src new file mode 100644 index 000000000..4e6ddb712 --- /dev/null +++ b/numpy/core/src/npymath/ieee754.c.src @@ -0,0 +1,842 @@ +/* -*- c -*- */ +/* + * vim:syntax=c + * + * Low-level routines related to IEEE-754 format + */ +#include "npy_math_common.h" +#include "npy_math_private.h" +#include "numpy/utils.h" + +#ifndef HAVE_COPYSIGN +double npy_copysign(double x, double y) +{ + npy_uint32 hx, hy; + GET_HIGH_WORD(hx, x); + GET_HIGH_WORD(hy, y); + SET_HIGH_WORD(x, (hx & 0x7fffffff) | (hy & 0x80000000)); + return x; +} +#endif + +/* + The below code is provided for compilers which do not yet provide C11 compatibility (gcc 4.5 and older) + */ +#ifndef LDBL_TRUE_MIN +#define LDBL_TRUE_MIN __LDBL_DENORM_MIN__ +#endif + +#if !defined(HAVE_DECL_SIGNBIT) +#include "_signbit.c" + +int _npy_signbit_f(float x) +{ + return _npy_signbit_d((double) x); +} + +int _npy_signbit_ld(long double x) +{ + return _npy_signbit_d((double) x); +} +#endif + +/* + * FIXME: There is a lot of redundancy between _next* and npy_nextafter*. + * refactor this at some point + * + * p >= 0, returnx x + nulp + * p < 0, returnx x - nulp + */ +static double _next(double x, int p) +{ + volatile double t; + npy_int32 hx, hy, ix; + npy_uint32 lx; + + EXTRACT_WORDS(hx, lx, x); + ix = hx & 0x7fffffff; /* |x| */ + + if (((ix >= 0x7ff00000) && ((ix - 0x7ff00000) | lx) != 0)) /* x is nan */ + return x; + if ((ix | lx) == 0) { /* x == 0 */ + if (p >= 0) { + INSERT_WORDS(x, 0x0, 1); /* return +minsubnormal */ + } else { + INSERT_WORDS(x, 0x80000000, 1); /* return -minsubnormal */ + } + t = x * x; + if (t == x) + return t; + else + return x; /* raise underflow flag */ + } + if (p < 0) { /* x -= ulp */ + if (lx == 0) + hx -= 1; + lx -= 1; + } else { /* x += ulp */ + lx += 1; + if (lx == 0) + hx += 1; + } + hy = hx & 0x7ff00000; + if (hy >= 0x7ff00000) + return x + x; /* overflow */ + if (hy < 0x00100000) { /* underflow */ + t = x * x; + if (t != x) { /* raise underflow flag */ + INSERT_WORDS(x, hx, lx); + return x; + } + } + INSERT_WORDS(x, hx, lx); + return x; +} + +static float _nextf(float x, int p) +{ + volatile float t; + npy_int32 hx, hy, ix; + + GET_FLOAT_WORD(hx, x); + ix = hx & 0x7fffffff; /* |x| */ + + if ((ix > 0x7f800000)) /* x is nan */ + return x; + if (ix == 0) { /* x == 0 */ + if (p >= 0) { + SET_FLOAT_WORD(x, 0x0 | 1); /* return +minsubnormal */ + } else { + SET_FLOAT_WORD(x, 0x80000000 | 1); /* return -minsubnormal */ + } + t = x * x; + if (t == x) + return t; + else + return x; /* raise underflow flag */ + } + if (p < 0) { /* x -= ulp */ + hx -= 1; + } else { /* x += ulp */ + hx += 1; + } + hy = hx & 0x7f800000; + if (hy >= 0x7f800000) + return x + x; /* overflow */ + if (hy < 0x00800000) { /* underflow */ + t = x * x; + if (t != x) { /* raise underflow flag */ + SET_FLOAT_WORD(x, hx); + return x; + } + } + SET_FLOAT_WORD(x, hx); + return x; +} + +#if defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_BE) || \ + defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_LE) + +/* + * FIXME: this is ugly and untested. The asm part only works with gcc, and we + * should consolidate the GET_LDOUBLE* / SET_LDOUBLE macros + */ +#define math_opt_barrier(x) \ + ({ __typeof (x) __x = x; __asm ("" : "+m" (__x)); __x; }) +#define math_force_eval(x) __asm __volatile ("" : : "m" (x)) + +/* only works for big endian */ +typedef union +{ + npy_longdouble value; + struct + { + npy_uint64 msw; + npy_uint64 lsw; + } parts64; + struct + { + npy_uint32 w0, w1, w2, w3; + } parts32; +} ieee854_long_double_shape_type; + +/* Get two 64 bit ints from a long double. */ + +#define GET_LDOUBLE_WORDS64(ix0,ix1,d) \ +do { \ + ieee854_long_double_shape_type qw_u; \ + qw_u.value = (d); \ + (ix0) = qw_u.parts64.msw; \ + (ix1) = qw_u.parts64.lsw; \ +} while (0) + +/* Set a long double from two 64 bit ints. */ + +#define SET_LDOUBLE_WORDS64(d,ix0,ix1) \ +do { \ + ieee854_long_double_shape_type qw_u; \ + qw_u.parts64.msw = (ix0); \ + qw_u.parts64.lsw = (ix1); \ + (d) = qw_u.value; \ +} while (0) + +static npy_longdouble _nextl(npy_longdouble x, int p) +{ + npy_int64 hx,ihx,ilx; + npy_uint64 lx; + npy_longdouble u; + + GET_LDOUBLE_WORDS64(hx, lx, x); + ihx = hx & 0x7fffffffffffffffLL; /* |hx| */ + ilx = lx & 0x7fffffffffffffffLL; /* |lx| */ + + if(((ihx & 0x7ff0000000000000LL)==0x7ff0000000000000LL)&& + ((ihx & 0x000fffffffffffffLL)!=0)) { + return x; /* signal the nan */ + } + if(ihx == 0 && ilx == 0) { /* x == 0 */ + SET_LDOUBLE_WORDS64(x, p, 0ULL);/* return +-minsubnormal */ + u = x * x; + if (u == x) { + return u; + } else { + return x; /* raise underflow flag */ + } + } + + if(p < 0) { /* p < 0, x -= ulp */ + if((hx==0xffefffffffffffffLL)&&(lx==0xfc8ffffffffffffeLL)) + return x+x; /* overflow, return -inf */ + if (hx >= 0x7ff0000000000000LL) { + SET_LDOUBLE_WORDS64(u,0x7fefffffffffffffLL,0x7c8ffffffffffffeLL); + return u; + } + if(ihx <= 0x0360000000000000LL) { /* x <= LDBL_MIN */ + u = math_opt_barrier (x); + x -= LDBL_TRUE_MIN; + if (ihx < 0x0360000000000000LL + || (hx > 0 && (npy_int64) lx <= 0) + || (hx < 0 && (npy_int64) lx > 1)) { + u = u * u; + math_force_eval (u); /* raise underflow flag */ + } + return x; + } + if (ihx < 0x06a0000000000000LL) { /* ulp will denormal */ + SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL),0ULL); + u *= 0x1.0000000000000p-105L; + } else + SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL)-0x0690000000000000LL,0ULL); + return x - u; + } else { /* p >= 0, x += ulp */ + if((hx==0x7fefffffffffffffLL)&&(lx==0x7c8ffffffffffffeLL)) + return x+x; /* overflow, return +inf */ + if ((npy_uint64) hx >= 0xfff0000000000000ULL) { + SET_LDOUBLE_WORDS64(u,0xffefffffffffffffLL,0xfc8ffffffffffffeLL); + return u; + } + if(ihx <= 0x0360000000000000LL) { /* x <= LDBL_MIN */ + u = math_opt_barrier (x); + x += LDBL_TRUE_MIN; + if (ihx < 0x0360000000000000LL + || (hx > 0 && (npy_int64) lx < 0 && lx != 0x8000000000000001LL) + || (hx < 0 && (npy_int64) lx >= 0)) { + u = u * u; + math_force_eval (u); /* raise underflow flag */ + } + if (x == 0.0L) /* handle negative LDBL_TRUE_MIN case */ + x = -0.0L; + return x; + } + if (ihx < 0x06a0000000000000LL) { /* ulp will denormal */ + SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL),0ULL); + u *= 0x1.0000000000000p-105L; + } else + SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL)-0x0690000000000000LL,0ULL); + return x + u; + } +} +#else +static npy_longdouble _nextl(npy_longdouble x, int p) +{ + volatile npy_longdouble t; + union IEEEl2bitsrep ux; + + ux.e = x; + + if ((GET_LDOUBLE_EXP(ux) == 0x7fff && + ((GET_LDOUBLE_MANH(ux) & ~LDBL_NBIT) | GET_LDOUBLE_MANL(ux)) != 0)) { + return ux.e; /* x is nan */ + } + if (ux.e == 0.0) { + SET_LDOUBLE_MANH(ux, 0); /* return +-minsubnormal */ + SET_LDOUBLE_MANL(ux, 1); + if (p >= 0) { + SET_LDOUBLE_SIGN(ux, 0); + } else { + SET_LDOUBLE_SIGN(ux, 1); + } + t = ux.e * ux.e; + if (t == ux.e) { + return t; + } else { + return ux.e; /* raise underflow flag */ + } + } + if (p < 0) { /* x -= ulp */ + if (GET_LDOUBLE_MANL(ux) == 0) { + if ((GET_LDOUBLE_MANH(ux) & ~LDBL_NBIT) == 0) { + SET_LDOUBLE_EXP(ux, GET_LDOUBLE_EXP(ux) - 1); + } + SET_LDOUBLE_MANH(ux, + (GET_LDOUBLE_MANH(ux) - 1) | + (GET_LDOUBLE_MANH(ux) & LDBL_NBIT)); + } + SET_LDOUBLE_MANL(ux, GET_LDOUBLE_MANL(ux) - 1); + } else { /* x += ulp */ + SET_LDOUBLE_MANL(ux, GET_LDOUBLE_MANL(ux) + 1); + if (GET_LDOUBLE_MANL(ux) == 0) { + SET_LDOUBLE_MANH(ux, + (GET_LDOUBLE_MANH(ux) + 1) | + (GET_LDOUBLE_MANH(ux) & LDBL_NBIT)); + if ((GET_LDOUBLE_MANH(ux) & ~LDBL_NBIT) == 0) { + SET_LDOUBLE_EXP(ux, GET_LDOUBLE_EXP(ux) + 1); + } + } + } + if (GET_LDOUBLE_EXP(ux) == 0x7fff) { + return ux.e + ux.e; /* overflow */ + } + if (GET_LDOUBLE_EXP(ux) == 0) { /* underflow */ + if (LDBL_NBIT) { + SET_LDOUBLE_MANH(ux, GET_LDOUBLE_MANH(ux) & ~LDBL_NBIT); + } + t = ux.e * ux.e; + if (t != ux.e) { /* raise underflow flag */ + return ux.e; + } + } + + return ux.e; +} +#endif + +/* + * nextafter code taken from BSD math lib, the code contains the following + * notice: + * + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +#ifndef HAVE_NEXTAFTER +double npy_nextafter(double x, double y) +{ + volatile double t; + npy_int32 hx, hy, ix, iy; + npy_uint32 lx, ly; + + EXTRACT_WORDS(hx, lx, x); + EXTRACT_WORDS(hy, ly, y); + ix = hx & 0x7fffffff; /* |x| */ + iy = hy & 0x7fffffff; /* |y| */ + + if (((ix >= 0x7ff00000) && ((ix - 0x7ff00000) | lx) != 0) || /* x is nan */ + ((iy >= 0x7ff00000) && ((iy - 0x7ff00000) | ly) != 0)) /* y is nan */ + return x + y; + if (x == y) + return y; /* x=y, return y */ + if ((ix | lx) == 0) { /* x == 0 */ + INSERT_WORDS(x, hy & 0x80000000, 1); /* return +-minsubnormal */ + t = x * x; + if (t == x) + return t; + else + return x; /* raise underflow flag */ + } + if (hx >= 0) { /* x > 0 */ + if (hx > hy || ((hx == hy) && (lx > ly))) { /* x > y, x -= ulp */ + if (lx == 0) + hx -= 1; + lx -= 1; + } else { /* x < y, x += ulp */ + lx += 1; + if (lx == 0) + hx += 1; + } + } else { /* x < 0 */ + if (hy >= 0 || hx > hy || ((hx == hy) && (lx > ly))) { /* x < y, x -= ulp */ + if (lx == 0) + hx -= 1; + lx -= 1; + } else { /* x > y, x += ulp */ + lx += 1; + if (lx == 0) + hx += 1; + } + } + hy = hx & 0x7ff00000; + if (hy >= 0x7ff00000) + return x + x; /* overflow */ + if (hy < 0x00100000) { /* underflow */ + t = x * x; + if (t != x) { /* raise underflow flag */ + INSERT_WORDS(y, hx, lx); + return y; + } + } + INSERT_WORDS(x, hx, lx); + return x; +} +#endif + +#ifndef HAVE_NEXTAFTERF +float npy_nextafterf(float x, float y) +{ + volatile float t; + npy_int32 hx, hy, ix, iy; + + GET_FLOAT_WORD(hx, x); + GET_FLOAT_WORD(hy, y); + ix = hx & 0x7fffffff; /* |x| */ + iy = hy & 0x7fffffff; /* |y| */ + + if ((ix > 0x7f800000) || /* x is nan */ + (iy > 0x7f800000)) /* y is nan */ + return x + y; + if (x == y) + return y; /* x=y, return y */ + if (ix == 0) { /* x == 0 */ + SET_FLOAT_WORD(x, (hy & 0x80000000) | 1); /* return +-minsubnormal */ + t = x * x; + if (t == x) + return t; + else + return x; /* raise underflow flag */ + } + if (hx >= 0) { /* x > 0 */ + if (hx > hy) { /* x > y, x -= ulp */ + hx -= 1; + } else { /* x < y, x += ulp */ + hx += 1; + } + } else { /* x < 0 */ + if (hy >= 0 || hx > hy) { /* x < y, x -= ulp */ + hx -= 1; + } else { /* x > y, x += ulp */ + hx += 1; + } + } + hy = hx & 0x7f800000; + if (hy >= 0x7f800000) + return x + x; /* overflow */ + if (hy < 0x00800000) { /* underflow */ + t = x * x; + if (t != x) { /* raise underflow flag */ + SET_FLOAT_WORD(y, hx); + return y; + } + } + SET_FLOAT_WORD(x, hx); + return x; +} +#endif + +#ifndef HAVE_NEXTAFTERL +npy_longdouble npy_nextafterl(npy_longdouble x, npy_longdouble y) +{ + volatile npy_longdouble t; + union IEEEl2bitsrep ux; + union IEEEl2bitsrep uy; + + ux.e = x; + uy.e = y; + + if ((GET_LDOUBLE_EXP(ux) == 0x7fff && + ((GET_LDOUBLE_MANH(ux) & ~LDBL_NBIT) | GET_LDOUBLE_MANL(ux)) != 0) || + (GET_LDOUBLE_EXP(uy) == 0x7fff && + ((GET_LDOUBLE_MANH(uy) & ~LDBL_NBIT) | GET_LDOUBLE_MANL(uy)) != 0)) { + return ux.e + uy.e; /* x or y is nan */ + } + if (ux.e == uy.e) { + return uy.e; /* x=y, return y */ + } + if (ux.e == 0.0) { + SET_LDOUBLE_MANH(ux, 0); /* return +-minsubnormal */ + SET_LDOUBLE_MANL(ux, 1); + SET_LDOUBLE_SIGN(ux, GET_LDOUBLE_SIGN(uy)); + t = ux.e * ux.e; + if (t == ux.e) { + return t; + } else { + return ux.e; /* raise underflow flag */ + } + } + if ((ux.e > 0.0) ^ (ux.e < uy.e)) { /* x -= ulp */ + if (GET_LDOUBLE_MANL(ux) == 0) { + if ((GET_LDOUBLE_MANH(ux) & ~LDBL_NBIT) == 0) { + SET_LDOUBLE_EXP(ux, GET_LDOUBLE_EXP(ux) - 1); + } + SET_LDOUBLE_MANH(ux, + (GET_LDOUBLE_MANH(ux) - 1) | + (GET_LDOUBLE_MANH(ux) & LDBL_NBIT)); + } + SET_LDOUBLE_MANL(ux, GET_LDOUBLE_MANL(ux) - 1); + } else { /* x += ulp */ + SET_LDOUBLE_MANL(ux, GET_LDOUBLE_MANL(ux) + 1); + if (GET_LDOUBLE_MANL(ux) == 0) { + SET_LDOUBLE_MANH(ux, + (GET_LDOUBLE_MANH(ux) + 1) | + (GET_LDOUBLE_MANH(ux) & LDBL_NBIT)); + if ((GET_LDOUBLE_MANH(ux) & ~LDBL_NBIT) == 0) { + SET_LDOUBLE_EXP(ux, GET_LDOUBLE_EXP(ux) + 1); + } + } + } + if (GET_LDOUBLE_EXP(ux) == 0x7fff) { + return ux.e + ux.e; /* overflow */ + } + if (GET_LDOUBLE_EXP(ux) == 0) { /* underflow */ + if (LDBL_NBIT) { + SET_LDOUBLE_MANH(ux, GET_LDOUBLE_MANH(ux) & ~LDBL_NBIT); + } + t = ux.e * ux.e; + if (t != ux.e) { /* raise underflow flag */ + return ux.e; + } + } + + return ux.e; +} +#endif + +/**begin repeat + * #suff = f,,l# + * #SUFF = F,,L# + * #type = npy_float, npy_double, npy_longdouble# + */ +@type@ npy_spacing@suff@(@type@ x) +{ + /* XXX: npy isnan/isinf may be optimized by bit twiddling */ + if (npy_isinf(x)) { + return NPY_NAN@SUFF@; + } + + return _next@suff@(x, 1) - x; +} +/**end repeat**/ + +/* + * Decorate all the math functions which are available on the current platform + */ + +#ifdef HAVE_NEXTAFTERF +float npy_nextafterf(float x, float y) +{ + return nextafterf(x, y); +} +#endif + +#ifdef HAVE_NEXTAFTER +double npy_nextafter(double x, double y) +{ + return nextafter(x, y); +} +#endif + +#ifdef HAVE_NEXTAFTERL +npy_longdouble npy_nextafterl(npy_longdouble x, npy_longdouble y) +{ + return nextafterl(x, y); +} +#endif + +int npy_clear_floatstatus() { + char x=0; + return npy_clear_floatstatus_barrier(&x); +} +int npy_get_floatstatus() { + char x=0; + return npy_get_floatstatus_barrier(&x); +} + +/* + * Functions to set the floating point status word. + */ + +#if (defined(__unix__) || defined(unix)) && !defined(USG) +#include <sys/param.h> +#endif + + +/* + * Define floating point status functions. We must define + * npy_get_floatstatus_barrier, npy_clear_floatstatus_barrier, + * npy_set_floatstatus_{divbyzero, overflow, underflow, invalid} + * for all supported platforms. + */ + + +/* Solaris --------------------------------------------------------*/ +/* --------ignoring SunOS ieee_flags approach, someone else can +** deal with that! */ +#if defined(sun) || defined(__BSD__) || defined(__OpenBSD__) || \ + (defined(__FreeBSD__) && (__FreeBSD_version < 502114)) || \ + defined(__NetBSD__) +#include <ieeefp.h> + +int npy_get_floatstatus_barrier(char * param) +{ + int fpstatus = fpgetsticky(); + /* + * By using a volatile, the compiler cannot reorder this call + */ + if (param != NULL) { + volatile char NPY_UNUSED(c) = *(char*)param; + } + return ((FP_X_DZ & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) | + ((FP_X_OFL & fpstatus) ? NPY_FPE_OVERFLOW : 0) | + ((FP_X_UFL & fpstatus) ? NPY_FPE_UNDERFLOW : 0) | + ((FP_X_INV & fpstatus) ? NPY_FPE_INVALID : 0); +} + +int npy_clear_floatstatus_barrier(char * param) +{ + int fpstatus = npy_get_floatstatus_barrier(param); + fpsetsticky(0); + + return fpstatus; +} + +void npy_set_floatstatus_divbyzero(void) +{ + fpsetsticky(FP_X_DZ); +} + +void npy_set_floatstatus_overflow(void) +{ + fpsetsticky(FP_X_OFL); +} + +void npy_set_floatstatus_underflow(void) +{ + fpsetsticky(FP_X_UFL); +} + +void npy_set_floatstatus_invalid(void) +{ + fpsetsticky(FP_X_INV); +} + +#elif defined(_AIX) && !defined(__GNUC__) +#include <float.h> +#include <fpxcp.h> + +int npy_get_floatstatus_barrier(char *param) +{ + int fpstatus = fp_read_flag(); + /* + * By using a volatile, the compiler cannot reorder this call + */ + if (param != NULL) { + volatile char NPY_UNUSED(c) = *(char*)param; + } + return ((FP_DIV_BY_ZERO & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) | + ((FP_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) | + ((FP_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) | + ((FP_INVALID & fpstatus) ? NPY_FPE_INVALID : 0); +} + +int npy_clear_floatstatus_barrier(char * param) +{ + int fpstatus = npy_get_floatstatus_barrier(param); + fp_swap_flag(0); + + return fpstatus; +} + +void npy_set_floatstatus_divbyzero(void) +{ + fp_raise_xcp(FP_DIV_BY_ZERO); +} + +void npy_set_floatstatus_overflow(void) +{ + fp_raise_xcp(FP_OVERFLOW); +} + +void npy_set_floatstatus_underflow(void) +{ + fp_raise_xcp(FP_UNDERFLOW); +} + +void npy_set_floatstatus_invalid(void) +{ + fp_raise_xcp(FP_INVALID); +} + +#elif defined(_MSC_VER) || (defined(__osf__) && defined(__alpha)) || \ + defined (__UCLIBC__) || (defined(__arc__) && defined(__GLIBC__)) + +/* + * By using a volatile floating point value, + * the compiler is forced to actually do the requested + * operations because of potential concurrency. + * + * We shouldn't write multiple values to a single + * global here, because that would cause + * a race condition. + */ +static volatile double _npy_floatstatus_x, + _npy_floatstatus_zero = 0.0, _npy_floatstatus_big = 1e300, + _npy_floatstatus_small = 1e-300, _npy_floatstatus_inf; + +void npy_set_floatstatus_divbyzero(void) +{ + _npy_floatstatus_x = 1.0 / _npy_floatstatus_zero; +} + +void npy_set_floatstatus_overflow(void) +{ + _npy_floatstatus_x = _npy_floatstatus_big * 1e300; +} + +void npy_set_floatstatus_underflow(void) +{ + _npy_floatstatus_x = _npy_floatstatus_small * 1e-300; +} + +void npy_set_floatstatus_invalid(void) +{ + _npy_floatstatus_inf = NPY_INFINITY; + _npy_floatstatus_x = _npy_floatstatus_inf - NPY_INFINITY; +} + +/* MS Windows -----------------------------------------------------*/ +#if defined(_MSC_VER) + +#include <float.h> + +int npy_get_floatstatus_barrier(char *param) +{ + /* + * By using a volatile, the compiler cannot reorder this call + */ +#if defined(_WIN64) + int fpstatus = _statusfp(); +#else + /* windows enables sse on 32 bit, so check both flags */ + int fpstatus, fpstatus2; + _statusfp2(&fpstatus, &fpstatus2); + fpstatus |= fpstatus2; +#endif + if (param != NULL) { + volatile char NPY_UNUSED(c) = *(char*)param; + } + return ((SW_ZERODIVIDE & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) | + ((SW_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) | + ((SW_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) | + ((SW_INVALID & fpstatus) ? NPY_FPE_INVALID : 0); +} + +int npy_clear_floatstatus_barrier(char *param) +{ + int fpstatus = npy_get_floatstatus_barrier(param); + _clearfp(); + + return fpstatus; +} + +/* OSF/Alpha (Tru64) ---------------------------------------------*/ +#elif defined(__osf__) && defined(__alpha) + +#include <machine/fpu.h> + +int npy_get_floatstatus_barrier(char *param) +{ + unsigned long fpstatus = ieee_get_fp_control(); + /* + * By using a volatile, the compiler cannot reorder this call + */ + if (param != NULL) { + volatile char NPY_UNUSED(c) = *(char*)param; + } + return ((IEEE_STATUS_DZE & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) | + ((IEEE_STATUS_OVF & fpstatus) ? NPY_FPE_OVERFLOW : 0) | + ((IEEE_STATUS_UNF & fpstatus) ? NPY_FPE_UNDERFLOW : 0) | + ((IEEE_STATUS_INV & fpstatus) ? NPY_FPE_INVALID : 0); +} + +int npy_clear_floatstatus_barrier(char *param) +{ + int fpstatus = npy_get_floatstatus_barrier(param); + /* clear status bits as well as disable exception mode if on */ + ieee_set_fp_control(0); + + return fpstatus; +} + +#endif +/* End of defined(_MSC_VER) || (defined(__osf__) && defined(__alpha)) */ + +#else +/* General GCC code, should work on most platforms */ +# include <fenv.h> + +int npy_get_floatstatus_barrier(char* param) +{ + int fpstatus = fetestexcept(FE_DIVBYZERO | FE_OVERFLOW | + FE_UNDERFLOW | FE_INVALID); + /* + * By using a volatile, the compiler cannot reorder this call + */ + if (param != NULL) { + volatile char NPY_UNUSED(c) = *(char*)param; + } + + return ((FE_DIVBYZERO & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) | + ((FE_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) | + ((FE_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) | + ((FE_INVALID & fpstatus) ? NPY_FPE_INVALID : 0); +} + +int npy_clear_floatstatus_barrier(char * param) +{ + /* testing float status is 50-100 times faster than clearing on x86 */ + int fpstatus = npy_get_floatstatus_barrier(param); + if (fpstatus != 0) { + feclearexcept(FE_DIVBYZERO | FE_OVERFLOW | + FE_UNDERFLOW | FE_INVALID); + } + + return fpstatus; +} + + +void npy_set_floatstatus_divbyzero(void) +{ + feraiseexcept(FE_DIVBYZERO); +} + +void npy_set_floatstatus_overflow(void) +{ + feraiseexcept(FE_OVERFLOW); +} + +void npy_set_floatstatus_underflow(void) +{ + feraiseexcept(FE_UNDERFLOW); +} + +void npy_set_floatstatus_invalid(void) +{ + feraiseexcept(FE_INVALID); +} + +#endif |