diff options
author | Charles Harris <charlesr.harris@gmail.com> | 2013-09-22 12:16:06 -0700 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2013-09-22 12:16:06 -0700 |
commit | 6f9a41276153187a4c6e45eb0b8a9999d946608d (patch) | |
tree | a1131b52e643664e41c4f25fd530f36f61307f25 /numpy | |
parent | 27e931f7e93904667ffc8609fd8cae36e0de6f48 (diff) | |
parent | fd2e1104718490be8504f8d6665205ca594a37e7 (diff) | |
download | numpy-6f9a41276153187a4c6e45eb0b8a9999d946608d.tar.gz |
Merge pull request #3772 from juliantaylor/sse-configure-check
BUG: make checking for sse intrinsics more robust
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/core/include/numpy/npy_common.h | 8 | ||||
-rw-r--r-- | numpy/core/setup.py | 10 | ||||
-rw-r--r-- | numpy/core/setup_common.py | 5 | ||||
-rw-r--r-- | numpy/core/src/multiarray/einsum.c.src | 6 | ||||
-rw-r--r-- | numpy/core/src/umath/loops.c.src | 3 | ||||
-rw-r--r-- | numpy/core/src/umath/simd.inc.src | 26 |
6 files changed, 38 insertions, 20 deletions
diff --git a/numpy/core/include/numpy/npy_common.h b/numpy/core/include/numpy/npy_common.h index 62ffa4006..08582bf79 100644 --- a/numpy/core/include/numpy/npy_common.h +++ b/numpy/core/include/numpy/npy_common.h @@ -18,6 +18,14 @@ #define NPY_GCC_UNROLL_LOOPS #endif +#if defined HAVE_XMMINTRIN_H && defined HAVE__MM_LOAD_PS +#define NPY_HAVE_SSE_INTRINSICS +#endif + +#if defined HAVE_EMMINTRIN_H && defined HAVE__MM_LOAD_PD +#define NPY_HAVE_SSE2_INTRINSICS +#endif + /* * give a hint to the compiler which branch is more likely or unlikely * to occur, e.g. rare error cases: diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 1c8cea4f7..576b7d5ff 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -165,8 +165,14 @@ def check_math_capabilities(config, moredefs, mathlibs): if config.check_func("", decl=False, call=False, headers=[h]): moredefs.append((fname2def(h).replace(".", "_"), 1)) - for f, args in OPTIONAL_INTRINSICS: - if config.check_func(f, decl=False, call=True, call_args=args): + for tup in OPTIONAL_INTRINSICS: + headers = None + if len(tup) == 2: + f, args = tup + else: + f, args, headers = tup[0], tup[1], [tup[2]] + if config.check_func(f, decl=False, call=True, call_args=args, + headers=headers): moredefs.append((fname2def(f), 1)) for dec, fn in OPTIONAL_GCC_ATTRIBUTES: diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index 1f3e6b44e..bad3607fa 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -107,7 +107,8 @@ OPTIONAL_HEADERS = [ "emmintrin.h", # SSE2 ] -# optional gcc compiler builtins and their call arguments +# optional gcc compiler builtins and their call arguments and optional a +# required header # call arguments are required as the compiler will do strict signature checking OPTIONAL_INTRINSICS = [("__builtin_isnan", '5.'), ("__builtin_isinf", '5.'), @@ -115,6 +116,8 @@ OPTIONAL_INTRINSICS = [("__builtin_isnan", '5.'), ("__builtin_bswap32", '5u'), ("__builtin_bswap64", '5u'), ("__builtin_expect", '5, 0'), + ("_mm_load_ps", '(float*)0', "xmmintrin.h"), # SSE + ("_mm_load_pd", '(double*)0', "emmintrin.h"), # SSE2 ] # gcc function attributes diff --git a/numpy/core/src/multiarray/einsum.c.src b/numpy/core/src/multiarray/einsum.c.src index 56b1ce746..7a94c9305 100644 --- a/numpy/core/src/multiarray/einsum.c.src +++ b/numpy/core/src/multiarray/einsum.c.src @@ -14,16 +14,16 @@ #define NPY_NO_DEPRECATED_API NPY_API_VERSION #define _MULTIARRAYMODULE +#include <numpy/npy_common.h> #include <numpy/arrayobject.h> #include <numpy/halffloat.h> #include <npy_pycompat.h> -#include <npy_config.h> #include <ctype.h> #include "convert.h" -#ifdef HAVE_XMMINTRIN_H +#ifdef NPY_HAVE_SSE_INTRINSICS #define EINSUM_USE_SSE1 1 #else #define EINSUM_USE_SSE1 0 @@ -32,7 +32,7 @@ /* * TODO: Only some SSE2 for float64 is implemented. */ -#ifdef HAVE_EMMINTRIN_H +#ifdef NPY_HAVE_SSE2_INTRINSICS #define EINSUM_USE_SSE2 1 #else #define EINSUM_USE_SSE2 0 diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index d1fc58ffa..a444d37c3 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -10,6 +10,7 @@ #define NO_IMPORT_ARRAY #endif +#include "numpy/npy_common.h" #include "numpy/arrayobject.h" #include "numpy/ufuncobject.h" #include "numpy/npy_math.h" @@ -564,7 +565,7 @@ NPY_NO_EXPORT void BOOL_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { if(IS_BINARY_REDUCE) { -#ifdef HAVE_EMMINTRIN_H +#ifdef NPY_HAVE_SSE2_INTRINSICS /* * stick with our variant for more reliable performance, only known * platform which outperforms it by ~20% is an i7 with glibc 2.17 diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src index e1fe6c5b5..e274e0596 100644 --- a/numpy/core/src/umath/simd.inc.src +++ b/numpy/core/src/umath/simd.inc.src @@ -16,10 +16,10 @@ #define __NPY_SIMD_INC #include "lowlevel_strided_loops.h" -#include "npy_config.h" +#include "numpy/npy_common.h" /* for NO_FLOATING_POINT_SUPPORT */ #include "numpy/ufuncobject.h" -#ifdef HAVE_EMMINTRIN_H +#ifdef NPY_HAVE_SSE2_INTRINSICS #include <emmintrin.h> #endif #include <assert.h> @@ -140,7 +140,7 @@ static const npy_int32 fanout_4[] = { * #name = unary, unary, unary_reduce, unary_reduce# */ -#if @vector@ && defined HAVE_EMMINTRIN_H +#if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS /* prototypes */ static void @@ -151,7 +151,7 @@ sse2_@func@_@TYPE@(@type@ *, @type@ *, const npy_intp n); static NPY_INLINE int run_@name@_simd_@func@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps) { -#if @vector@ && defined HAVE_EMMINTRIN_H +#if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS if (@check@(sizeof(@type@), 16)) { sse2_@func@_@TYPE@((@type@*)args[1], (@type@*)args[0], dimensions[0]); return 1; @@ -167,7 +167,7 @@ run_@name@_simd_@func@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps * # kind = add, subtract, multiply, divide# */ -#if @vector@ && defined HAVE_EMMINTRIN_H +#if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS /* prototypes */ static void @@ -185,7 +185,7 @@ sse2_binary_scalar2_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, static NPY_INLINE int run_binary_simd_@kind@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps) { -#if @vector@ && defined HAVE_EMMINTRIN_H +#if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS @type@ * ip1 = (@type@ *)args[0]; @type@ * ip2 = (@type@ *)args[1]; @type@ * op = (@type@ *)args[2]; @@ -216,7 +216,7 @@ run_binary_simd_@kind@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps * #simd = 1, 1, 1, 1, 1, 1, 0, 0# */ -#if @vector@ && @simd@ && defined HAVE_EMMINTRIN_H +#if @vector@ && @simd@ && defined NPY_HAVE_SSE2_INTRINSICS /* prototypes */ static void @@ -234,7 +234,7 @@ sse2_binary_scalar2_@kind@_@TYPE@(npy_bool * op, @type@ * ip1, @type@ * ip2, static NPY_INLINE int run_binary_simd_@kind@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps) { -#if @vector@ && @simd@ && defined HAVE_EMMINTRIN_H +#if @vector@ && @simd@ && defined NPY_HAVE_SSE2_INTRINSICS @type@ * ip1 = (@type@ *)args[0]; @type@ * ip2 = (@type@ *)args[1]; npy_bool * op = (npy_bool *)args[2]; @@ -278,7 +278,7 @@ sse2_binary_@kind@_BOOL(npy_bool * op, npy_bool * ip1, npy_bool * ip2, static NPY_INLINE int run_binary_simd_@kind@_BOOL(char **args, npy_intp *dimensions, npy_intp *steps) { -#if defined HAVE_EMMINTRIN_H +#if defined NPY_HAVE_SSE2_INTRINSICS if (sizeof(npy_bool) == 1 && IS_BLOCKABLE_BINARY(sizeof(npy_bool), 16)) { sse2_binary_@kind@_BOOL((npy_bool*)args[2], (npy_bool*)args[0], (npy_bool*)args[1], dimensions[0]); @@ -295,7 +295,7 @@ sse2_reduce_@kind@_BOOL(npy_bool * op, npy_bool * ip, npy_intp n); static NPY_INLINE int run_reduce_simd_@kind@_BOOL(char **args, npy_intp *dimensions, npy_intp *steps) { -#if defined HAVE_EMMINTRIN_H +#if defined NPY_HAVE_SSE2_INTRINSICS if (sizeof(npy_bool) == 1 && IS_BLOCKABLE_REDUCE(sizeof(npy_bool), 16)) { sse2_reduce_@kind@_BOOL((npy_bool*)args[0], (npy_bool*)args[1], dimensions[0]); @@ -317,7 +317,7 @@ sse2_@kind@_BOOL(npy_bool *, npy_bool *, const npy_intp n); static NPY_INLINE int run_unary_simd_@kind@_BOOL(char **args, npy_intp *dimensions, npy_intp *steps) { -#if defined HAVE_EMMINTRIN_H +#if defined NPY_HAVE_SSE2_INTRINSICS if (sizeof(npy_bool) == 1 && IS_BLOCKABLE_UNARY(sizeof(npy_bool), 16)) { sse2_@kind@_BOOL((npy_bool*)args[1], (npy_bool*)args[0], dimensions[0]); return 1; @@ -328,7 +328,7 @@ run_unary_simd_@kind@_BOOL(char **args, npy_intp *dimensions, npy_intp *steps) /**end repeat**/ -#ifdef HAVE_EMMINTRIN_H +#ifdef NPY_HAVE_SSE2_INTRINSICS /* * Vectorized operations @@ -843,6 +843,6 @@ sse2_@kind@_BOOL(@type@ * op, @type@ * ip, const npy_intp n) /**end repeat**/ -#endif /* HAVE_EMMINTRIN_H */ +#endif /* NPY_HAVE_SSE2_INTRINSICS */ #endif |