summaryrefslogtreecommitdiff
path: root/numpy
diff options
context:
space:
mode:
authorCharles Harris <charlesr.harris@gmail.com>2013-09-22 12:16:06 -0700
committerCharles Harris <charlesr.harris@gmail.com>2013-09-22 12:16:06 -0700
commit6f9a41276153187a4c6e45eb0b8a9999d946608d (patch)
treea1131b52e643664e41c4f25fd530f36f61307f25 /numpy
parent27e931f7e93904667ffc8609fd8cae36e0de6f48 (diff)
parentfd2e1104718490be8504f8d6665205ca594a37e7 (diff)
downloadnumpy-6f9a41276153187a4c6e45eb0b8a9999d946608d.tar.gz
Merge pull request #3772 from juliantaylor/sse-configure-check
BUG: make checking for sse intrinsics more robust
Diffstat (limited to 'numpy')
-rw-r--r--numpy/core/include/numpy/npy_common.h8
-rw-r--r--numpy/core/setup.py10
-rw-r--r--numpy/core/setup_common.py5
-rw-r--r--numpy/core/src/multiarray/einsum.c.src6
-rw-r--r--numpy/core/src/umath/loops.c.src3
-rw-r--r--numpy/core/src/umath/simd.inc.src26
6 files changed, 38 insertions, 20 deletions
diff --git a/numpy/core/include/numpy/npy_common.h b/numpy/core/include/numpy/npy_common.h
index 62ffa4006..08582bf79 100644
--- a/numpy/core/include/numpy/npy_common.h
+++ b/numpy/core/include/numpy/npy_common.h
@@ -18,6 +18,14 @@
#define NPY_GCC_UNROLL_LOOPS
#endif
+#if defined HAVE_XMMINTRIN_H && defined HAVE__MM_LOAD_PS
+#define NPY_HAVE_SSE_INTRINSICS
+#endif
+
+#if defined HAVE_EMMINTRIN_H && defined HAVE__MM_LOAD_PD
+#define NPY_HAVE_SSE2_INTRINSICS
+#endif
+
/*
* give a hint to the compiler which branch is more likely or unlikely
* to occur, e.g. rare error cases:
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 1c8cea4f7..576b7d5ff 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -165,8 +165,14 @@ def check_math_capabilities(config, moredefs, mathlibs):
if config.check_func("", decl=False, call=False, headers=[h]):
moredefs.append((fname2def(h).replace(".", "_"), 1))
- for f, args in OPTIONAL_INTRINSICS:
- if config.check_func(f, decl=False, call=True, call_args=args):
+ for tup in OPTIONAL_INTRINSICS:
+ headers = None
+ if len(tup) == 2:
+ f, args = tup
+ else:
+ f, args, headers = tup[0], tup[1], [tup[2]]
+ if config.check_func(f, decl=False, call=True, call_args=args,
+ headers=headers):
moredefs.append((fname2def(f), 1))
for dec, fn in OPTIONAL_GCC_ATTRIBUTES:
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index 1f3e6b44e..bad3607fa 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -107,7 +107,8 @@ OPTIONAL_HEADERS = [
"emmintrin.h", # SSE2
]
-# optional gcc compiler builtins and their call arguments
+# optional gcc compiler builtins and their call arguments and optionally a
+# required header
# call arguments are required as the compiler will do strict signature checking
OPTIONAL_INTRINSICS = [("__builtin_isnan", '5.'),
("__builtin_isinf", '5.'),
@@ -115,6 +116,8 @@ OPTIONAL_INTRINSICS = [("__builtin_isnan", '5.'),
("__builtin_bswap32", '5u'),
("__builtin_bswap64", '5u'),
("__builtin_expect", '5, 0'),
+ ("_mm_load_ps", '(float*)0', "xmmintrin.h"), # SSE
+ ("_mm_load_pd", '(double*)0', "emmintrin.h"), # SSE2
]
# gcc function attributes
diff --git a/numpy/core/src/multiarray/einsum.c.src b/numpy/core/src/multiarray/einsum.c.src
index 56b1ce746..7a94c9305 100644
--- a/numpy/core/src/multiarray/einsum.c.src
+++ b/numpy/core/src/multiarray/einsum.c.src
@@ -14,16 +14,16 @@
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#define _MULTIARRAYMODULE
+#include <numpy/npy_common.h>
#include <numpy/arrayobject.h>
#include <numpy/halffloat.h>
#include <npy_pycompat.h>
-#include <npy_config.h>
#include <ctype.h>
#include "convert.h"
-#ifdef HAVE_XMMINTRIN_H
+#ifdef NPY_HAVE_SSE_INTRINSICS
#define EINSUM_USE_SSE1 1
#else
#define EINSUM_USE_SSE1 0
@@ -32,7 +32,7 @@
/*
* TODO: Only some SSE2 for float64 is implemented.
*/
-#ifdef HAVE_EMMINTRIN_H
+#ifdef NPY_HAVE_SSE2_INTRINSICS
#define EINSUM_USE_SSE2 1
#else
#define EINSUM_USE_SSE2 0
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index d1fc58ffa..a444d37c3 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -10,6 +10,7 @@
#define NO_IMPORT_ARRAY
#endif
+#include "numpy/npy_common.h"
#include "numpy/arrayobject.h"
#include "numpy/ufuncobject.h"
#include "numpy/npy_math.h"
@@ -564,7 +565,7 @@ NPY_NO_EXPORT void
BOOL_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
if(IS_BINARY_REDUCE) {
-#ifdef HAVE_EMMINTRIN_H
+#ifdef NPY_HAVE_SSE2_INTRINSICS
/*
* stick with our variant for more reliable performance, only known
* platform which outperforms it by ~20% is an i7 with glibc 2.17
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index e1fe6c5b5..e274e0596 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -16,10 +16,10 @@
#define __NPY_SIMD_INC
#include "lowlevel_strided_loops.h"
-#include "npy_config.h"
+#include "numpy/npy_common.h"
/* for NO_FLOATING_POINT_SUPPORT */
#include "numpy/ufuncobject.h"
-#ifdef HAVE_EMMINTRIN_H
+#ifdef NPY_HAVE_SSE2_INTRINSICS
#include <emmintrin.h>
#endif
#include <assert.h>
@@ -140,7 +140,7 @@ static const npy_int32 fanout_4[] = {
* #name = unary, unary, unary_reduce, unary_reduce#
*/
-#if @vector@ && defined HAVE_EMMINTRIN_H
+#if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS
/* prototypes */
static void
@@ -151,7 +151,7 @@ sse2_@func@_@TYPE@(@type@ *, @type@ *, const npy_intp n);
static NPY_INLINE int
run_@name@_simd_@func@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps)
{
-#if @vector@ && defined HAVE_EMMINTRIN_H
+#if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS
if (@check@(sizeof(@type@), 16)) {
sse2_@func@_@TYPE@((@type@*)args[1], (@type@*)args[0], dimensions[0]);
return 1;
@@ -167,7 +167,7 @@ run_@name@_simd_@func@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps
* # kind = add, subtract, multiply, divide#
*/
-#if @vector@ && defined HAVE_EMMINTRIN_H
+#if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS
/* prototypes */
static void
@@ -185,7 +185,7 @@ sse2_binary_scalar2_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2,
static NPY_INLINE int
run_binary_simd_@kind@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps)
{
-#if @vector@ && defined HAVE_EMMINTRIN_H
+#if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS
@type@ * ip1 = (@type@ *)args[0];
@type@ * ip2 = (@type@ *)args[1];
@type@ * op = (@type@ *)args[2];
@@ -216,7 +216,7 @@ run_binary_simd_@kind@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps
* #simd = 1, 1, 1, 1, 1, 1, 0, 0#
*/
-#if @vector@ && @simd@ && defined HAVE_EMMINTRIN_H
+#if @vector@ && @simd@ && defined NPY_HAVE_SSE2_INTRINSICS
/* prototypes */
static void
@@ -234,7 +234,7 @@ sse2_binary_scalar2_@kind@_@TYPE@(npy_bool * op, @type@ * ip1, @type@ * ip2,
static NPY_INLINE int
run_binary_simd_@kind@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps)
{
-#if @vector@ && @simd@ && defined HAVE_EMMINTRIN_H
+#if @vector@ && @simd@ && defined NPY_HAVE_SSE2_INTRINSICS
@type@ * ip1 = (@type@ *)args[0];
@type@ * ip2 = (@type@ *)args[1];
npy_bool * op = (npy_bool *)args[2];
@@ -278,7 +278,7 @@ sse2_binary_@kind@_BOOL(npy_bool * op, npy_bool * ip1, npy_bool * ip2,
static NPY_INLINE int
run_binary_simd_@kind@_BOOL(char **args, npy_intp *dimensions, npy_intp *steps)
{
-#if defined HAVE_EMMINTRIN_H
+#if defined NPY_HAVE_SSE2_INTRINSICS
if (sizeof(npy_bool) == 1 && IS_BLOCKABLE_BINARY(sizeof(npy_bool), 16)) {
sse2_binary_@kind@_BOOL((npy_bool*)args[2], (npy_bool*)args[0],
(npy_bool*)args[1], dimensions[0]);
@@ -295,7 +295,7 @@ sse2_reduce_@kind@_BOOL(npy_bool * op, npy_bool * ip, npy_intp n);
static NPY_INLINE int
run_reduce_simd_@kind@_BOOL(char **args, npy_intp *dimensions, npy_intp *steps)
{
-#if defined HAVE_EMMINTRIN_H
+#if defined NPY_HAVE_SSE2_INTRINSICS
if (sizeof(npy_bool) == 1 && IS_BLOCKABLE_REDUCE(sizeof(npy_bool), 16)) {
sse2_reduce_@kind@_BOOL((npy_bool*)args[0], (npy_bool*)args[1],
dimensions[0]);
@@ -317,7 +317,7 @@ sse2_@kind@_BOOL(npy_bool *, npy_bool *, const npy_intp n);
static NPY_INLINE int
run_unary_simd_@kind@_BOOL(char **args, npy_intp *dimensions, npy_intp *steps)
{
-#if defined HAVE_EMMINTRIN_H
+#if defined NPY_HAVE_SSE2_INTRINSICS
if (sizeof(npy_bool) == 1 && IS_BLOCKABLE_UNARY(sizeof(npy_bool), 16)) {
sse2_@kind@_BOOL((npy_bool*)args[1], (npy_bool*)args[0], dimensions[0]);
return 1;
@@ -328,7 +328,7 @@ run_unary_simd_@kind@_BOOL(char **args, npy_intp *dimensions, npy_intp *steps)
/**end repeat**/
-#ifdef HAVE_EMMINTRIN_H
+#ifdef NPY_HAVE_SSE2_INTRINSICS
/*
* Vectorized operations
@@ -843,6 +843,6 @@ sse2_@kind@_BOOL(@type@ * op, @type@ * ip, const npy_intp n)
/**end repeat**/
-#endif /* HAVE_EMMINTRIN_H */
+#endif /* NPY_HAVE_SSE2_INTRINSICS */
#endif