summary refs log tree commit diff
path: root/numpy
diff options
context:
space:
mode:
author: Charles Harris <charlesr.harris@gmail.com> 2013-06-16 06:48:13 -0700
committer: Charles Harris <charlesr.harris@gmail.com> 2013-06-16 06:48:13 -0700
commit: 2a5c2c8227b600654f31ed346c73cce77bef554d (patch)
tree: a712e09e278e01797a60ac562ac56f5b72b5ae9b /numpy
parent: bb8c89db8bc5afd39dbe42d6f1f6657e769165d7 (diff)
parent: 4b4a7365aeff3dfd19d4b72765db5358963614e9 (diff)
download: numpy-2a5c2c8227b600654f31ed346c73cce77bef554d.tar.gz
Merge pull request #3436 from juliantaylor/vectorize-bool
ENH: vectorize boolean logical &&, ||, abs and not
Diffstat (limited to 'numpy')
-rw-r--r-- numpy/core/src/umath/loops.c.src | 6
-rw-r--r-- numpy/core/src/umath/simd.inc.src | 169
-rw-r--r-- numpy/core/tests/test_numeric.py | 70
3 files changed, 242 insertions, 3 deletions
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index 068ecde7c..59d144569 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -571,6 +571,9 @@ BOOL_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED
*((npy_bool *)iop1) = io1;
}
else {
+ if (run_binary_simd_@kind@_BOOL(args, dimensions, steps)) {
+ return;
+ }
BINARY_LOOP {
const npy_bool in1 = *(npy_bool *)ip1;
const npy_bool in2 = *(npy_bool *)ip2;
@@ -613,6 +616,9 @@ BOOL_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED
NPY_NO_EXPORT void
BOOL_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
{
+ if (run_unary_simd_@kind@_BOOL(args, dimensions, steps)) {
+ return;
+ }
UNARY_LOOP {
npy_bool in1 = *(npy_bool *)ip1;
*((npy_bool *)op1) = in1 @OP@ 0;
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index 746943097..0382f2cf7 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -19,6 +19,9 @@
#include "npy_config.h"
/* for NO_FLOATING_POINT_SUPPORT */
#include "numpy/ufuncobject.h"
+#ifdef HAVE_EMMINTRIN_H
+#include <emmintrin.h>
+#endif
#include <assert.h>
#include <stdlib.h>
@@ -75,6 +78,12 @@ void PyUFunc_clearfperr(void);
* if it was run returns true and false if nothing was done
*/
+/*
+ *****************************************************************************
+ ** FLOAT DISPATCHERS
+ *****************************************************************************
+ */
+
/**begin repeat
* Float types
* #type = npy_float, npy_double, npy_longdouble#
@@ -161,13 +170,66 @@ run_binary_simd_@kind@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps
/**end repeat**/
-
/*
- * Vectorized operations
+ *****************************************************************************
+ ** BOOL DISPATCHERS
+ *****************************************************************************
+ */
+
+/**begin repeat
+ * Dispatcher for the SSE2 boolean binary loops, one per kind listed below.
+ * Returns 1 when the vectorized loop was run for the whole call and 0 when
+ * nothing was done, in which case the caller falls back to its scalar loop.
+ * The vector path is taken only if emmintrin.h is available, npy_bool is one
+ * byte wide and IS_BLOCKABLE_BINARY (defined outside this hunk) accepts the
+ * operands. args[0] and args[1] are passed as the inputs ip1 and ip2,
+ * args[2] as the output op and dimensions[0] as the element count n.
+ * The sse2 loop is only forward declared here; its definition follows in
+ * the HAVE_EMMINTRIN_H section further down.
+ * # kind = logical_or, logical_and#
*/
+static void
+sse2_binary_@kind@_BOOL(npy_bool * op, npy_bool * ip1, npy_bool * ip2,
+ npy_intp n);
+
+static NPY_INLINE int
+run_binary_simd_@kind@_BOOL(char **args, npy_intp *dimensions, npy_intp *steps)
+{
+#if defined HAVE_EMMINTRIN_H
+ if (sizeof(npy_bool) == 1 && IS_BLOCKABLE_BINARY(sizeof(npy_bool), 16)) {
+ sse2_binary_@kind@_BOOL((npy_bool*)args[2], (npy_bool*)args[0],
+ (npy_bool*)args[1], dimensions[0]);
+ return 1; /* handled by the vector loop */
+ }
+#endif
+ return 0; /* not handled, caller must run the scalar loop */
+}
+
+/**end repeat**/
+
+/**begin repeat
+ * Dispatcher for the SSE2 boolean unary loops, one per kind listed below.
+ * Returns 1 when the vectorized loop was run for the whole call and 0 when
+ * nothing was done, in which case the caller falls back to its scalar loop.
+ * The vector path is taken only if emmintrin.h is available, npy_bool is one
+ * byte wide and IS_BLOCKABLE_UNARY (defined outside this hunk) accepts the
+ * operands. args[0] is passed as the input ip, args[1] as the output op and
+ * dimensions[0] as the element count n.
+ * The sse2 loop is only forward declared here; its definition follows in
+ * the HAVE_EMMINTRIN_H section further down.
+ * # kind = absolute, logical_not#
+ */
+
+static void
+sse2_@kind@_BOOL(npy_bool *, npy_bool *, const npy_intp n);
+
+static NPY_INLINE int
+run_unary_simd_@kind@_BOOL(char **args, npy_intp *dimensions, npy_intp *steps)
+{
+#if defined HAVE_EMMINTRIN_H
+ if (sizeof(npy_bool) == 1 && IS_BLOCKABLE_UNARY(sizeof(npy_bool), 16)) {
+ sse2_@kind@_BOOL((npy_bool*)args[1], (npy_bool*)args[0], dimensions[0]);
+ return 1; /* handled by the vector loop */
+ }
+#endif
+ return 0; /* not handled, caller must run the scalar loop */
+}
+
+/**end repeat**/
+
#ifdef HAVE_EMMINTRIN_H
-#include <emmintrin.h>
+
+/*
+ * Vectorized operations
+ */
+/*
+ *****************************************************************************
+ ** FLOAT LOOPS
+ *****************************************************************************
+ */
/**begin repeat
* horizontal reductions on a vector
@@ -446,6 +508,107 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n)
/**end repeat**/
+/*
+ *****************************************************************************
+ ** BOOL LOOPS
+ *****************************************************************************
+ */
+
+/**begin repeat
+ * SSE2 loops computing boolean logical_or and logical_and of ip1 and ip2
+ * into op for n one byte elements, 16 elements per vector iteration.
+ * Inputs are read with the unaligned load and the result is written with
+ * the aligned store. The scalar statement under LOOP_BLOCK_ALIGN_VAR
+ * presumably peels elements until op is 16 byte aligned, and
+ * LOOP_BLOCKED_END handles the leftover tail; those macros are defined
+ * outside this hunk, so confirm against their definitions.
+ * The vop variable is not referenced by the body shown here.
+ * # kind = logical_or, logical_and#
+ * # and = 0, 1#
+ * # op = ||, &&#
+ * # vop = or, and#
+ * # vpre = _mm*2#
+ * # vsuf = si128*2#
+ * # vtype = __m128i*2#
+ * # type = npy_bool*2#
+ * # vloadu = _mm_loadu_si128*2#
+ * # vstore = _mm_store_si128*2#
+ */
+
+/*
+ * Convert every non-zero byte to boolean true (1) so that vectorized and
+ * scalar operations give consistent results. This should not be required if
+ * bool is used correctly everywhere, but you never know.
+ */
+#if !@and@ /* emit the helper only once, in the first (logical_or) repeat */
+static NPY_INLINE @vtype@ byte_to_true(@vtype@ v)
+{
+ const @vtype@ zero = @vpre@_setzero_@vsuf@();
+ const @vtype@ truemask = @vpre@_set1_epi8(1 == 1);
+ /* get 0xFF for zero bytes, 0x00 for non-zero bytes */
+ @vtype@ tmp = @vpre@_cmpeq_epi8(v, zero);
+ /* andnot negates that mask and keeps only the low bit: 1 for non-zero */
+ return @vpre@_andnot_@vsuf@(tmp, truemask);
+}
+#endif
+
+static void
+sse2_binary_@kind@_BOOL(npy_bool * op, npy_bool * ip1, npy_bool * ip2, npy_intp n)
+{
+ LOOP_BLOCK_ALIGN_VAR(op, @type@, 16)
+ op[i] = ip1[i] @op@ ip2[i];
+ LOOP_BLOCKED(@type@, 16) {
+ @vtype@ a = @vloadu@((__m128i*)&ip1[i]);
+ @vtype@ b = @vloadu@((__m128i*)&ip2[i]);
+#if @and@
+ const @vtype@ zero = @vpre@_setzero_@vsuf@();
+ /* get 0xFF where ip1 is zero, 0x00 where it is non-zero */
+ @vtype@ tmp = @vpre@_cmpeq_epi8(a, zero);
+ /* andnot -> 0x00 where ip1 is zero, ip2's byte where ip1 is non-zero */
+ tmp = @vpre@_andnot_@vsuf@(tmp, b);
+#else
+ @vtype@ tmp = @vpre@_or_@vsuf@(a, b);
+#endif
+
+ @vstore@((__m128i*)&op[i], byte_to_true(tmp));
+ }
+ LOOP_BLOCKED_END {
+ op[i] = (ip1[i] @op@ ip2[i]);
+ }
+}
+
+/**end repeat**/
+
+/**begin repeat
+ * SSE2 loops computing boolean absolute (ip != 0) and logical_not (ip == 0)
+ * of ip into op for n one byte elements, 16 elements per vector iteration.
+ * The input is read with the unaligned load and the result is written with
+ * the aligned store. The scalar statement under LOOP_BLOCK_ALIGN_VAR
+ * presumably peels elements until op is 16 byte aligned, and
+ * LOOP_BLOCKED_END handles the leftover tail; those macros are defined
+ * outside this hunk, so confirm against their definitions.
+ * # kind = absolute, logical_not#
+ * # op = !=, ==#
+ * # not = 0, 1#
+ * # vpre = _mm*2#
+ * # vsuf = si128*2#
+ * # vtype = __m128i*2#
+ * # type = npy_bool*2#
+ * # vloadu = _mm_loadu_si128*2#
+ * # vstore = _mm_store_si128*2#
+ */
+
+static void
+sse2_@kind@_BOOL(@type@ * op, @type@ * ip, const npy_intp n)
+{
+ LOOP_BLOCK_ALIGN_VAR(op, @type@, 16)
+ op[i] = (ip[i] @op@ 0);
+ LOOP_BLOCKED(@type@, 16) {
+ @vtype@ a = @vloadu@((__m128i*)&ip[i]);
+#if @not@
+ const @vtype@ zero = @vpre@_setzero_@vsuf@();
+ const @vtype@ truemask = @vpre@_set1_epi8(1 == 1);
+ /* same compare as byte_to_true; 0xFF-for-zero is already the negation */
+ a = @vpre@_cmpeq_epi8(a, zero);
+ a = @vpre@_and_@vsuf@(a, truemask);
+#else
+ /* abs is kind of pointless but maybe it's used for byte_to_true */
+ a = byte_to_true(a);
+#endif
+ @vstore@((__m128i*)&op[i], a);
+ }
+ LOOP_BLOCKED_END {
+ op[i] = (ip[i] @op@ 0);
+ }
+}
+
+/**end repeat**/
+
#endif /* HAVE_EMMINTRIN_H */
#endif
diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py
index 5c8de3734..ed4e0b79e 100644
--- a/numpy/core/tests/test_numeric.py
+++ b/numpy/core/tests/test_numeric.py
@@ -223,6 +223,76 @@ class TestBoolScalar(TestCase):
self.assertTrue((f ^ f) is f)
+class TestBoolArray(TestCase):
+ def setUp(self):
+ # slice off leading elements so the data start is offset, for the simd tests
+ self.t = array([True] * 41, dtype=np.bool)[1::]
+ self.f = array([False] * 41, dtype=np.bool)[1::]
+ self.o = array([False] * 42, dtype=np.bool)[2::]
+ self.nm = self.f.copy()  # all False except True at [3] and [-2], set below
+ self.im = self.t.copy()  # all True except False at [3] and [-2], set below
+ self.nm[3] = True
+ self.nm[-2] = True
+ self.im[3] = False
+ self.im[-2] = False
+
+ def test_all_any(self):
+ self.assertTrue(self.t.all())
+ self.assertTrue(self.t.any())
+ self.assertFalse(self.f.all())
+ self.assertFalse(self.f.any())
+ self.assertTrue(self.nm.any())
+ self.assertTrue(self.im.any())
+ self.assertFalse(self.nm.all())
+ self.assertFalse(self.im.all())
+
+ def test_logical_not_abs(self):
+ assert_array_equal(~self.t, self.f)
+ assert_array_equal(np.abs(~self.t), self.f)
+ assert_array_equal(np.abs(~self.f), self.t)
+ assert_array_equal(np.abs(self.f), self.f)
+ assert_array_equal(~np.abs(self.f), self.t)
+ assert_array_equal(~np.abs(self.t), self.f)
+ assert_array_equal(np.abs(~self.nm), self.im)
+ np.logical_not(self.t, out=self.o)  # out= writes into the 2-offset buffer
+ assert_array_equal(self.o, self.f)
+ np.abs(self.t, out=self.o)
+ assert_array_equal(self.o, self.t)
+
+ def test_logical_and_or_xor(self):
+ assert_array_equal(self.t | self.t, self.t)
+ assert_array_equal(self.f | self.f, self.f)
+ assert_array_equal(self.t | self.f, self.t)
+ assert_array_equal(self.f | self.t, self.t)
+ np.logical_or(self.t, self.t, out=self.o)
+ assert_array_equal(self.o, self.t)
+ assert_array_equal(self.t & self.t, self.t)
+ assert_array_equal(self.f & self.f, self.f)
+ assert_array_equal(self.t & self.f, self.f)
+ assert_array_equal(self.f & self.t, self.f)
+ np.logical_and(self.t, self.t, out=self.o)
+ assert_array_equal(self.o, self.t)
+ assert_array_equal(self.t ^ self.t, self.f)
+ assert_array_equal(self.f ^ self.f, self.f)
+ assert_array_equal(self.t ^ self.f, self.t)
+ assert_array_equal(self.f ^ self.t, self.t)
+ np.logical_xor(self.t, self.t, out=self.o)
+ assert_array_equal(self.o, self.f)
+
+ assert_array_equal(self.nm & self.t, self.nm)
+ assert_array_equal(self.im & self.f, False)
+ assert_array_equal(self.nm & True, self.nm)
+ assert_array_equal(self.im & False, self.f)
+ assert_array_equal(self.nm | self.t, self.t)
+ assert_array_equal(self.im | self.f, self.im)
+ assert_array_equal(self.nm | True, self.t)
+ assert_array_equal(self.im | False, self.im)
+ assert_array_equal(self.nm ^ self.t, self.im)
+ assert_array_equal(self.im ^ self.f, self.im)
+ assert_array_equal(self.nm ^ True, self.im)
+ assert_array_equal(self.im ^ False, self.im)
+
+
class TestSeterr(TestCase):
def test_default(self):
err = geterr()