diff options
author | Charles Harris <charlesr.harris@gmail.com> | 2013-06-16 06:48:13 -0700 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2013-06-16 06:48:13 -0700 |
commit | 2a5c2c8227b600654f31ed346c73cce77bef554d (patch) | |
tree | a712e09e278e01797a60ac562ac56f5b72b5ae9b /numpy | |
parent | bb8c89db8bc5afd39dbe42d6f1f6657e769165d7 (diff) | |
parent | 4b4a7365aeff3dfd19d4b72765db5358963614e9 (diff) | |
download | numpy-2a5c2c8227b600654f31ed346c73cce77bef554d.tar.gz |
Merge pull request #3436 from juliantaylor/vectorize-bool
ENH: vectorize boolean logical &&, ||, abs and not
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/core/src/umath/loops.c.src | 6 | ||||
-rw-r--r-- | numpy/core/src/umath/simd.inc.src | 169 | ||||
-rw-r--r-- | numpy/core/tests/test_numeric.py | 70 |
3 files changed, 242 insertions, 3 deletions
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index 068ecde7c..59d144569 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -571,6 +571,9 @@ BOOL_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED *((npy_bool *)iop1) = io1; } else { + if (run_binary_simd_@kind@_BOOL(args, dimensions, steps)) { + return; + } BINARY_LOOP { const npy_bool in1 = *(npy_bool *)ip1; const npy_bool in2 = *(npy_bool *)ip2; @@ -613,6 +616,9 @@ BOOL_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED NPY_NO_EXPORT void BOOL_@kind@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) { + if (run_unary_simd_@kind@_BOOL(args, dimensions, steps)) { + return; + } UNARY_LOOP { npy_bool in1 = *(npy_bool *)ip1; *((npy_bool *)op1) = in1 @OP@ 0; diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src index 746943097..0382f2cf7 100644 --- a/numpy/core/src/umath/simd.inc.src +++ b/numpy/core/src/umath/simd.inc.src @@ -19,6 +19,9 @@ #include "npy_config.h" /* for NO_FLOATING_POINT_SUPPORT */ #include "numpy/ufuncobject.h" +#ifdef HAVE_EMMINTRIN_H +#include <emmintrin.h> +#endif #include <assert.h> #include <stdlib.h> @@ -75,6 +78,12 @@ void PyUFunc_clearfperr(void); * if it was run returns true and false if nothing was done */ +/* + ***************************************************************************** + ** FLOAT DISPATCHERS + ***************************************************************************** + */ + /**begin repeat * Float types * #type = npy_float, npy_double, npy_longdouble# @@ -161,13 +170,66 @@ run_binary_simd_@kind@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps /**end repeat**/ - /* - * Vectorized operations + ***************************************************************************** + ** BOOL DISPATCHERS + ***************************************************************************** + */ + +/**begin repeat + * # kind = logical_or, logical_and# */ +static void +sse2_binary_@kind@_BOOL(npy_bool * op, npy_bool * ip1, npy_bool * ip2, + npy_intp n); + +static NPY_INLINE int +run_binary_simd_@kind@_BOOL(char **args, npy_intp *dimensions, npy_intp *steps) +{ +#if defined HAVE_EMMINTRIN_H + if (sizeof(npy_bool) == 1 && IS_BLOCKABLE_BINARY(sizeof(npy_bool), 16)) { + sse2_binary_@kind@_BOOL((npy_bool*)args[2], (npy_bool*)args[0], + (npy_bool*)args[1], dimensions[0]); + return 1; + } +#endif + return 0; +} + +/**end repeat**/ + +/**begin repeat + * # kind = absolute, logical_not# + */ + +static void +sse2_@kind@_BOOL(npy_bool *, npy_bool *, const npy_intp n); + +static NPY_INLINE int +run_unary_simd_@kind@_BOOL(char **args, npy_intp *dimensions, npy_intp *steps) +{ +#if defined HAVE_EMMINTRIN_H + if (sizeof(npy_bool) == 1 && IS_BLOCKABLE_UNARY(sizeof(npy_bool), 16)) { + sse2_@kind@_BOOL((npy_bool*)args[1], (npy_bool*)args[0], dimensions[0]); + return 1; + } +#endif + return 0; +} + +/**end repeat**/ + #ifdef HAVE_EMMINTRIN_H -#include <emmintrin.h> + +/* + * Vectorized operations + */ +/* + ***************************************************************************** + ** FLOAT LOOPS + ***************************************************************************** + */ /**begin repeat * horizontal reductions on a vector @@ -446,6 +508,107 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n) /**end repeat**/ +/* + ***************************************************************************** + ** BOOL LOOPS + ***************************************************************************** + */ + +/**begin repeat + * # kind = logical_or, logical_and# + * # and = 0, 1# + * # op = ||, &&# + * # vop = or, and# + * # vpre = _mm*2# + * # vsuf = si128*2# + * # vtype = __m128i*2# + * # type = npy_bool*2# + * # vloadu = _mm_loadu_si128*2# + * # vstore = _mm_store_si128*2# + */ + +/* + * convert any bit set to boolean true so vectorized and normal operations are + * consistent, should not be required if bool is used correctly everywhere but + * you never know + */ +#if !@and@ +static NPY_INLINE @vtype@ byte_to_true(@vtype@ v) +{ + const @vtype@ zero = @vpre@_setzero_@vsuf@(); + const @vtype@ truemask = @vpre@_set1_epi8(1 == 1); + /* get 0xFF for zeros */ + @vtype@ tmp = @vpre@_cmpeq_epi8(v, zero); + /* filled with 0xFF/0x00, negate and mask to boolean true */ + return @vpre@_andnot_@vsuf@(tmp, truemask); +} +#endif + +static void +sse2_binary_@kind@_BOOL(npy_bool * op, npy_bool * ip1, npy_bool * ip2, npy_intp n) +{ + LOOP_BLOCK_ALIGN_VAR(op, @type@, 16) + op[i] = ip1[i] @op@ ip2[i]; + LOOP_BLOCKED(@type@, 16) { + @vtype@ a = @vloadu@((__m128i*)&ip1[i]); + @vtype@ b = @vloadu@((__m128i*)&ip2[i]); +#if @and@ + const @vtype@ zero = @vpre@_setzero_@vsuf@(); + /* get 0xFF for non zeros*/ + @vtype@ tmp = @vpre@_cmpeq_epi8(a, zero); + /* andnot -> 0x00 for zeros xFF for non zeros, & with ip2 */ + tmp = @vpre@_andnot_@vsuf@(tmp, b); +#else + @vtype@ tmp = @vpre@_or_@vsuf@(a, b); +#endif + + @vstore@((__m128i*)&op[i], byte_to_true(tmp)); + } + LOOP_BLOCKED_END { + op[i] = (ip1[i] @op@ ip2[i]); + } +} + +/**end repeat**/ + +/**begin repeat + * # kind = absolute, logical_not# + * # op = !=, ==# + * # not = 0, 1# + * # vpre = _mm*2# + * # vsuf = si128*2# + * # vtype = __m128i*2# + * # type = npy_bool*2# + * # vloadu = _mm_loadu_si128*2# + * # vstore = _mm_store_si128*2# + */ + +static void +sse2_@kind@_BOOL(@type@ * op, @type@ * ip, const npy_intp n) +{ + LOOP_BLOCK_ALIGN_VAR(op, @type@, 16) + op[i] = (ip[i] @op@ 0); + LOOP_BLOCKED(@type@, 16) { + @vtype@ a = @vloadu@((__m128i*)&ip[i]); +#if @not@ + const @vtype@ zero = @vpre@_setzero_@vsuf@(); + const @vtype@ truemask = @vpre@_set1_epi8(1 == 1); + /* equivalent to byte_to_true but can skip the negation */ + a = @vpre@_cmpeq_epi8(a, zero); + a = @vpre@_and_@vsuf@(a, truemask); +#else + /* abs is kind of pointless but maybe its used for byte_to_true */ + a = byte_to_true(a); +#endif + @vstore@((__m128i*)&op[i], a); + } + LOOP_BLOCKED_END { + op[i] = (ip[i] @op@ 0); + } +} + +/**end repeat**/ + #endif /* HAVE_EMMINTRIN_H */ #endif diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index 5c8de3734..ed4e0b79e 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -223,6 +223,76 @@ class TestBoolScalar(TestCase): self.assertTrue((f ^ f) is f) +class TestBoolArray(TestCase): + def setUp(self): + # offset for simd tests + self.t = array([True] * 41, dtype=np.bool)[1::] + self.f = array([False] * 41, dtype=np.bool)[1::] + self.o = array([False] * 42, dtype=np.bool)[2::] + self.nm = self.f.copy() + self.im = self.t.copy() + self.nm[3] = True + self.nm[-2] = True + self.im[3] = False + self.im[-2] = False + + def test_all_any(self): + self.assertTrue(self.t.all()) + self.assertTrue(self.t.any()) + self.assertFalse(self.f.all()) + self.assertFalse(self.f.any()) + self.assertTrue(self.nm.any()) + self.assertTrue(self.im.any()) + self.assertFalse(self.nm.all()) + self.assertFalse(self.im.all()) + + def test_logical_not_abs(self): + assert_array_equal(~self.t, self.f) + assert_array_equal(np.abs(~self.t), self.f) + assert_array_equal(np.abs(~self.f), self.t) + assert_array_equal(np.abs(self.f), self.f) + assert_array_equal(~np.abs(self.f), self.t) + assert_array_equal(~np.abs(self.t), self.f) + assert_array_equal(np.abs(~self.nm), self.im) + np.logical_not(self.t, out=self.o) + assert_array_equal(self.o, self.f) + np.abs(self.t, out=self.o) + assert_array_equal(self.o, self.t) + + def test_logical_and_or_xor(self): + assert_array_equal(self.t | self.t, self.t) + assert_array_equal(self.f | self.f, self.f) + assert_array_equal(self.t | self.f, self.t) + assert_array_equal(self.f | self.t, self.t) + np.logical_or(self.t, self.t, out=self.o) + assert_array_equal(self.o, self.t) + assert_array_equal(self.t & self.t, self.t) + assert_array_equal(self.f & self.f, self.f) + assert_array_equal(self.t & self.f, self.f) + assert_array_equal(self.f & self.t, self.f) + np.logical_and(self.t, self.t, out=self.o) + assert_array_equal(self.o, self.t) + assert_array_equal(self.t ^ self.t, self.f) + assert_array_equal(self.f ^ self.f, self.f) + assert_array_equal(self.t ^ self.f, self.t) + assert_array_equal(self.f ^ self.t, self.t) + np.logical_xor(self.t, self.t, out=self.o) + assert_array_equal(self.o, self.f) + + assert_array_equal(self.nm & self.t, self.nm) + assert_array_equal(self.im & self.f, False) + assert_array_equal(self.nm & True, self.nm) + assert_array_equal(self.im & False, self.f) + assert_array_equal(self.nm | self.t, self.t) + assert_array_equal(self.im | self.f, self.im) + assert_array_equal(self.nm | True, self.t) + assert_array_equal(self.im | False, self.im) + assert_array_equal(self.nm ^ self.t, self.im) + assert_array_equal(self.im ^ self.f, self.im) + assert_array_equal(self.nm ^ True, self.im) + assert_array_equal(self.im ^ False, self.im) + + class TestSeterr(TestCase): def test_default(self): err = geterr() |