diff options
-rw-r--r-- | doc/release/1.13.0-notes.rst | 9 | ||||
-rw-r--r-- | numpy/ma/core.py | 139 | ||||
-rw-r--r-- | numpy/ma/tests/test_core.py | 89 |
3 files changed, 164 insertions, 73 deletions
diff --git a/doc/release/1.13.0-notes.rst b/doc/release/1.13.0-notes.rst index 049653ea4..49fd6735d 100644 --- a/doc/release/1.13.0-notes.rst +++ b/doc/release/1.13.0-notes.rst @@ -179,6 +179,15 @@ Better default repr for ``ndarray`` subclasses Subclasses of ndarray with no ``repr`` specialization now correctly indent their data and type lines. +More reliable comparisons of masked arrays +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Comparisons of masked arrays were buggy for masked scalars and failed for +structured arrays with dimension higher than one. Both problems are now +solved. In the process, it was ensured that in getting the result for a +structured array, masked fields are properly ignored, i.e., the result is equal +if all fields that are non-masked in both are equal, thus making the behaviour +identical to what one gets by comparing an unstructured masked array and then +doing ``.all()`` over some axis. Changes ======= diff --git a/numpy/ma/core.py b/numpy/ma/core.py index 3b2b39b18..35d4b72bc 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -23,6 +23,7 @@ Released for unlimited redistribution. from __future__ import division, absolute_import, print_function import sys +import operator import warnings from functools import reduce @@ -1733,7 +1734,8 @@ def mask_or(m1, m2, copy=False, shrink=True): if (dtype1 != dtype2): raise ValueError("Incompatible dtypes '%s'<>'%s'" % (dtype1, dtype2)) if dtype1.names: - newmask = np.empty_like(m1) + # Allocate an output mask array with the properly broadcast shape. + newmask = np.empty(np.broadcast(m1, m2).shape, dtype1) _recursive_mask_or(m1, m2, newmask) return newmask return make_mask(umath.logical_or(m1, m2), copy=copy, shrink=shrink) @@ -3873,81 +3875,84 @@ class MaskedArray(ndarray): return True return False - def __eq__(self, other): - """ - Check whether other equals self elementwise. + def _comparison(self, other, compare): + """Compare self with other using operator.eq or operator.ne. + When either of the elements is masked, the result is masked as well, + but the underlying boolean data are still set, with self and other + considered equal if both are masked, and unequal otherwise. + + For structured arrays, all fields are combined, with masked values + ignored. The result is masked if all fields were masked, with self + and other considered equal only if both were fully masked. """ - if self is masked: - return masked omask = getmask(other) - if omask is nomask: - check = self.filled(0).__eq__(other) - try: - check = check.view(type(self)) - check._mask = self._mask - except AttributeError: - # Dang, we have a bool instead of an array: return the bool - return check + smask = self.mask + mask = mask_or(smask, omask, copy=True) + + odata = getdata(other) + if mask.dtype.names: + # For possibly masked structured arrays we need to be careful, + # since the standard structured array comparison will use all + # fields, masked or not. To avoid masked fields influencing the + # outcome, we set all masked fields in self to other, so they'll + # count as equal. To prepare, we ensure we have the right shape. + broadcast_shape = np.broadcast(self, odata).shape + sbroadcast = np.broadcast_to(self, broadcast_shape, subok=True) + sbroadcast._mask = mask + sdata = sbroadcast.filled(odata) + # Now take care of the mask; the merged mask should have an item + # masked if all fields were masked (in one and/or other). + mask = (mask == np.ones((), mask.dtype)) + else: - odata = filled(other, 0) - check = self.filled(0).__eq__(odata).view(type(self)) - if self._mask is nomask: - check._mask = omask - else: - mask = mask_or(self._mask, omask) - if mask.dtype.names: - if mask.size > 1: - axis = 1 - else: - axis = None - try: - mask = mask.view((bool_, len(self.dtype))).all(axis) - except (ValueError, np.AxisError): - # TODO: what error are we trying to catch here? - # invalid axis, or invalid view? - mask = np.all([[f[n].all() for n in mask.dtype.names] - for f in mask], axis=axis) - check._mask = mask + # For regular arrays, just use the data as they come. + sdata = self.data + + check = compare(sdata, odata) + + if isinstance(check, (np.bool_, bool)): + return masked if mask else check + + if mask is not nomask: + # Adjust elements that were masked, which should be treated + # as equal if masked in both, unequal if masked in one. + # Note that this works automatically for structured arrays too. + check = np.where(mask, compare(smask, omask), check) + if mask.shape != check.shape: + # Guarantee consistency of the shape, making a copy since the + # the mask may need to get written to later. + mask = np.broadcast_to(mask, check.shape).copy() + + check = check.view(type(self)) + check._mask = mask return check - def __ne__(self, other): + def __eq__(self, other): + """Check whether other equals self elementwise. + + When either of the elements is masked, the result is masked as well, + but the underlying boolean data are still set, with self and other + considered equal if both are masked, and unequal otherwise. + + For structured arrays, all fields are combined, with masked values + ignored. The result is masked if all fields were masked, with self + and other considered equal only if both were fully masked. """ - Check whether other doesn't equal self elementwise + return self._comparison(other, operator.eq) + + def __ne__(self, other): + """Check whether other does not equal self elementwise. + + When either of the elements is masked, the result is masked as well, + but the underlying boolean data are still set, with self and other + considered equal if both are masked, and unequal otherwise. + For structured arrays, all fields are combined, with masked values + ignored. The result is masked if all fields were masked, with self + and other considered equal only if both were fully masked. """ - if self is masked: - return masked - omask = getmask(other) - if omask is nomask: - check = self.filled(0).__ne__(other) - try: - check = check.view(type(self)) - check._mask = self._mask - except AttributeError: - # In case check is a boolean (or a numpy.bool) - return check - else: - odata = filled(other, 0) - check = self.filled(0).__ne__(odata).view(type(self)) - if self._mask is nomask: - check._mask = omask - else: - mask = mask_or(self._mask, omask) - if mask.dtype.names: - if mask.size > 1: - axis = 1 - else: - axis = None - try: - mask = mask.view((bool_, len(self.dtype))).all(axis) - except (ValueError, np.AxisError): - # TODO: what error are we trying to catch here? - # invalid axis, or invalid view? - mask = np.all([[f[n].all() for n in mask.dtype.names] - for f in mask], axis=axis) - check._mask = mask - return check + return self._comparison(other, operator.ne) def __add__(self, other): """ diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py index f9d032f09..d64f1acdc 100644 --- a/numpy/ma/tests/test_core.py +++ b/numpy/ma/tests/test_core.py @@ -1335,32 +1335,95 @@ class TestMaskedArrayArithmetic(TestCase): ndtype = [('A', int), ('B', int)] a = array([(1, 1), (2, 2)], mask=[(0, 1), (0, 0)], dtype=ndtype) test = (a == a) - assert_equal(test, [True, True]) + assert_equal(test.data, [True, True]) + assert_equal(test.mask, [False, False]) + test = (a == a[0]) + assert_equal(test.data, [True, False]) assert_equal(test.mask, [False, False]) b = array([(1, 1), (2, 2)], mask=[(1, 0), (0, 0)], dtype=ndtype) test = (a == b) - assert_equal(test, [False, True]) + assert_equal(test.data, [False, True]) + assert_equal(test.mask, [True, False]) + test = (a[0] == b) + assert_equal(test.data, [False, False]) assert_equal(test.mask, [True, False]) b = array([(1, 1), (2, 2)], mask=[(0, 1), (1, 0)], dtype=ndtype) test = (a == b) - assert_equal(test, [True, False]) + assert_equal(test.data, [True, True]) assert_equal(test.mask, [False, False]) + # complicated dtype, 2-dimensional array. + ndtype = [('A', int), ('B', [('BA', int), ('BB', int)])] + a = array([[(1, (1, 1)), (2, (2, 2))], + [(3, (3, 3)), (4, (4, 4))]], + mask=[[(0, (1, 0)), (0, (0, 1))], + [(1, (0, 0)), (1, (1, 1))]], dtype=ndtype) + test = (a[0, 0] == a) + assert_equal(test.data, [[True, False], [False, False]]) + assert_equal(test.mask, [[False, False], [False, True]]) def test_ne_on_structured(self): # Test the equality of structured arrays ndtype = [('A', int), ('B', int)] a = array([(1, 1), (2, 2)], mask=[(0, 1), (0, 0)], dtype=ndtype) test = (a != a) - assert_equal(test, [False, False]) + assert_equal(test.data, [False, False]) + assert_equal(test.mask, [False, False]) + test = (a != a[0]) + assert_equal(test.data, [False, True]) assert_equal(test.mask, [False, False]) b = array([(1, 1), (2, 2)], mask=[(1, 0), (0, 0)], dtype=ndtype) test = (a != b) - assert_equal(test, [True, False]) + assert_equal(test.data, [True, False]) + assert_equal(test.mask, [True, False]) + test = (a[0] != b) + assert_equal(test.data, [True, True]) assert_equal(test.mask, [True, False]) b = array([(1, 1), (2, 2)], mask=[(0, 1), (1, 0)], dtype=ndtype) test = (a != b) - assert_equal(test, [False, True]) + assert_equal(test.data, [False, False]) assert_equal(test.mask, [False, False]) + # complicated dtype, 2-dimensional array. + ndtype = [('A', int), ('B', [('BA', int), ('BB', int)])] + a = array([[(1, (1, 1)), (2, (2, 2))], + [(3, (3, 3)), (4, (4, 4))]], + mask=[[(0, (1, 0)), (0, (0, 1))], + [(1, (0, 0)), (1, (1, 1))]], dtype=ndtype) + test = (a[0, 0] != a) + assert_equal(test.data, [[False, True], [True, True]]) + assert_equal(test.mask, [[False, False], [False, True]]) + + def test_eq_ne_structured_extra(self): + # ensure simple examples are symmetric and make sense. + # from https://github.com/numpy/numpy/pull/8590#discussion_r101126465 + dt = np.dtype('i4,i4') + for m1 in (mvoid((1, 2), mask=(0, 0), dtype=dt), + mvoid((1, 2), mask=(0, 1), dtype=dt), + mvoid((1, 2), mask=(1, 0), dtype=dt), + mvoid((1, 2), mask=(1, 1), dtype=dt)): + ma1 = m1.view(MaskedArray) + r1 = ma1.view('2i4') + for m2 in (mvoid((1, 1), dtype=dt), + mvoid((1, 0), mask=(0, 1), dtype=dt), + mvoid((3, 2), mask=(0, 1), dtype=dt)): + ma2 = m2.view(MaskedArray) + r2 = ma2.view('2i4') + eq_expected = (r1 == r2).all() + assert_equal(m1 == m2, eq_expected) + assert_equal(m2 == m1, eq_expected) + assert_equal(ma1 == m2, eq_expected) + assert_equal(m1 == ma2, eq_expected) + assert_equal(ma1 == ma2, eq_expected) + # Also check it is the same if we do it element by element. + el_by_el = [m1[name] == m2[name] for name in dt.names] + assert_equal(array(el_by_el, dtype=bool).all(), eq_expected) + ne_expected = (r1 != r2).any() + assert_equal(m1 != m2, ne_expected) + assert_equal(m2 != m1, ne_expected) + assert_equal(ma1 != m2, ne_expected) + assert_equal(m1 != ma2, ne_expected) + assert_equal(ma1 != ma2, ne_expected) + el_by_el = [m1[name] != m2[name] for name in dt.names] + assert_equal(array(el_by_el, dtype=bool).any(), ne_expected) def test_eq_with_None(self): # Really, comparisons with None should not be done, but check them @@ -1393,6 +1456,20 @@ class TestMaskedArrayArithmetic(TestCase): assert_equal(a == 0, False) assert_equal(a != 1, False) assert_equal(a != 0, True) + b = array(1, mask=True) + assert_equal(b == 0, masked) + assert_equal(b == 1, masked) + assert_equal(b != 0, masked) + assert_equal(b != 1, masked) + + def test_eq_different_dimensions(self): + m1 = array([[0, 1], [1, 2]]) + m2 = array([1, 1], mask=[0, 1]) + test = (m1 == m2) + assert_equal(test, [[False, False], + [True, False]]) + assert_equal(test.mask, [[False, True], + [False, True]]) def test_numpyarithmetics(self): # Check that the mask is not back-propagated when using numpy functions |