diff options
Diffstat (limited to 'numpy/ma')
-rw-r--r-- | numpy/ma/__init__.pyi | 1 | ||||
-rw-r--r-- | numpy/ma/bench.py | 130 | ||||
-rw-r--r-- | numpy/ma/core.py | 433 | ||||
-rw-r--r-- | numpy/ma/core.pyi | 10 | ||||
-rw-r--r-- | numpy/ma/extras.py | 102 | ||||
-rw-r--r-- | numpy/ma/tests/test_core.py | 162 | ||||
-rw-r--r-- | numpy/ma/tests/test_extras.py | 74 | ||||
-rw-r--r-- | numpy/ma/tests/test_old_ma.py | 8 | ||||
-rw-r--r-- | numpy/ma/tests/test_regression.py | 6 | ||||
-rw-r--r-- | numpy/ma/tests/test_subclassing.py | 10 | ||||
-rw-r--r-- | numpy/ma/testutils.py | 2 |
11 files changed, 662 insertions, 276 deletions
diff --git a/numpy/ma/__init__.pyi b/numpy/ma/__init__.pyi index 7f5cb56a8..ce72383e5 100644 --- a/numpy/ma/__init__.pyi +++ b/numpy/ma/__init__.pyi @@ -155,7 +155,6 @@ from numpy.ma.core import ( resize as resize, right_shift as right_shift, round as round, - round_ as round_, set_fill_value as set_fill_value, shape as shape, sin as sin, diff --git a/numpy/ma/bench.py b/numpy/ma/bench.py deleted file mode 100644 index 56865683d..000000000 --- a/numpy/ma/bench.py +++ /dev/null @@ -1,130 +0,0 @@ -#!/usr/bin/env python3 - -import timeit -import numpy - - -############################################################################### -# Global variables # -############################################################################### - - -# Small arrays -xs = numpy.random.uniform(-1, 1, 6).reshape(2, 3) -ys = numpy.random.uniform(-1, 1, 6).reshape(2, 3) -zs = xs + 1j * ys -m1 = [[True, False, False], [False, False, True]] -m2 = [[True, False, True], [False, False, True]] -nmxs = numpy.ma.array(xs, mask=m1) -nmys = numpy.ma.array(ys, mask=m2) -nmzs = numpy.ma.array(zs, mask=m1) - -# Big arrays -xl = numpy.random.uniform(-1, 1, 100*100).reshape(100, 100) -yl = numpy.random.uniform(-1, 1, 100*100).reshape(100, 100) -zl = xl + 1j * yl -maskx = xl > 0.8 -masky = yl < -0.8 -nmxl = numpy.ma.array(xl, mask=maskx) -nmyl = numpy.ma.array(yl, mask=masky) -nmzl = numpy.ma.array(zl, mask=maskx) - - -############################################################################### -# Functions # -############################################################################### - - -def timer(s, v='', nloop=500, nrep=3): - units = ["s", "ms", "µs", "ns"] - scaling = [1, 1e3, 1e6, 1e9] - print("%s : %-50s : " % (v, s), end=' ') - varnames = ["%ss,nm%ss,%sl,nm%sl" % tuple(x*4) for x in 'xyz'] - setup = 'from __main__ import numpy, ma, %s' % ','.join(varnames) - Timer = timeit.Timer(stmt=s, setup=setup) - best = min(Timer.repeat(nrep, nloop)) / nloop - if best > 0.0: - order = min(-int(numpy.floor(numpy.log10(best)) // 3), 3) - else: - order = 3 - print("%d loops, best of %d: %.*g %s per loop" % (nloop, nrep, - 3, - best * scaling[order], - units[order])) - - -def compare_functions_1v(func, nloop=500, - xs=xs, nmxs=nmxs, xl=xl, nmxl=nmxl): - funcname = func.__name__ - print("-"*50) - print(f'{funcname} on small arrays') - module, data = "numpy.ma", "nmxs" - timer("%(module)s.%(funcname)s(%(data)s)" % locals(), v="%11s" % module, nloop=nloop) - - print("%s on large arrays" % funcname) - module, data = "numpy.ma", "nmxl" - timer("%(module)s.%(funcname)s(%(data)s)" % locals(), v="%11s" % module, nloop=nloop) - return - -def compare_methods(methodname, args, vars='x', nloop=500, test=True, - xs=xs, nmxs=nmxs, xl=xl, nmxl=nmxl): - print("-"*50) - print(f'{methodname} on small arrays') - data, ver = f'nm{vars}l', 'numpy.ma' - timer("%(data)s.%(methodname)s(%(args)s)" % locals(), v=ver, nloop=nloop) - - print("%s on large arrays" % methodname) - data, ver = "nm%sl" % vars, 'numpy.ma' - timer("%(data)s.%(methodname)s(%(args)s)" % locals(), v=ver, nloop=nloop) - return - -def compare_functions_2v(func, nloop=500, test=True, - xs=xs, nmxs=nmxs, - ys=ys, nmys=nmys, - xl=xl, nmxl=nmxl, - yl=yl, nmyl=nmyl): - funcname = func.__name__ - print("-"*50) - print(f'{funcname} on small arrays') - module, data = "numpy.ma", "nmxs,nmys" - timer("%(module)s.%(funcname)s(%(data)s)" % locals(), v="%11s" % module, nloop=nloop) - - print(f'{funcname} on large arrays') - module, data = "numpy.ma", "nmxl,nmyl" - timer("%(module)s.%(funcname)s(%(data)s)" % locals(), v="%11s" % module, nloop=nloop) - return - - -if __name__ == '__main__': - compare_functions_1v(numpy.sin) - compare_functions_1v(numpy.log) - compare_functions_1v(numpy.sqrt) - - compare_functions_2v(numpy.multiply) - compare_functions_2v(numpy.divide) - compare_functions_2v(numpy.power) - - compare_methods('ravel', '', nloop=1000) - compare_methods('conjugate', '', 'z', nloop=1000) - compare_methods('transpose', '', nloop=1000) - compare_methods('compressed', '', nloop=1000) - compare_methods('__getitem__', '0', nloop=1000) - compare_methods('__getitem__', '(0,0)', nloop=1000) - compare_methods('__getitem__', '[0,-1]', nloop=1000) - compare_methods('__setitem__', '0, 17', nloop=1000, test=False) - compare_methods('__setitem__', '(0,0), 17', nloop=1000, test=False) - - print("-"*50) - print("__setitem__ on small arrays") - timer('nmxs.__setitem__((-1,0),numpy.ma.masked)', 'numpy.ma ', nloop=10000) - - print("-"*50) - print("__setitem__ on large arrays") - timer('nmxl.__setitem__((-1,0),numpy.ma.masked)', 'numpy.ma ', nloop=10000) - - print("-"*50) - print("where on small arrays") - timer('numpy.ma.where(nmxs>2,nmxs,nmys)', 'numpy.ma ', nloop=1000) - print("-"*50) - print("where on large arrays") - timer('numpy.ma.where(nmxl>2,nmxl,nmyl)', 'numpy.ma ', nloop=100) diff --git a/numpy/ma/core.py b/numpy/ma/core.py index 8c4ad37eb..2fe326885 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -2081,9 +2081,11 @@ def masked_equal(x, value, copy=True): """ Mask an array where equal to a given value. - This function is a shortcut to ``masked_where``, with - `condition` = (x == value). For floating point arrays, - consider using ``masked_values(x, value)``. + Return a MaskedArray, masked where the data in array `x` are + equal to `value`. The fill_value of the returned MaskedArray + is set to `value`. + + For floating point arrays, consider using ``masked_values(x, value)``. See Also -------- @@ -2303,25 +2305,17 @@ def masked_values(x, value, rtol=1e-5, atol=1e-8, copy=True, shrink=True): Note that `mask` is set to ``nomask`` if possible. - >>> ma.masked_values(x, 1.5) + >>> ma.masked_values(x, 2.1) masked_array(data=[1. , 1.1, 2. , 1.1, 3. ], mask=False, - fill_value=1.5) + fill_value=2.1) - For integers, the fill value will be different in general to the - result of ``masked_equal``. + Unlike `masked_equal`, `masked_values` can perform approximate equalities. - >>> x = np.arange(5) - >>> x - array([0, 1, 2, 3, 4]) - >>> ma.masked_values(x, 2) - masked_array(data=[0, 1, --, 3, 4], - mask=[False, False, True, False, False], - fill_value=2) - >>> ma.masked_equal(x, 2) - masked_array(data=[0, 1, --, 3, 4], + >>> ma.masked_values(x, 2.1, atol=1e-1) + masked_array(data=[1.0, 1.1, --, 1.1, 3.0], mask=[False, False, True, False, False], - fill_value=2) + fill_value=2.1) """ xnew = filled(x, value) @@ -2362,8 +2356,13 @@ def masked_invalid(a, copy=True): fill_value=1e+20) """ - - return masked_where(~(np.isfinite(getdata(a))), a, copy=copy) + a = np.array(a, copy=False, subok=True) + res = masked_where(~(np.isfinite(a)), a, copy=copy) + # masked_invalid previously never returned nomask as a mask and doing so + # threw off matplotlib (gh-22842). So use shrink=False: + if res._mask is nomask: + res._mask = make_mask_none(res.shape, res.dtype) + return res ############################################################################### # Printing options # @@ -2841,7 +2840,6 @@ class MaskedArray(ndarray): # Process mask. # Type of the mask mdtype = make_mask_descr(_data.dtype) - if mask is nomask: # Case 1. : no mask in input. # Erase the current mask ? @@ -2859,7 +2857,7 @@ class MaskedArray(ndarray): mask = np.array( [getmaskarray(np.asanyarray(m, dtype=_data.dtype)) for m in data], dtype=mdtype) - except ValueError: + except (ValueError, TypeError): # If data is nested mask = nomask # Force shrinking of the mask if needed (and possible) @@ -2872,10 +2870,22 @@ class MaskedArray(ndarray): _data._mask = _data._mask.copy() # Reset the shape of the original mask if getmask(data) is not nomask: - data._mask.shape = data.shape + # gh-21022 encounters an issue here + # because data._mask.shape is not writeable, but + # the op was also pointless in that case, because + # the shapes were the same, so we can at least + # avoid that path + if data._mask.shape != data.shape: + data._mask.shape = data.shape else: # Case 2. : With a mask in input. # If mask is boolean, create an array of True or False + + # if users pass `mask=None` be forgiving here and cast it False + # for speed; although the default is `mask=nomask` and can differ. + if mask is None: + mask = False + if mask is True and mdtype == MaskType: mask = np.ones(_data.shape, dtype=mdtype) elif mask is False and mdtype == MaskType: @@ -2923,6 +2933,7 @@ class MaskedArray(ndarray): else: _data._mask = np.logical_or(mask, _data._mask) _data._sharedmask = False + # Update fill_value. if fill_value is None: fill_value = getattr(data, '_fill_value', None) @@ -3329,6 +3340,10 @@ class MaskedArray(ndarray): # Note: Don't try to check for m.any(), that'll take too long return dout + # setitem may put NaNs into integer arrays or occasionally overflow a + # float. But this may happen in masked values, so avoid otherwise + # correct warnings (as is typical also in masked calculations). + @np.errstate(over='ignore', invalid='ignore') def __setitem__(self, indx, value): """ x.__setitem__(i, y) <==> x[i]=y @@ -4620,6 +4635,7 @@ class MaskedArray(ndarray): otherwise. 'K' means to read the elements in the order they occur in memory, except for reversing the data when strides are negative. By default, 'C' index order is used. + (Masked arrays currently use 'A' on the data when 'K' is passed.) Returns ------- @@ -4646,6 +4662,13 @@ class MaskedArray(ndarray): fill_value=999999) """ + # The order of _data and _mask could be different (it shouldn't be + # normally). Passing order `K` or `A` would be incorrect. + # So we ignore the mask memory order. + # TODO: We don't actually support K, so use A instead. We could + # try to guess this correct by sorting strides or deprecate. + if order in "kKaA": + order = "F" if self._data.flags.fnc else "C" r = ndarray.ravel(self._data, order=order).view(type(self)) r._update_from(self) if self._mask is not nomask: @@ -6295,6 +6318,12 @@ class MaskedArray(ndarray): memo[id(self)] = copied for (k, v) in self.__dict__.items(): copied.__dict__[k] = deepcopy(v, memo) + # as clearly documented for np.copy(), you need to use + # deepcopy() directly for arrays of object type that may + # contain compound types--you cannot depend on normal + # copy semantics to do the right thing here + if self.dtype.hasobject: + copied._data[...] = deepcopy(copied._data) return copied @@ -6900,6 +6929,32 @@ def power(a, b, third=None): The *out* argument to `numpy.power` is not supported, `third` has to be None. + Examples + -------- + >>> import numpy.ma as ma + >>> x = [11.2, -3.973, 0.801, -1.41] + >>> mask = [0, 0, 0, 1] + >>> masked_x = ma.masked_array(x, mask) + >>> masked_x + masked_array(data=[11.2, -3.973, 0.801, --], + mask=[False, False, False, True], + fill_value=1e+20) + >>> ma.power(masked_x, 2) + masked_array(data=[125.43999999999998, 15.784728999999999, + 0.6416010000000001, --], + mask=[False, False, False, True], + fill_value=1e+20) + >>> y = [-0.5, 2, 0, 17] + >>> masked_y = ma.masked_array(y, mask) + >>> masked_y + masked_array(data=[-0.5, 2.0, 0.0, --], + mask=[False, False, False, True], + fill_value=1e+20) + >>> ma.power(masked_x, masked_y) + masked_array(data=[0.29880715233359845, 15.784728999999999, 1.0, --], + mask=[False, False, False, True], + fill_value=1e+20) + """ if third is not None: raise MaskError("3-argument power not supported.") @@ -6965,6 +7020,21 @@ def sort(a, axis=-1, kind=None, order=None, endwith=True, fill_value=None): See Also -------- MaskedArray.sort : equivalent method + + Examples + -------- + >>> import numpy.ma as ma + >>> x = [11.2, -3.973, 0.801, -1.41] + >>> mask = [0, 0, 0, 1] + >>> masked_x = ma.masked_array(x, mask) + >>> masked_x + masked_array(data=[11.2, -3.973, 0.801, --], + mask=[False, False, False, True], + fill_value=1e+20) + >>> ma.sort(masked_x) + masked_array(data=[-3.973, 0.801, 11.2, --], + mask=[False, False, False, True], + fill_value=1e+20) """ a = np.array(a, copy=True, subok=True) if axis is None: @@ -6990,6 +7060,29 @@ def compressed(x): -------- ma.MaskedArray.compressed : Equivalent method. + Examples + -------- + + Create an array with negative values masked: + + >>> import numpy as np + >>> x = np.array([[1, -1, 0], [2, -1, 3], [7, 4, -1]]) + >>> masked_x = np.ma.masked_array(x, mask=x < 0) + >>> masked_x + masked_array( + data=[[1, --, 0], + [2, --, 3], + [7, 4, --]], + mask=[[False, True, False], + [False, True, False], + [False, False, True]], + fill_value=999999) + + Compress the masked array into a 1-D array of non-masked values: + + >>> np.ma.compressed(masked_x) + array([1, 0, 2, 3, 7, 4]) + """ return asanyarray(x).compressed() @@ -7065,6 +7158,38 @@ def diag(v, k=0): -------- numpy.diag : Equivalent function for ndarrays. + Examples + -------- + + Create an array with negative values masked: + + >>> import numpy as np + >>> x = np.array([[11.2, -3.973, 18], [0.801, -1.41, 12], [7, 33, -12]]) + >>> masked_x = np.ma.masked_array(x, mask=x < 0) + >>> masked_x + masked_array( + data=[[11.2, --, 18.0], + [0.801, --, 12.0], + [7.0, 33.0, --]], + mask=[[False, True, False], + [False, True, False], + [False, False, True]], + fill_value=1e+20) + + Isolate the main diagonal from the masked array: + + >>> np.ma.diag(masked_x) + masked_array(data=[11.2, --, --], + mask=[False, True, True], + fill_value=1e+20) + + Isolate the first diagonal below the main diagonal: + + >>> np.ma.diag(masked_x, -1) + masked_array(data=[0.801, 33.0], + mask=[False, False], + fill_value=1e+20) + """ output = np.diag(v, k).view(MaskedArray) if getmask(v) is not nomask: @@ -7104,6 +7229,21 @@ def right_shift(a, n): -------- numpy.right_shift + Examples + -------- + >>> import numpy.ma as ma + >>> x = [11, 3, 8, 1] + >>> mask = [0, 0, 0, 1] + >>> masked_x = ma.masked_array(x, mask) + >>> masked_x + masked_array(data=[11, 3, 8, --], + mask=[False, False, False, True], + fill_value=999999) + >>> ma.right_shift(masked_x,1) + masked_array(data=[5, 1, 4, --], + mask=[False, False, False, True], + fill_value=999999) + """ m = getmask(a) if m is nomask: @@ -7315,6 +7455,141 @@ def size(obj, axis=None): size.__doc__ = np.size.__doc__ +def diff(a, /, n=1, axis=-1, prepend=np._NoValue, append=np._NoValue): + """ + Calculate the n-th discrete difference along the given axis. + The first difference is given by ``out[i] = a[i+1] - a[i]`` along + the given axis, higher differences are calculated by using `diff` + recursively. + Preserves the input mask. + + Parameters + ---------- + a : array_like + Input array + n : int, optional + The number of times values are differenced. If zero, the input + is returned as-is. + axis : int, optional + The axis along which the difference is taken, default is the + last axis. + prepend, append : array_like, optional + Values to prepend or append to `a` along axis prior to + performing the difference. Scalar values are expanded to + arrays with length 1 in the direction of axis and the shape + of the input array in along all other axes. Otherwise the + dimension and shape must match `a` except along axis. + + Returns + ------- + diff : MaskedArray + The n-th differences. The shape of the output is the same as `a` + except along `axis` where the dimension is smaller by `n`. The + type of the output is the same as the type of the difference + between any two elements of `a`. This is the same as the type of + `a` in most cases. A notable exception is `datetime64`, which + results in a `timedelta64` output array. + + See Also + -------- + numpy.diff : Equivalent function in the top-level NumPy module. + + Notes + ----- + Type is preserved for boolean arrays, so the result will contain + `False` when consecutive elements are the same and `True` when they + differ. + + For unsigned integer arrays, the results will also be unsigned. This + should not be surprising, as the result is consistent with + calculating the difference directly: + + >>> u8_arr = np.array([1, 0], dtype=np.uint8) + >>> np.ma.diff(u8_arr) + masked_array(data=[255], + mask=False, + fill_value=999999, + dtype=uint8) + >>> u8_arr[1,...] - u8_arr[0,...] + 255 + + If this is not desirable, then the array should be cast to a larger + integer type first: + + >>> i16_arr = u8_arr.astype(np.int16) + >>> np.ma.diff(i16_arr) + masked_array(data=[-1], + mask=False, + fill_value=999999, + dtype=int16) + + Examples + -------- + >>> a = np.array([1, 2, 3, 4, 7, 0, 2, 3]) + >>> x = np.ma.masked_where(a < 2, a) + >>> np.ma.diff(x) + masked_array(data=[--, 1, 1, 3, --, --, 1], + mask=[ True, False, False, False, True, True, False], + fill_value=999999) + + >>> np.ma.diff(x, n=2) + masked_array(data=[--, 0, 2, --, --, --], + mask=[ True, False, False, True, True, True], + fill_value=999999) + + >>> a = np.array([[1, 3, 1, 5, 10], [0, 1, 5, 6, 8]]) + >>> x = np.ma.masked_equal(a, value=1) + >>> np.ma.diff(x) + masked_array( + data=[[--, --, --, 5], + [--, --, 1, 2]], + mask=[[ True, True, True, False], + [ True, True, False, False]], + fill_value=1) + + >>> np.ma.diff(x, axis=0) + masked_array(data=[[--, --, --, 1, -2]], + mask=[[ True, True, True, False, False]], + fill_value=1) + + """ + if n == 0: + return a + if n < 0: + raise ValueError("order must be non-negative but got " + repr(n)) + + a = np.ma.asanyarray(a) + if a.ndim == 0: + raise ValueError( + "diff requires input that is at least one dimensional" + ) + + combined = [] + if prepend is not np._NoValue: + prepend = np.ma.asanyarray(prepend) + if prepend.ndim == 0: + shape = list(a.shape) + shape[axis] = 1 + prepend = np.broadcast_to(prepend, tuple(shape)) + combined.append(prepend) + + combined.append(a) + + if append is not np._NoValue: + append = np.ma.asanyarray(append) + if append.ndim == 0: + shape = list(a.shape) + shape[axis] = 1 + append = np.broadcast_to(append, tuple(shape)) + combined.append(append) + + if len(combined) > 1: + a = np.ma.concatenate(combined, axis) + + # GH 22465 np.diff without prepend/append preserves the mask + return np.diff(a, n, axis) + + ############################################################################## # Extra functions # ############################################################################## @@ -7542,94 +7817,18 @@ def round_(a, decimals=0, out=None): round = round_ -# Needed by dot, so move here from extras.py. It will still be exported -# from extras.py for compatibility. -def mask_rowcols(a, axis=None): +def _mask_propagate(a, axis): """ - Mask rows and/or columns of a 2D array that contain masked values. - - Mask whole rows and/or columns of a 2D array that contain - masked values. The masking behavior is selected using the - `axis` parameter. - - - If `axis` is None, rows *and* columns are masked. - - If `axis` is 0, only rows are masked. - - If `axis` is 1 or -1, only columns are masked. - - Parameters - ---------- - a : array_like, MaskedArray - The array to mask. If not a MaskedArray instance (or if no array - elements are masked). The result is a MaskedArray with `mask` set - to `nomask` (False). Must be a 2D array. - axis : int, optional - Axis along which to perform the operation. If None, applies to a - flattened version of the array. - - Returns - ------- - a : MaskedArray - A modified version of the input array, masked depending on the value - of the `axis` parameter. - - Raises - ------ - NotImplementedError - If input array `a` is not 2D. - - See Also - -------- - mask_rows : Mask rows of a 2D array that contain masked values. - mask_cols : Mask cols of a 2D array that contain masked values. - masked_where : Mask where a condition is met. - - Notes - ----- - The input array's mask is modified by this function. - - Examples - -------- - >>> import numpy.ma as ma - >>> a = np.zeros((3, 3), dtype=int) - >>> a[1, 1] = 1 - >>> a - array([[0, 0, 0], - [0, 1, 0], - [0, 0, 0]]) - >>> a = ma.masked_equal(a, 1) - >>> a - masked_array( - data=[[0, 0, 0], - [0, --, 0], - [0, 0, 0]], - mask=[[False, False, False], - [False, True, False], - [False, False, False]], - fill_value=1) - >>> ma.mask_rowcols(a) - masked_array( - data=[[0, --, 0], - [--, --, --], - [0, --, 0]], - mask=[[False, True, False], - [ True, True, True], - [False, True, False]], - fill_value=1) - + Mask whole 1-d vectors of an array that contain masked values. """ a = array(a, subok=False) - if a.ndim != 2: - raise NotImplementedError("mask_rowcols works for 2D arrays only.") m = getmask(a) - # Nothing is masked: return a - if m is nomask or not m.any(): + if m is nomask or not m.any() or axis is None: return a - maskedval = m.nonzero() a._mask = a._mask.copy() - if not axis: - a[np.unique(maskedval[0])] = masked - if axis in [None, 1, -1]: - a[:, np.unique(maskedval[1])] = masked + axes = normalize_axis_tuple(axis, a.ndim) + for ax in axes: + a._mask |= m.any(axis=ax, keepdims=True) return a @@ -7646,10 +7845,6 @@ def dot(a, b, strict=False, out=None): corresponding method, it is recommended that the optional arguments be treated as keyword only. At some point that may be mandatory. - .. note:: - Works only with 2-D arrays at the moment. - - Parameters ---------- a, b : masked_array_like @@ -7693,18 +7888,22 @@ def dot(a, b, strict=False, out=None): fill_value=999999) """ - # !!!: Works only with 2D arrays. There should be a way to get it to run - # with higher dimension - if strict and (a.ndim == 2) and (b.ndim == 2): - a = mask_rowcols(a, 0) - b = mask_rowcols(b, 1) + if strict is True: + if np.ndim(a) == 0 or np.ndim(b) == 0: + pass + elif b.ndim == 1: + a = _mask_propagate(a, a.ndim - 1) + b = _mask_propagate(b, b.ndim - 1) + else: + a = _mask_propagate(a, a.ndim - 1) + b = _mask_propagate(b, b.ndim - 2) am = ~getmaskarray(a) bm = ~getmaskarray(b) if out is None: d = np.dot(filled(a, 0), filled(b, 0)) m = ~np.dot(am, bm) - if d.ndim == 0: + if np.ndim(d) == 0: d = np.asarray(d) r = d.view(get_masked_subclass(a, b)) r.__setmask__(m) @@ -8255,12 +8454,6 @@ clip = _convert2ma( np_ret='clipped_array : ndarray', np_ma_ret='clipped_array : MaskedArray', ) -diff = _convert2ma( - 'diff', - params=dict(fill_value=None, hardmask=False), - np_ret='diff : ndarray', - np_ma_ret='diff : MaskedArray', -) empty = _convert2ma( 'empty', params=dict(fill_value=None, hardmask=False), diff --git a/numpy/ma/core.pyi b/numpy/ma/core.pyi index ffdb21983..e94ebce3c 100644 --- a/numpy/ma/core.pyi +++ b/numpy/ma/core.pyi @@ -7,7 +7,6 @@ from numpy import ( amin as amin, bool_ as bool_, expand_dims as expand_dims, - diff as diff, clip as clip, indices as indices, ones_like as ones_like, @@ -220,6 +219,10 @@ class MaskedArray(ndarray[_ShapeType, _DType_co]): def compress(self, condition, axis=..., out=...): ... def __eq__(self, other): ... def __ne__(self, other): ... + def __ge__(self, other): ... + def __gt__(self, other): ... + def __le__(self, other): ... + def __lt__(self, other): ... def __add__(self, other): ... def __radd__(self, other): ... def __sub__(self, other): ... @@ -276,7 +279,6 @@ class MaskedArray(ndarray[_ShapeType, _DType_co]): def sort(self, axis=..., kind=..., order=..., endwith=..., fill_value=...): ... def min(self, axis=..., out=..., fill_value=..., keepdims=...): ... # NOTE: deprecated - # def mini(self, axis=...): ... # def tostring(self, fill_value=..., order=...): ... def max(self, axis=..., out=..., fill_value=..., keepdims=...): ... def ptp(self, axis=..., out=..., fill_value=..., keepdims=...): ... @@ -430,10 +432,10 @@ def resize(x, new_shape): ... def ndim(obj): ... def shape(obj): ... def size(obj, axis=...): ... +def diff(a, /, n=..., axis=..., prepend=..., append=...): ... def where(condition, x=..., y=...): ... def choose(indices, choices, out=..., mode=...): ... -def round_(a, decimals=..., out=...): ... -round = round_ +def round(a, decimals=..., out=...): ... def inner(a, b): ... innerproduct = inner diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py index 4e7f8e85e..8a6246c36 100644 --- a/numpy/ma/extras.py +++ b/numpy/ma/extras.py @@ -27,8 +27,7 @@ from . import core as ma from .core import ( MaskedArray, MAError, add, array, asarray, concatenate, filled, count, getmask, getmaskarray, make_mask_descr, masked, masked_array, mask_or, - nomask, ones, sort, zeros, getdata, get_masked_subclass, dot, - mask_rowcols + nomask, ones, sort, zeros, getdata, get_masked_subclass, dot ) import numpy as np @@ -398,7 +397,7 @@ def apply_along_axis(func1d, axis, arr, *args, **kwargs): dtypes.append(np.asarray(res).dtype) outarr = zeros(outshape, object) outarr[tuple(ind)] = res - Ntot = np.product(outshape) + Ntot = np.prod(outshape) k = 1 while k < Ntot: # increment the index @@ -418,7 +417,7 @@ def apply_along_axis(func1d, axis, arr, *args, **kwargs): j = i.copy() j[axis] = ([slice(None, None)] * res.ndim) j.put(indlist, ind) - Ntot = np.product(outshape) + Ntot = np.prod(outshape) holdshape = outshape outshape = list(arr.shape) outshape[axis] = res.shape @@ -732,12 +731,8 @@ def median(a, axis=None, out=None, overwrite_input=False, keepdims=False): else: return m - r, k = _ureduce(a, func=_median, axis=axis, out=out, + return _ureduce(a, func=_median, keepdims=keepdims, axis=axis, out=out, overwrite_input=overwrite_input) - if keepdims: - return r.reshape(k) - else: - return r def _median(a, axis=None, out=None, overwrite_input=False): @@ -959,6 +954,95 @@ def compress_cols(a): return compress_rowcols(a, 1) +def mask_rowcols(a, axis=None): + """ + Mask rows and/or columns of a 2D array that contain masked values. + + Mask whole rows and/or columns of a 2D array that contain + masked values. The masking behavior is selected using the + `axis` parameter. + + - If `axis` is None, rows *and* columns are masked. + - If `axis` is 0, only rows are masked. + - If `axis` is 1 or -1, only columns are masked. + + Parameters + ---------- + a : array_like, MaskedArray + The array to mask. If not a MaskedArray instance (or if no array + elements are masked), the result is a MaskedArray with `mask` set + to `nomask` (False). Must be a 2D array. + axis : int, optional + Axis along which to perform the operation. If None, applies to a + flattened version of the array. + + Returns + ------- + a : MaskedArray + A modified version of the input array, masked depending on the value + of the `axis` parameter. + + Raises + ------ + NotImplementedError + If input array `a` is not 2D. + + See Also + -------- + mask_rows : Mask rows of a 2D array that contain masked values. + mask_cols : Mask cols of a 2D array that contain masked values. + masked_where : Mask where a condition is met. + + Notes + ----- + The input array's mask is modified by this function. + + Examples + -------- + >>> import numpy.ma as ma + >>> a = np.zeros((3, 3), dtype=int) + >>> a[1, 1] = 1 + >>> a + array([[0, 0, 0], + [0, 1, 0], + [0, 0, 0]]) + >>> a = ma.masked_equal(a, 1) + >>> a + masked_array( + data=[[0, 0, 0], + [0, --, 0], + [0, 0, 0]], + mask=[[False, False, False], + [False, True, False], + [False, False, False]], + fill_value=1) + >>> ma.mask_rowcols(a) + masked_array( + data=[[0, --, 0], + [--, --, --], + [0, --, 0]], + mask=[[False, True, False], + [ True, True, True], + [False, True, False]], + fill_value=1) + + """ + a = array(a, subok=False) + if a.ndim != 2: + raise NotImplementedError("mask_rowcols works for 2D arrays only.") + m = getmask(a) + # Nothing is masked: return a + if m is nomask or not m.any(): + return a + maskedval = m.nonzero() + a._mask = a._mask.copy() + if not axis: + a[np.unique(maskedval[0])] = masked + if axis in [None, 1, -1]: + a[:, np.unique(maskedval[1])] = masked + return a + + def mask_rows(a, axis=np._NoValue): """ Mask rows of a 2D array that contain masked values. diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py index 6196dcfab..6ab1d7e4f 100644 --- a/numpy/ma/tests/test_core.py +++ b/numpy/ma/tests/test_core.py @@ -8,6 +8,7 @@ __author__ = "Pierre GF Gerard-Marchant" import sys import warnings +import copy import operator import itertools import textwrap @@ -21,8 +22,9 @@ import numpy.ma.core import numpy.core.fromnumeric as fromnumeric import numpy.core.umath as umath from numpy.testing import ( - assert_raises, assert_warns, suppress_warnings + assert_raises, assert_warns, suppress_warnings, IS_WASM ) +from numpy.testing._private.utils import requires_memory from numpy import ndarray from numpy.compat import asbytes from numpy.ma.testutils import ( @@ -214,6 +216,8 @@ class TestMaskedArray: assert_(np.may_share_memory(x.mask, y.mask)) y = array([1, 2, 3], mask=x._mask, copy=True) assert_(not np.may_share_memory(x.mask, y.mask)) + x = array([1, 2, 3], mask=None) + assert_equal(x._mask, [False, False, False]) def test_masked_singleton_array_creation_warns(self): # The first works, but should not (ideally), there may be no way @@ -373,6 +377,24 @@ class TestMaskedArray: assert_equal(s1, s2) assert_(x1[1:1].shape == (0,)) + def test_setitem_no_warning(self): + # Setitem shouldn't warn, because the assignment might be masked + # and warning for a masked assignment is weird (see gh-23000) + # (When the value is masked, otherwise a warning would be acceptable + # but is not given currently.) + x = np.ma.arange(60).reshape((6, 10)) + index = (slice(1, 5, 2), [7, 5]) + value = np.ma.masked_all((2, 2)) + value._data[...] = np.inf # not a valid integer... + x[index] = value + # The masked scalar is special cased, but test anyway (it's NaN): + x[...] = np.ma.masked + # Finally, a large value that cannot be cast to the float32 `x` + x = np.ma.arange(3., dtype=np.float32) + value = np.ma.array([2e234, 1, 1], mask=[True, False, False]) + x[...] = value + x[[0, 1, 2]] = value + @suppress_copy_mask_on_assignment def test_copy(self): # Tests of some subtle points of copying and sizing. @@ -1332,16 +1354,16 @@ class TestMaskedArrayArithmetic: assert_equal(np.sum(x, axis=0), sum(x, axis=0)) assert_equal(np.sum(filled(xm, 0), axis=0), sum(xm, axis=0)) assert_equal(np.sum(x, 0), sum(x, 0)) - assert_equal(np.product(x, axis=0), product(x, axis=0)) - assert_equal(np.product(x, 0), product(x, 0)) - assert_equal(np.product(filled(xm, 1), axis=0), product(xm, axis=0)) + assert_equal(np.prod(x, axis=0), product(x, axis=0)) + assert_equal(np.prod(x, 0), product(x, 0)) + assert_equal(np.prod(filled(xm, 1), axis=0), product(xm, axis=0)) s = (3, 4) x.shape = y.shape = xm.shape = ym.shape = s if len(s) > 1: assert_equal(np.concatenate((x, y), 1), concatenate((xm, ym), 1)) assert_equal(np.add.reduce(x, 1), add.reduce(x, 1)) assert_equal(np.sum(x, 1), sum(x, 1)) - assert_equal(np.product(x, 1), product(x, 1)) + assert_equal(np.prod(x, 1), product(x, 1)) def test_binops_d2D(self): # Test binary operations on 2D data @@ -3407,6 +3429,24 @@ class TestMaskedArrayMethods: assert_equal(a.ravel(order='C'), [1, 2, 3, 4]) assert_equal(a.ravel(order='F'), [1, 3, 2, 4]) + @pytest.mark.parametrize("order", "AKCF") + @pytest.mark.parametrize("data_order", "CF") + def test_ravel_order(self, order, data_order): + # Ravelling must ravel mask and data in the same order always to avoid + # misaligning the two in the ravel result. + arr = np.ones((5, 10), order=data_order) + arr[0, :] = 0 + mask = np.ones((10, 5), dtype=bool, order=data_order).T + mask[0, :] = False + x = array(arr, mask=mask) + assert x._data.flags.fnc != x._mask.flags.fnc + assert (x.filled(0) == 0).all() + raveled = x.ravel(order) + assert (raveled.filled(0) == 0).all() + + # NOTE: Can be wrong if arr order is neither C nor F and `order="K"` + assert_array_equal(arr.ravel(order), x.ravel(order)._data) + def test_reshape(self): # Tests reshape x = arange(4) @@ -4082,6 +4122,7 @@ class TestMaskedArrayMathMethods: assert_equal(a.max(-1), [3, 6]) assert_equal(a.max(1), [3, 6]) + @requires_memory(free_bytes=2 * 10000 * 1000 * 2) def test_mean_overflow(self): # Test overflow in masked arrays # gh-20272 @@ -4089,6 +4130,46 @@ class TestMaskedArrayMathMethods: mask=np.zeros((10000, 10000))) assert_equal(a.mean(), 65535.0) + def test_diff_with_prepend(self): + # GH 22465 + x = np.array([1, 2, 2, 3, 4, 2, 1, 1]) + + a = np.ma.masked_equal(x[3:], value=2) + a_prep = np.ma.masked_equal(x[:3], value=2) + diff1 = np.ma.diff(a, prepend=a_prep, axis=0) + + b = np.ma.masked_equal(x, value=2) + diff2 = np.ma.diff(b, axis=0) + + assert_(np.ma.allequal(diff1, diff2)) + + def test_diff_with_append(self): + # GH 22465 + x = np.array([1, 2, 2, 3, 4, 2, 1, 1]) + + a = np.ma.masked_equal(x[:3], value=2) + a_app = np.ma.masked_equal(x[3:], value=2) + diff1 = np.ma.diff(a, append=a_app, axis=0) + + b = np.ma.masked_equal(x, value=2) + diff2 = np.ma.diff(b, axis=0) + + assert_(np.ma.allequal(diff1, diff2)) + + def test_diff_with_dim_0(self): + with pytest.raises( + ValueError, + match="diff requires input that is at least one dimensional" + ): + np.ma.diff(np.array(1)) + + def test_diff_with_n_0(self): + a = np.ma.masked_equal([1, 2, 2, 3, 4, 2, 1, 1], value=2) + diff = np.ma.diff(a, n=0, axis=0) + + assert_(np.ma.allequal(a, diff)) + + class TestMaskedArrayMathMethodsComplex: # Test class for miscellaneous MaskedArrays methods. def setup_method(self): @@ -4365,6 +4446,7 @@ class TestMaskedArrayFunctions: assert_equal(test, ctrl) assert_equal(test.mask, ctrl.mask) + @pytest.mark.skipif(IS_WASM, reason="fp errors don't work in wasm") def test_where(self): # Test the where function x = np.array([1., 1., 1., -2., pi/2.0, 4., 5., -10., 10., 1., 2., 3.]) @@ -4504,6 +4586,32 @@ class TestMaskedArrayFunctions: match="not supported for the input types"): np.ma.masked_invalid(a) + def test_masked_invalid_pandas(self): + # getdata() used to be bad for pandas series due to its _data + # attribute. This test is a regression test mainly and may be + # removed if getdata() is adjusted. + class Series(): + _data = "nonsense" + + def __array__(self): + return np.array([5, np.nan, np.inf]) + + arr = np.ma.masked_invalid(Series()) + assert_array_equal(arr._data, np.array(Series())) + assert_array_equal(arr._mask, [False, True, True]) + + @pytest.mark.parametrize("copy", [True, False]) + def test_masked_invalid_full_mask(self, copy): + # Matplotlib relied on masked_invalid always returning a full mask + # (Also astropy projects, but were ok with it gh-22720 and gh-22842) + a = np.ma.array([1, 2, 3, 4]) + assert a._mask is nomask + res = np.ma.masked_invalid(a, copy=copy) + assert res.mask is not nomask + # mask of a should not be mutated + assert a.mask is nomask + assert np.may_share_memory(a._data, res._data) != copy + def test_choose(self): # Test choose choices = [[0, 1, 2, 3], [10, 11, 12, 13], @@ -5499,3 +5607,47 @@ note original note""" assert_equal(np.ma.core.doc_note(method.__doc__, "note"), expected_doc) + + +def test_gh_22556(): + source = np.ma.array([0, [0, 1, 2]], dtype=object) + deepcopy = copy.deepcopy(source) + deepcopy[1].append('this should not appear in source') + assert len(source[1]) == 3 + + +def test_gh_21022(): + # testing for absence of reported error + source = np.ma.masked_array(data=[-1, -1], mask=True, dtype=np.float64) + axis = np.array(0) + result = np.prod(source, axis=axis, keepdims=False) + result = np.ma.masked_array(result, + mask=np.ones(result.shape, dtype=np.bool_)) + array = np.ma.masked_array(data=-1, mask=True, dtype=np.float64) + copy.deepcopy(array) + copy.deepcopy(result) + + +def test_deepcopy_2d_obj(): + source = np.ma.array([[0, "dog"], + [1, 1], + [[1, 2], "cat"]], + mask=[[0, 1], + [0, 0], + [0, 0]], + dtype=object) + deepcopy = copy.deepcopy(source) + deepcopy[2, 0].extend(['this should not appear in source', 3]) + assert len(source[2, 0]) == 2 + assert len(deepcopy[2, 0]) == 4 + assert_equal(deepcopy._mask, source._mask) + deepcopy._mask[0, 0] = 1 + assert source._mask[0, 0] == 0 + + +def test_deepcopy_0d_obj(): + source = np.ma.array(0, mask=[0], dtype=object) + deepcopy = copy.deepcopy(source) + deepcopy[...] = 17 + assert_equal(source, 0) + assert_equal(deepcopy, 17) diff --git a/numpy/ma/tests/test_extras.py b/numpy/ma/tests/test_extras.py index 3c95e25ea..d09a50fec 100644 --- a/numpy/ma/tests/test_extras.py +++ b/numpy/ma/tests/test_extras.py @@ -12,6 +12,7 @@ import itertools import pytest import numpy as np +from numpy.core.numeric import normalize_axis_tuple from numpy.testing import ( assert_warns, suppress_warnings ) @@ -386,8 +387,8 @@ class TestConcatenator: # Tests mr_ on 2D arrays. a_1 = np.random.rand(5, 5) a_2 = np.random.rand(5, 5) - m_1 = np.round_(np.random.rand(5, 5), 0) - m_2 = np.round_(np.random.rand(5, 5), 0) + m_1 = np.round(np.random.rand(5, 5), 0) + m_2 = np.round(np.random.rand(5, 5), 0) b_1 = masked_array(a_1, mask=m_1) b_2 = masked_array(a_2, mask=m_2) # append columns @@ -729,6 +730,47 @@ class TestCompressFunctions: assert_equal(c.mask, [[0, 0, 1], [1, 1, 1], [0, 0, 1]]) c = dot(b, a, strict=False) assert_equal(c, np.dot(b.filled(0), a.filled(0))) + # + a = masked_array(np.arange(8).reshape(2, 2, 2), + mask=[[[1, 0], [0, 0]], [[0, 0], [0, 0]]]) + b = masked_array(np.arange(8).reshape(2, 2, 2), + mask=[[[0, 0], [0, 0]], [[0, 0], [0, 1]]]) + c = dot(a, b, strict=True) + assert_equal(c.mask, + [[[[1, 1], [1, 1]], [[0, 0], [0, 1]]], + [[[0, 0], [0, 1]], [[0, 0], [0, 1]]]]) + c = dot(a, b, strict=False) + assert_equal(c.mask, + [[[[0, 0], [0, 1]], [[0, 0], [0, 0]]], + [[[0, 0], [0, 0]], [[0, 0], [0, 0]]]]) + c = dot(b, a, strict=True) + assert_equal(c.mask, + [[[[1, 0], [0, 0]], [[1, 0], [0, 0]]], + [[[1, 0], [0, 0]], [[1, 1], [1, 1]]]]) + c = dot(b, a, strict=False) + assert_equal(c.mask, + [[[[0, 0], [0, 0]], [[0, 0], [0, 0]]], + [[[0, 0], [0, 0]], [[1, 0], [0, 0]]]]) + # + a = masked_array(np.arange(8).reshape(2, 2, 2), + mask=[[[1, 0], [0, 0]], [[0, 0], [0, 0]]]) + b = 5. + c = dot(a, b, strict=True) + assert_equal(c.mask, [[[1, 0], [0, 0]], [[0, 0], [0, 0]]]) + c = dot(a, b, strict=False) + assert_equal(c.mask, [[[1, 0], [0, 0]], [[0, 0], [0, 0]]]) + c = dot(b, a, strict=True) + assert_equal(c.mask, [[[1, 0], [0, 0]], [[0, 0], [0, 0]]]) + c = dot(b, a, strict=False) + assert_equal(c.mask, [[[1, 0], [0, 0]], [[0, 0], [0, 0]]]) + # + a = masked_array(np.arange(8).reshape(2, 2, 2), + mask=[[[1, 0], [0, 0]], [[0, 0], [0, 0]]]) + b = masked_array(np.arange(2), mask=[0, 1]) + c = dot(a, b, strict=True) + assert_equal(c.mask, [[1, 1], [1, 1]]) + c = dot(a, b, strict=False) + assert_equal(c.mask, [[1, 0], [0, 0]]) def test_dot_returns_maskedarray(self): # See gh-6611 @@ -989,6 +1031,34 @@ class TestMedian: assert_(r is out) assert_(type(r) is MaskedArray) + @pytest.mark.parametrize( + argnames='axis', + argvalues=[ + None, + 1, + (1, ), + (0, 1), + (-3, -1), + ] + ) + def test_keepdims_out(self, axis): + mask = np.zeros((3, 5, 7, 11), dtype=bool) + # Randomly set some elements to True: + w = np.random.random((4, 200)) * np.array(mask.shape)[:, None] + w = w.astype(np.intp) + mask[tuple(w)] = np.nan + d = masked_array(np.ones(mask.shape), mask=mask) + if axis is None: + shape_out = (1,) * d.ndim + else: + axis_norm = normalize_axis_tuple(axis, d.ndim) + shape_out = tuple( + 1 if i in axis_norm else d.shape[i] for i in range(d.ndim)) + out = masked_array(np.empty(shape_out)) + result = median(d, axis=axis, keepdims=True, out=out) + assert result is out + assert_equal(result.shape, shape_out) + def test_single_non_masked_value_on_axis(self): data = [[1., 0.], [0., 3.], diff --git a/numpy/ma/tests/test_old_ma.py b/numpy/ma/tests/test_old_ma.py index 8465b1153..7b892ad23 100644 --- a/numpy/ma/tests/test_old_ma.py +++ b/numpy/ma/tests/test_old_ma.py @@ -194,16 +194,16 @@ class TestMa: assert_(eq(np.sum(x, axis=0), sum(x, axis=0))) assert_(eq(np.sum(filled(xm, 0), axis=0), sum(xm, axis=0))) assert_(eq(np.sum(x, 0), sum(x, 0))) - assert_(eq(np.product(x, axis=0), product(x, axis=0))) - assert_(eq(np.product(x, 0), product(x, 0))) - assert_(eq(np.product(filled(xm, 1), axis=0), + assert_(eq(np.prod(x, axis=0), product(x, axis=0))) + assert_(eq(np.prod(x, 0), product(x, 0))) + assert_(eq(np.prod(filled(xm, 1), axis=0), product(xm, axis=0))) if len(s) > 1: assert_(eq(np.concatenate((x, y), 1), concatenate((xm, ym), 1))) assert_(eq(np.add.reduce(x, 1), add.reduce(x, 1))) assert_(eq(np.sum(x, 1), sum(x, 1))) - assert_(eq(np.product(x, 1), product(x, 1))) + assert_(eq(np.prod(x, 1), product(x, 1))) def test_testCI(self): # Test of conversions and indexing diff --git a/numpy/ma/tests/test_regression.py b/numpy/ma/tests/test_regression.py index cb3d0349f..f4f32cc7a 100644 --- a/numpy/ma/tests/test_regression.py +++ b/numpy/ma/tests/test_regression.py @@ -89,3 +89,9 @@ class TestRegression: def test_masked_array_tobytes_fortran(self): ma = np.ma.arange(4).reshape((2,2)) assert_array_equal(ma.tobytes(order='F'), ma.T.tobytes()) + + def test_structured_array(self): + # see gh-22041 + np.ma.array((1, (b"", b"")), + dtype=[("x", np.int_), + ("y", [("i", np.void), ("j", np.void)])]) diff --git a/numpy/ma/tests/test_subclassing.py b/numpy/ma/tests/test_subclassing.py index 64c66eeb9..e3c885253 100644 --- a/numpy/ma/tests/test_subclassing.py +++ b/numpy/ma/tests/test_subclassing.py @@ -154,6 +154,7 @@ class WrappedArray(NDArrayOperatorsMixin): ufunc deferrals are commutative. See: https://github.com/numpy/numpy/issues/15200) """ + __slots__ = ('_array', 'attrs') __array_priority__ = 20 def __init__(self, array, **attrs): @@ -448,3 +449,12 @@ class TestClassWrapping: assert_(isinstance(np.divide(wm, m2), WrappedArray)) assert_(isinstance(np.divide(m2, wm), WrappedArray)) assert_equal(np.divide(m2, wm), np.divide(wm, m2)) + + def test_mixins_have_slots(self): + mixin = NDArrayOperatorsMixin() + # Should raise an error + assert_raises(AttributeError, mixin.__setattr__, "not_a_real_attr", 1) + + m = np.ma.masked_array([1, 3, 5], mask=[False, True, False]) + wm = WrappedArray(m) + assert_raises(AttributeError, wm.__setattr__, "not_an_attr", 2) diff --git a/numpy/ma/testutils.py b/numpy/ma/testutils.py index 2dd479abe..7a633906b 100644 --- a/numpy/ma/testutils.py +++ b/numpy/ma/testutils.py @@ -233,7 +233,7 @@ def fail_if_array_equal(x, y, err_msg='', verbose=True): """ def compare(x, y): - return (not np.alltrue(approx(x, y))) + return (not np.all(approx(x, y))) assert_array_compare(compare, x, y, err_msg=err_msg, verbose=verbose, header='Arrays are not equal') |