diff options
author | David Freese <dfreese@stanford.edu> | 2014-02-16 08:51:16 -0800 |
---|---|---|
committer | David Freese <dfreese@stanford.edu> | 2014-05-02 08:57:27 -0700 |
commit | beec75be6f96a5c0fc9496b587e68eb03bb4a6ba (patch) | |
tree | a5bfadd37ec0ffdb9249d18d21179d25e8c9ec32 /numpy | |
parent | a0cf18394d5ce33514fdc37093bd2f65ad4b0dde (diff) | |
download | numpy-beec75be6f96a5c0fc9496b587e68eb03bb4a6ba.tar.gz |
ENH: added functionality nanmedian to numpy
Implemented a nanmedian and associated tests as an
extension of np.median to complement the other
nanfunctions
Added negative values to the unit tests
Cleaned up documentation of nanmedian
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/lib/nanfunctions.py | 146 | ||||
-rw-r--r-- | numpy/lib/tests/test_nanfunctions.py | 98 |
2 files changed, 238 insertions, 6 deletions
diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index badba32da..818e130a8 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -17,12 +17,14 @@ Functions from __future__ import division, absolute_import, print_function import warnings +import operator import numpy as np - +from numpy.core.fromnumeric import partition +from numpy.lib.function_base import _ureduce as _ureduce __all__ = [ 'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean', - 'nanvar', 'nanstd' + 'nanmedian', 'nanvar', 'nanstd' ] @@ -601,6 +603,146 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False): return avg +def _nanmedian1d(arr1d, overwrite_input=False): # This only works on 1d arrays + """ + Private function for rank 1 arrays. Compute the median ignoring NaNs. + See nanmedian for parameter usage + + """ + c = np.isnan(arr1d) + s = np.where(c)[0] + if s.size == arr1d.size: + warnings.warn("All-NaN slice encountered", RuntimeWarning) + return np.nan + elif s.size == 0: + return np.median(arr1d, overwrite_input=overwrite_input) + else: + if overwrite_input: + x = arr1d + else: + x = arr1d.copy() + # select non-nans at end of array + enonan = arr1d[-s.size:][~c[-s.size:]] + # fill nans in beginning of array with non-nans of end + x[s[:enonan.size]] = enonan + # slice nans away + return np.median(x[:-s.size], overwrite_input=True) + + +def _nanmedian(a, axis=None, out=None, overwrite_input=False): + """ + Private function that doesn't support extended axis or keepdims. + These methods are extended to this function using _ureduce + See nanmedian for parameter usage + + """ + if axis is None: + part = a.ravel() + if out is None: + return _nanmedian1d(part, overwrite_input) + else: + out[:] = _nanmedian1d(part, overwrite_input) + return out + else: + result = np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input) + if out is not None: + out[:] = result + return result + + +def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=False): + """ + Compute the median along the specified axis, while ignoring NaNs. + + Returns the median of the array elements. + + .. versionadded:: 1.9.0 + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array. + axis : int, optional + Axis along which the medians are computed. The default (axis=None) + is to compute the median along a flattened version of the array. + A sequence of axes is supported since version 1.9.0. + out : ndarray, optional + Alternative output array in which to place the result. It must have + the same shape and buffer length as the expected output, but the + type (of the output) will be cast if necessary. + overwrite_input : bool, optional + If True, then allow use of memory of input array (a) for + calculations. The input array will be modified by the call to + median. This will save memory when you do not need to preserve + the contents of the input array. Treat the input as undefined, + but it will probably be fully or partially sorted. Default is + False. Note that, if `overwrite_input` is True and the input + is not already an ndarray, an error will be raised. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `arr`. + + + + Returns + ------- + median : ndarray + A new array holding the result. If the input contains integers, or + floats of smaller precision than 64, then the output data-type is + float64. Otherwise, the output data-type is the same as that of the + input. + + See Also + -------- + mean, median, percentile + + Notes + ----- + Given a vector V of length N, the median of V is the middle value of + a sorted copy of V, ``V_sorted`` - i.e., ``V_sorted[(N-1)/2]``, when N is + odd. When N is even, it is the average of the two middle values of + ``V_sorted``. + + Examples + -------- + >>> a = np.array([[10.0, 7, 4], [3, 2, 1]]) + >>> a[0, 1] = np.nan + >>> a + array([[ 10., nan, 4.], + [ 3., 2., 1.]]) + >>> np.median(a) + nan + >>> np.nanmedian(a) + 3.0 + >>> np.nanmedian(a, axis=0) + array([ 6.5, 2., 2.5]) + >>> np.median(a, axis=1) + array([ 7., 2.]) + >>> b = a.copy() + >>> np.nanmedian(b, axis=1, overwrite_input=True) + array([ 7., 2.]) + >>> assert not np.all(a==b) + >>> b = a.copy() + >>> np.nanmedian(b, axis=None, overwrite_input=True) + 3.0 + >>> assert not np.all(a==b) + + """ + a = np.asanyarray(a) + # apply_along_axis in _nanmedian doesn't handle empty arrays well, + # so deal them upfront + if 0 in a.shape: + return np.nanmean(a, axis, out=out, keepdims=keepdims) + + r, k = _ureduce(a, func=_nanmedian, axis=axis, out=out, + overwrite_input=overwrite_input) + if keepdims: + return r.reshape(k) + else: + return r + + def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): """ Compute the variance along the specified axis, while ignoring NaNs. diff --git a/numpy/lib/tests/test_nanfunctions.py b/numpy/lib/tests/test_nanfunctions.py index f00aa0165..74a50edf4 100644 --- a/numpy/lib/tests/test_nanfunctions.py +++ b/numpy/lib/tests/test_nanfunctions.py @@ -11,15 +11,15 @@ from numpy.testing import ( # Test data _ndat = np.array([[0.6244, np.nan, 0.2692, 0.0116, np.nan, 0.1170], - [0.5351, 0.9403, np.nan, 0.2100, 0.4759, 0.2833], - [np.nan, np.nan, np.nan, 0.1042, np.nan, 0.5954], + [0.5351, -0.9403, np.nan, 0.2100, 0.4759, 0.2833], + [np.nan, np.nan, np.nan, 0.1042, np.nan, -0.5954], [0.1610, np.nan, np.nan, 0.1859, 0.3146, np.nan]]) # Rows of _ndat with nans removed _rdat = [np.array([ 0.6244, 0.2692, 0.0116, 0.1170]), - np.array([ 0.5351, 0.9403, 0.2100, 0.4759, 0.2833]), - np.array([ 0.1042, 0.5954]), + np.array([ 0.5351, -0.9403, 0.2100, 0.4759, 0.2833]), + np.array([ 0.1042, -0.5954]), np.array([ 0.1610, 0.1859, 0.3146])] @@ -527,5 +527,95 @@ class TestNanFunctions_MeanVarStd(TestCase): assert_(np.isscalar(res)) +class TestNanFunctions_Median(TestCase): + + def test_mutation(self): + # Check that passed array is not modified. + ndat = _ndat.copy() + np.nanmedian(ndat) + assert_equal(ndat, _ndat) + + def test_keepdims(self): + mat = np.eye(3) + for axis in [None, 0, 1]: + tgt = np.median(mat, axis=axis, out=None, overwrite_input=False) + res = np.nanmedian(mat, axis=axis, out=None, overwrite_input=False) + assert_(res.ndim == tgt.ndim) + + def test_out(self): + mat = np.random.rand(3,3) + resout = np.zeros(3) + tgt = np.median(mat, axis=1) + res = np.nanmedian(mat, axis=1, out=resout) + assert_almost_equal(res, resout) + assert_almost_equal(res, tgt) + + def test_result_values(self): + tgt = [np.median(d) for d in _rdat] + res = np.nanmedian(_ndat, axis=1) + assert_almost_equal(res, tgt) + + def test_allnans(self): + mat = np.array([np.nan]*9).reshape(3, 3) + for axis in [None, 0, 1]: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + assert_(np.isnan(np.nanmedian(mat, axis=axis)).all()) + if axis is None: + assert_(len(w) == 1) + else: + assert_(len(w) == 3) + assert_(issubclass(w[0].category, RuntimeWarning)) + # Check scalar + assert_(np.isnan(np.nanmedian(np.nan))) + if axis is None: + assert_(len(w) == 2) + else: + assert_(len(w) == 4) + assert_(issubclass(w[0].category, RuntimeWarning)) + + def test_empty(self): + mat = np.zeros((0, 3)) + for axis in [0, None]: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + assert_(np.isnan(np.nanmedian(mat, axis=axis)).all()) + assert_(len(w) == 1) + assert_(issubclass(w[0].category, RuntimeWarning)) + for axis in [1]: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + assert_equal(np.nanmedian(mat, axis=axis), np.zeros([])) + assert_(len(w) == 0) + + def test_scalar(self): + assert_(np.nanmedian(0.) == 0.) + + def test_extended_axis_invalid(self): + d = np.ones((3, 5, 7, 11)) + assert_raises(IndexError, np.nanmedian, d, axis=-5) + assert_raises(IndexError, np.nanmedian, d, axis=(0, -5)) + assert_raises(IndexError, np.nanmedian, d, axis=4) + assert_raises(IndexError, np.nanmedian, d, axis=(0, 4)) + assert_raises(ValueError, np.nanmedian, d, axis=(1, 1)) + + def test_keepdims(self): + d = np.ones((3, 5, 7, 11)) + assert_equal(np.nanmedian(d, axis=None, keepdims=True).shape, + (1, 1, 1, 1)) + assert_equal(np.nanmedian(d, axis=(0, 1), keepdims=True).shape, + (1, 1, 7, 11)) + assert_equal(np.nanmedian(d, axis=(0, 3), keepdims=True).shape, + (1, 5, 7, 1)) + assert_equal(np.nanmedian(d, axis=(1,), keepdims=True).shape, + (3, 1, 7, 11)) + assert_equal(np.nanmedian(d, axis=(0, 1, 2, 3), keepdims=True).shape, + (1, 1, 1, 1)) + assert_equal(np.nanmedian(d, axis=(0, 1, 3), keepdims=True).shape, + (1, 1, 7, 1)) + + + + if __name__ == "__main__": run_module_suite() |