ENH: added functionality nanmedian to numpy

Implemented a nanmedian and associated tests as an extension of np.median to complement the other nanfunctions Added negative values to the unit tests Cleaned up documentation of nanmedian
author: David Freese <dfreese@stanford.edu> 2014-02-16 08:51:16 -0800
committer: David Freese <dfreese@stanford.edu> 2014-05-02 08:57:27 -0700
commit: beec75be6f96a5c0fc9496b587e68eb03bb4a6ba (patch)
tree: a5bfadd37ec0ffdb9249d18d21179d25e8c9ec32 /numpy
parent: a0cf18394d5ce33514fdc37093bd2f65ad4b0dde (diff)
download: numpy-beec75be6f96a5c0fc9496b587e68eb03bb4a6ba.tar.gz
2 files changed, 238 insertions, 6 deletions
diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py
index badba32da..818e130a8 100644
--- a/numpy/lib/nanfunctions.py
+++ b/numpy/lib/nanfunctions.py
@@ -17,12 +17,14 @@ Functions
 from __future__ import division, absolute_import, print_function
 
 import warnings
+import operator
 import numpy as np
-
+from numpy.core.fromnumeric import partition
+from numpy.lib.function_base import _ureduce as _ureduce
 
 __all__ = [
     'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean',
-    'nanvar', 'nanstd'
+    'nanmedian', 'nanvar', 'nanstd'
     ]
 
 
@@ -601,6 +603,146 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False):
     return avg
 
 
+def _nanmedian1d(arr1d, overwrite_input=False): # This only works on 1d arrays
+    """
+    Private function for rank 1 arrays. Compute the median ignoring NaNs.
+    See nanmedian for parameter usage
+    
+    """
+    c = np.isnan(arr1d)
+    s = np.where(c)[0]
+    if s.size == arr1d.size:
+        warnings.warn("All-NaN slice encountered", RuntimeWarning)
+        return np.nan
+    elif s.size == 0:
+        return np.median(arr1d, overwrite_input=overwrite_input)
+    else:
+        if overwrite_input: 
+            x = arr1d
+        else: 
+            x = arr1d.copy() 
+        # select non-nans at end of array
+        enonan = arr1d[-s.size:][~c[-s.size:]]
+        # fill nans in beginning of array with non-nans of end
+        x[s[:enonan.size]] = enonan
+        # slice nans away
+        return np.median(x[:-s.size], overwrite_input=True)
+
+
+def _nanmedian(a, axis=None, out=None, overwrite_input=False):
+    """
+    Private function that doesn't support extended axis or keepdims.
+    These methods are extended to this function using _ureduce
+    See nanmedian for parameter usage
+
+    """
+    if axis is None:
+        part = a.ravel()
+        if out is None:
+            return _nanmedian1d(part, overwrite_input)
+        else:
+            out[:] = _nanmedian1d(part, overwrite_input)
+            return out
+    else:
+        result = np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input)
+        if out is not None:
+            out[:] = result
+        return result
+
+
+def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=False):
+    """
+    Compute the median along the specified axis, while ignoring NaNs.
+
+    Returns the median of the array elements.
+
+    .. versionadded:: 1.9.0
+
+    Parameters
+    ----------
+    a : array_like
+        Input array or object that can be converted to an array.
+    axis : int, optional
+        Axis along which the medians are computed. The default (axis=None)
+        is to compute the median along a flattened version of the array.
+        A sequence of axes is supported since version 1.9.0.
+    out : ndarray, optional
+        Alternative output array in which to place the result. It must have
+        the same shape and buffer length as the expected output, but the
+        type (of the output) will be cast if necessary.
+    overwrite_input : bool, optional
+       If True, then allow use of memory of input array (a) for
+       calculations. The input array will be modified by the call to
+       median. This will save memory when you do not need to preserve
+       the contents of the input array. Treat the input as undefined,
+       but it will probably be fully or partially sorted. Default is
+       False. Note that, if `overwrite_input` is True and the input
+       is not already an ndarray, an error will be raised.
+    keepdims : bool, optional
+        If this is set to True, the axes which are reduced are left
+        in the result as dimensions with size one. With this option,
+        the result will broadcast correctly against the original `arr`.
+
+
+
+    Returns
+    -------
+    median : ndarray
+        A new array holding the result. If the input contains integers, or 
+        floats of smaller precision than 64, then the output data-type is 
+        float64.  Otherwise, the output data-type is the same as that of the 
+        input.
+
+    See Also
+    --------
+    mean, median, percentile
+
+    Notes
+    -----
+    Given a vector V of length N, the median of V is the middle value of
+    a sorted copy of V, ``V_sorted`` - i.e., ``V_sorted[(N-1)/2]``, when N is
+    odd.  When N is even, it is the average of the two middle values of
+    ``V_sorted``.
+
+    Examples
+    --------
+    >>> a = np.array([[10.0, 7, 4], [3, 2, 1]])
+    >>> a[0, 1] = np.nan
+    >>> a
+    array([[ 10.,  nan,   4.],
+       [  3.,   2.,   1.]])
+    >>> np.median(a)
+    nan
+    >>> np.nanmedian(a)
+    3.0
+    >>> np.nanmedian(a, axis=0)
+    array([ 6.5,  2.,  2.5])
+    >>> np.median(a, axis=1)
+    array([ 7.,  2.])
+    >>> b = a.copy()
+    >>> np.nanmedian(b, axis=1, overwrite_input=True)
+    array([ 7.,  2.])
+    >>> assert not np.all(a==b)
+    >>> b = a.copy()
+    >>> np.nanmedian(b, axis=None, overwrite_input=True)
+    3.0
+    >>> assert not np.all(a==b)
+
+    """
+    a = np.asanyarray(a)
+    # apply_along_axis in _nanmedian doesn't handle empty arrays well,
+    # so deal them upfront
+    if 0 in a.shape:
+        return np.nanmean(a, axis, out=out, keepdims=keepdims)
+
+    r, k = _ureduce(a, func=_nanmedian, axis=axis, out=out,
+                    overwrite_input=overwrite_input)
+    if keepdims:
+        return r.reshape(k)
+    else:
+        return r
+
+
 def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
     """
     Compute the variance along the specified axis, while ignoring NaNs.
diff --git a/numpy/lib/tests/test_nanfunctions.py b/numpy/lib/tests/test_nanfunctions.py
index f00aa0165..74a50edf4 100644
--- a/numpy/lib/tests/test_nanfunctions.py
+++ b/numpy/lib/tests/test_nanfunctions.py
@@ -11,15 +11,15 @@ from numpy.testing import (
 
 # Test data
 _ndat = np.array([[0.6244, np.nan, 0.2692,  0.0116, np.nan, 0.1170],
-                  [0.5351, 0.9403, np.nan,  0.2100, 0.4759, 0.2833],
-                  [np.nan, np.nan, np.nan,  0.1042, np.nan, 0.5954],
+                  [0.5351, -0.9403, np.nan,  0.2100, 0.4759, 0.2833],
+                  [np.nan, np.nan, np.nan,  0.1042, np.nan, -0.5954],
                   [0.1610, np.nan, np.nan,  0.1859, 0.3146, np.nan]])
 
 
 # Rows of _ndat with nans removed
 _rdat = [np.array([ 0.6244, 0.2692, 0.0116, 0.1170]),
-         np.array([ 0.5351, 0.9403, 0.2100, 0.4759, 0.2833]),
-         np.array([ 0.1042, 0.5954]),
+         np.array([ 0.5351, -0.9403, 0.2100, 0.4759, 0.2833]),
+         np.array([ 0.1042, -0.5954]),
          np.array([ 0.1610, 0.1859, 0.3146])]
 
 
@@ -527,5 +527,95 @@ class TestNanFunctions_MeanVarStd(TestCase):
             assert_(np.isscalar(res))
 
 
+class TestNanFunctions_Median(TestCase):
+
+    def test_mutation(self):
+        # Check that passed array is not modified.
+        ndat = _ndat.copy()
+        np.nanmedian(ndat)
+        assert_equal(ndat, _ndat)
+
+    def test_keepdims(self):
+        mat = np.eye(3)
+        for axis in [None, 0, 1]:
+            tgt = np.median(mat, axis=axis, out=None, overwrite_input=False)
+            res = np.nanmedian(mat, axis=axis, out=None, overwrite_input=False)
+            assert_(res.ndim == tgt.ndim)
+
+    def test_out(self):
+        mat = np.random.rand(3,3)
+        resout = np.zeros(3)
+        tgt = np.median(mat, axis=1)
+        res = np.nanmedian(mat, axis=1, out=resout)
+        assert_almost_equal(res, resout)
+        assert_almost_equal(res, tgt)
+
+    def test_result_values(self):
+            tgt = [np.median(d) for d in _rdat]
+            res = np.nanmedian(_ndat, axis=1)
+            assert_almost_equal(res, tgt)
+
+    def test_allnans(self):
+        mat = np.array([np.nan]*9).reshape(3, 3)
+        for axis in [None, 0, 1]:
+            with warnings.catch_warnings(record=True) as w:
+                warnings.simplefilter('always')
+                assert_(np.isnan(np.nanmedian(mat, axis=axis)).all())
+                if axis is None:
+                    assert_(len(w) == 1)
+                else:
+                    assert_(len(w) == 3)
+                assert_(issubclass(w[0].category, RuntimeWarning))
+                # Check scalar
+                assert_(np.isnan(np.nanmedian(np.nan)))
+                if axis is None:
+                    assert_(len(w) == 2)
+                else:
+                    assert_(len(w) == 4)
+                assert_(issubclass(w[0].category, RuntimeWarning))
+
+    def test_empty(self):
+        mat = np.zeros((0, 3))
+        for axis in [0, None]:
+            with warnings.catch_warnings(record=True) as w:
+                warnings.simplefilter('always')
+                assert_(np.isnan(np.nanmedian(mat, axis=axis)).all())
+                assert_(len(w) == 1)
+                assert_(issubclass(w[0].category, RuntimeWarning))
+        for axis in [1]:
+            with warnings.catch_warnings(record=True) as w:
+                warnings.simplefilter('always')
+                assert_equal(np.nanmedian(mat, axis=axis), np.zeros([]))
+                assert_(len(w) == 0)
+
+    def test_scalar(self):
+        assert_(np.nanmedian(0.) == 0.)
+
+    def test_extended_axis_invalid(self):
+        d = np.ones((3, 5, 7, 11))
+        assert_raises(IndexError, np.nanmedian, d, axis=-5)
+        assert_raises(IndexError, np.nanmedian, d, axis=(0, -5))
+        assert_raises(IndexError, np.nanmedian, d, axis=4)
+        assert_raises(IndexError, np.nanmedian, d, axis=(0, 4))
+        assert_raises(ValueError, np.nanmedian, d, axis=(1, 1))
+
+    def test_keepdims(self):
+        d = np.ones((3, 5, 7, 11))
+        assert_equal(np.nanmedian(d, axis=None, keepdims=True).shape,
+                     (1, 1, 1, 1))
+        assert_equal(np.nanmedian(d, axis=(0, 1), keepdims=True).shape,
+                     (1, 1, 7, 11))
+        assert_equal(np.nanmedian(d, axis=(0, 3), keepdims=True).shape,
+                     (1, 5, 7, 1))
+        assert_equal(np.nanmedian(d, axis=(1,), keepdims=True).shape,
+                     (3, 1, 7, 11))
+        assert_equal(np.nanmedian(d, axis=(0, 1, 2, 3), keepdims=True).shape,
+                     (1, 1, 1, 1))
+        assert_equal(np.nanmedian(d, axis=(0, 1, 3), keepdims=True).shape,
+                     (1, 1, 7, 1))
+
+
+
+
 if __name__ == "__main__":
     run_module_suite()
author	David Freese <dfreese@stanford.edu>	2014-02-16 08:51:16 -0800
committer	David Freese <dfreese@stanford.edu>	2014-05-02 08:57:27 -0700
commit	beec75be6f96a5c0fc9496b587e68eb03bb4a6ba (patch)
tree	a5bfadd37ec0ffdb9249d18d21179d25e8c9ec32 /numpy
parent	a0cf18394d5ce33514fdc37093bd2f65ad4b0dde (diff)
download	numpy-beec75be6f96a5c0fc9496b587e68eb03bb4a6ba.tar.gz