core : fixed sort when axis is None

mstats : fixed mmedian, renamed to median and moved to numpy.ma.extras for compatibility extras : introduced median from mstats mrecords: * fixed __array_finalize__ to keep the mask of a masked array when viewing it as a mrecarray * simplified _getdata
author: pierregm <pierregm@localhost> 2008-03-22 20:03:03 +0000
committer: pierregm <pierregm@localhost> 2008-03-22 20:03:03 +0000
commit: 5cb370e9234582f62231384c26a426301dad3331 (patch)
tree: 8d95cfba8219304ec211fb53f0d829468d458624 /numpy/ma
parent: a70012ade70ba9912061ad2bac3e2a8196c72823 (diff)
download: numpy-5cb370e9234582f62231384c26a426301dad3331.tar.gz
7 files changed, 166 insertions, 45 deletions
diff --git a/numpy/ma/core.py b/numpy/ma/core.py
index 027410295..c38e887ba 100644
--- a/numpy/ma/core.py
+++ b/numpy/ma/core.py
@@ -2856,7 +2856,10 @@ argmin.__doc__ = MaskedArray.argmax.__doc__
 
 def sort(a, axis=-1, kind='quicksort', order=None, endwith=True, fill_value=None):
     "Function version of the eponymous method."
-    a = narray(a, copy=False, subok=True)
+    a = narray(a, copy=True, subok=True)
+    if axis is None:
+        a = a.flatten()
+        axis = 0        
     if fill_value is None:
         if endwith:
             filler = minimum_fill_value(a)
diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py
index 6324e3190..ada19ec98 100644
--- a/numpy/ma/extras.py
+++ b/numpy/ma/extras.py
@@ -11,15 +11,12 @@ __version__ = '1.0'
 __revision__ = "$Revision: 3473 $"
 __date__     = '$Date: 2007-10-29 17:18:13 +0200 (Mon, 29 Oct 2007) $'
 
-__all__ = [
-'apply_along_axis', 'atleast_1d', 'atleast_2d', 'atleast_3d', 'average',
-'vstack', 'hstack', 'dstack', 'row_stack', 'column_stack',
-'compress_rowcols', 'compress_rows', 'compress_cols', 'count_masked',
-'dot', 'hsplit',
-'mask_rowcols','mask_rows','mask_cols','masked_all','masked_all_like',
-'mediff1d', 'mr_',
-'notmasked_edges','notmasked_contiguous',
-'stdu', 'varu',
+__all__ = ['apply_along_axis', 'atleast_1d', 'atleast_2d', 'atleast_3d', 
+           'average', 'vstack', 'hstack', 'dstack', 'row_stack', 'column_stack',
+           'compress_rowcols', 'compress_rows', 'compress_cols', 'count_masked',
+           'dot', 'hsplit', 'mask_rowcols','mask_rows','mask_cols','masked_all',
+           'masked_all_like', 'mediff1d', 'median', 'mr_', 'notmasked_edges',
+           'notmasked_contiguous', 'stdu', 'varu',
            ]
 
 from itertools import groupby
@@ -412,6 +409,84 @@ def average(a, axis=None, weights=None, returned=False):
     else:
         return result
 
+
+
+def median(a, axis=0, out=None, overwrite_input=False):
+    """Compute the median along the specified axis.
+
+    Returns the median of the array elements.  The median is taken
+    over the first axis of the array by default, otherwise over
+    the specified axis.
+
+    Parameters
+    ----------
+    a : array-like
+        Input array or object that can be converted to an array
+    axis : {int, None}, optional
+        Axis along which the medians are computed. The default is to
+        compute the median along the first dimension.  axis=None
+        returns the median of the flattened array
+
+    out : ndarray, optional
+        Alternative output array in which to place the result. It must
+        have the same shape and buffer length as the expected output
+        but the type will be cast if necessary.
+
+    overwrite_input : {False, True}, optional
+       If True, then allow use of memory of input array (a) for
+       calculations. The input array will be modified by the call to
+       median. This will save memory when you do not need to preserve
+       the contents of the input array. Treat the input as undefined,
+       but it will probably be fully or partially sorted. Default is
+       False. Note that, if overwrite_input is true, and the input
+       is not already an ndarray, an error will be raised.
+
+    Returns
+    -------
+    median : ndarray.
+        A new array holding the result is returned unless out is
+        specified, in which case a reference to out is returned.
+        Return datatype is float64 for ints and floats smaller than
+        float64, or the input datatype otherwise.
+
+    See Also
+    -------
+    mean
+
+    Notes
+    -----
+    Given a vector V length N, the median of V is the middle value of
+    a sorted copy of V (Vs) - i.e. Vs[(N-1)/2], when N is odd. It is
+    the mean of the two middle values of Vs, when N is even.
+
+    """
+    def _median1D(data):
+        counts = filled(count(data,axis),0)
+        (idx,rmd) = divmod(counts, 2)
+        if rmd:
+            choice = slice(idx,idx+1)
+        else:
+            choice = slice(idx-1,idx+1)
+        return data[choice].mean(0)
+    # 
+    if overwrite_input:
+        if axis is None:
+            sorted = a.ravel()
+            sorted.sort()
+        else:
+            a.sort(axis=axis)
+            sorted = a
+    else:
+        sorted = sort(a, axis=axis)
+    if axis is None:
+        result = _median1D(sorted)
+    else:
+        result = apply_along_axis(_median1D, axis, sorted)
+    return result
+ 
+
+
+
 #..............................................................................
 def compress_rowcols(x, axis=None):
     """Suppress the rows and/or columns of a 2D array that contains
diff --git a/numpy/ma/morestats.py b/numpy/ma/morestats.py
index b9e77a3c9..9816608b7 100644
--- a/numpy/ma/morestats.py
+++ b/numpy/ma/morestats.py
@@ -23,8 +23,8 @@ from numpy.core.numeric import concatenate
 
 import numpy.ma as MA
 from numpy.ma.core import masked, nomask, MaskedArray, masked_array
-from numpy.ma.extras import apply_along_axis, dot
-from numpy.ma.mstats import trim_both, trimmed_stde, mquantiles, mmedian, stde_median
+from numpy.ma.extras import apply_along_axis, dot, median
+from numpy.ma.mstats import trim_both, trimmed_stde, mquantiles, stde_median
 
 from scipy.stats.distributions import norm, beta, t, binom
 from scipy.stats.morestats import find_repeats
@@ -328,7 +328,7 @@ Returns
     A (p,) array of comparison values.
 
     """
-    (med_1, med_2) = (mmedian(group_1, axis=axis), mmedian(group_2, axis=axis))
+    (med_1, med_2) = (median(group_1, axis=axis), median(group_2, axis=axis))
     (std_1, std_2) = (stde_median(group_1, axis=axis),
                       stde_median(group_2, axis=axis))
     W = abs(med_1 - med_2) / sqrt(std_1**2 + std_2**2)
diff --git a/numpy/ma/mrecords.py b/numpy/ma/mrecords.py
index 4c0bdfa76..f60f46c82 100644
--- a/numpy/ma/mrecords.py
+++ b/numpy/ma/mrecords.py
@@ -163,8 +163,13 @@ class MaskedRecords(MaskedArray, object):
         _fieldmask = getattr(obj, '_fieldmask', None)
         if _fieldmask is None:
             mdescr = [(n,'|b1') for (n,_) in self.dtype.descr]
-            _fieldmask = numpy.empty(self.shape, dtype=mdescr).view(recarray)
-            _fieldmask.flat = tuple([False]*len(mdescr))
+            _mask = getattr(obj, '_mask', nomask)
+            if _mask is nomask:
+                _fieldmask = numpy.empty(self.shape, dtype=mdescr).view(recarray)
+                _fieldmask.flat = tuple([False]*len(mdescr))
+            else:
+                _fieldmask = narray([tuple([m]*len(mdescr)) for m in _mask],
+                                    dtype=mdescr).view(recarray)
         # Update some of the attributes
         attrdict = dict(_fieldmask=_fieldmask,
                         _hardmask=getattr(obj,'_hardmask',False),
@@ -181,7 +186,7 @@ class MaskedRecords(MaskedArray, object):
     #......................................................
     def _getdata(self):
         "Returns the data as a recarray."
-        return self.view(recarray)
+        return ndarray.view(self,recarray)
     _data = property(fget=_getdata)
     #......................................................
     def __setmask__(self, mask):
@@ -469,7 +474,7 @@ The fieldname base is either `_data` or `_mask`."""
         result = narray(self.filled().tolist(), dtype=object)
         mask = narray(self._fieldmask.tolist())
         result[mask] = None
-        return [tuple(r) for r in result]
+        return result.tolist()
     #--------------------------------------------
     # Pickling
     def __getstate__(self):
@@ -787,5 +792,30 @@ set to 'fi', where `i` is the number of existing fields.
     return newdata
 
 ###############################################################################
-
-        
+#
+#if 1:
+#    from numpy.ma.testutils import assert_equal
+#    if 1:
+#        ilist = [1,2,3,4,5]
+#        flist = [1.1,2.2,3.3,4.4,5.5]
+#        slist = ['one','two','three','four','five']
+#        ddtype = [('a',int_),('b',float_),('c','|S8')]
+#        mask = [0,1,0,0,1]
+#        self_base = masked_array(zip(ilist,flist,slist), mask=mask, dtype=ddtype)
+#    if 1:
+#        base = self_base.copy()
+#        mbase = base.view(mrecarray)
+#        mbase = mbase.copy()
+#        mbase.fill_value = (999999,1e20,'N/A')
+#        
+#        print mbase.a
+#        
+#        # Change the data, the mask should be conserved
+#        mbase.a._data[:] = 5
+#        assert_equal(mbase['a']._data, [5,5,5,5,5])
+#        assert_equal(mbase['a']._mask, [0,1,0,0,1])
+#        #
+#        z = mbase.a
+#        print z.tolist()
+#        z = base[:2]
+#        print z.view(mrecarray)
+\ No newline at end of file
diff --git a/numpy/ma/mstats.py b/numpy/ma/mstats.py
index cd2c93c78..07974e486 100644
--- a/numpy/ma/mstats.py
+++ b/numpy/ma/mstats.py
@@ -13,7 +13,7 @@ __date__     = '$Date: 2007-10-29 17:18:13 +0200 (Mon, 29 Oct 2007) $'
 
 
 import numpy
-from numpy import bool_, float_, int_, \
+from numpy import bool_, float_, int_, ndarray, \
     sqrt
 from numpy import array as narray
 import numpy.core.numeric as numeric
@@ -21,9 +21,9 @@ from numpy.core.numeric import concatenate
 
 import numpy.ma
 from numpy.ma.core import masked, nomask, MaskedArray, masked_array
-from numpy.ma.extras import apply_along_axis, dot
+from numpy.ma.extras import apply_along_axis, dot, median as mmedian
 
-__all__ = ['cov','meppf','plotting_positions','meppf','mmedian','mquantiles',
+__all__ = ['cov','meppf','plotting_positions','meppf','mquantiles',
            'stde_median','trim_tail','trim_both','trimmed_mean','trimmed_stde',
            'winsorize']
 
@@ -352,24 +352,6 @@ Parameters
 meppf = plotting_positions
 
 
-def mmedian(data, axis=None):
-    """Returns the median of data along the given axis. 
-
-    Missing data are discarded.
-
-    """
-    def _median1D(data):
-        x = numpy.sort(data.compressed())
-        if x.size == 0:
-            return masked
-        return numpy.median(x)
-    data = masked_array(data, subok=True, copy=True)
-    if axis is None:
-        return _median1D(data)
-    else:
-        return apply_along_axis(_median1D, axis, data)
-
-
 def cov(x, y=None, rowvar=True, bias=False, strict=False):
     """Estimates the covariance matrix.
 
diff --git a/numpy/ma/tests/test_extras.py b/numpy/ma/tests/test_extras.py
index 5a52aeeee..2e1eebb04 100644
--- a/numpy/ma/tests/test_extras.py
+++ b/numpy/ma/tests/test_extras.py
@@ -324,6 +324,36 @@ class TestApplyAlongAxis(NumpyTestCase):
             return b[1]
         xa = apply_along_axis(myfunc,2,a)
         assert_equal(xa,[[1,4],[7,10]])
+        
+class TestMedian(NumpyTestCase):
+    def __init__(self, *args, **kwds):
+        NumpyTestCase.__init__(self, *args, **kwds)
+    #
+    def test_2d(self):
+        "Tests median w/ 2D"
+        (n,p) = (101,30)
+        x = masked_array(numpy.linspace(-1.,1.,n),)
+        x[:10] = x[-10:] = masked       
+        z = masked_array(numpy.empty((n,p), dtype=numpy.float_))
+        z[:,0] = x[:]
+        idx = numpy.arange(len(x))
+        for i in range(1,p):
+            numpy.random.shuffle(idx)
+            z[:,i] = x[idx]
+        assert_equal(median(z[:,0]), 0)
+        assert_equal(median(z), numpy.zeros((p,)))
+    #
+    def test_3d(self):
+        "Tests median w/ 3D"
+        x = numpy.ma.arange(24).reshape(3,4,2)
+        x[x%3==0] = masked
+        assert_equal(median(x,0), [[12,9],[6,15],[12,9],[18,15]])
+        x.shape = (4,3,2)
+        assert_equal(median(x,0),[[99,10],[11,99],[13,14]])
+        x = numpy.ma.arange(24).reshape(4,3,2)
+        x[x%5==0] = masked
+        assert_equal(median(x,0), [[12,10],[8,9],[16,17]])        
+        
 
 ###############################################################################
 #------------------------------------------------------------------------------
diff --git a/numpy/ma/tests/test_mstats.py b/numpy/ma/tests/test_mstats.py
index e4657a58f..6fb5732e7 100644
--- a/numpy/ma/tests/test_mstats.py
+++ b/numpy/ma/tests/test_mstats.py
@@ -19,6 +19,7 @@ import numpy.ma.testutils
 from numpy.ma.testutils import *
 
 from numpy.ma.mstats import *
+from numpy.ma import median
 
 #..............................................................................
 class TestQuantiles(NumpyTestCase):
@@ -96,19 +97,19 @@ class TestMedian(NumpyTestCase):
         for i in range(1,p):
             numpy.random.shuffle(idx)
             z[:,i] = x[idx]
-        assert_equal(mmedian(z[:,0]), 0)
-        assert_equal(mmedian(z), numpy.zeros((p,)))
+        assert_equal(median(z[:,0]), 0)
+        assert_equal(median(z), numpy.zeros((p,)))
 
     def test_3d(self):
         "Tests median w/ 3D"
         x = numpy.ma.arange(24).reshape(3,4,2)
         x[x%3==0] = masked
-        assert_equal(mmedian(x,0), [[12,9],[6,15],[12,9],[18,15]])
+        assert_equal(median(x,0), [[12,9],[6,15],[12,9],[18,15]])
         x.shape = (4,3,2)
-        assert_equal(mmedian(x,0),[[99,10],[11,99],[13,14]])
+        assert_equal(median(x,0),[[99,10],[11,99],[13,14]])
         x = numpy.ma.arange(24).reshape(4,3,2)
         x[x%5==0] = masked
-        assert_equal(mmedian(x,0), [[12,10],[8,9],[16,17]])
+        assert_equal(median(x,0), [[12,10],[8,9],[16,17]])
 
 #..............................................................................
 class TestTrimming(NumpyTestCase):
author	pierregm <pierregm@localhost>	2008-03-22 20:03:03 +0000
committer	pierregm <pierregm@localhost>	2008-03-22 20:03:03 +0000
commit	5cb370e9234582f62231384c26a426301dad3331 (patch)
tree	8d95cfba8219304ec211fb53f0d829468d458624 /numpy/ma
parent	a70012ade70ba9912061ad2bac3e2a8196c72823 (diff)
download	numpy-5cb370e9234582f62231384c26a426301dad3331.tar.gz