summaryrefslogtreecommitdiff
path: root/numpy/ma
diff options
context:
space:
mode:
authorpierregm <pierregm@localhost>2008-03-22 20:03:03 +0000
committerpierregm <pierregm@localhost>2008-03-22 20:03:03 +0000
commit5cb370e9234582f62231384c26a426301dad3331 (patch)
tree8d95cfba8219304ec211fb53f0d829468d458624 /numpy/ma
parenta70012ade70ba9912061ad2bac3e2a8196c72823 (diff)
downloadnumpy-5cb370e9234582f62231384c26a426301dad3331.tar.gz
core : fixed sort when axis is None
mstats : fixed mmedian, renamed to median and moved to numpy.ma.extras for compatibility extras : introduced median from mstats mrecords: * fixed __array_finalize__ to keep the mask of a masked array when viewing it as a mrecarray * simplified _getdata
Diffstat (limited to 'numpy/ma')
-rw-r--r--numpy/ma/core.py5
-rw-r--r--numpy/ma/extras.py93
-rw-r--r--numpy/ma/morestats.py6
-rw-r--r--numpy/ma/mrecords.py42
-rw-r--r--numpy/ma/mstats.py24
-rw-r--r--numpy/ma/tests/test_extras.py30
-rw-r--r--numpy/ma/tests/test_mstats.py11
7 files changed, 166 insertions, 45 deletions
diff --git a/numpy/ma/core.py b/numpy/ma/core.py
index 027410295..c38e887ba 100644
--- a/numpy/ma/core.py
+++ b/numpy/ma/core.py
@@ -2856,7 +2856,10 @@ argmin.__doc__ = MaskedArray.argmax.__doc__
def sort(a, axis=-1, kind='quicksort', order=None, endwith=True, fill_value=None):
"Function version of the eponymous method."
- a = narray(a, copy=False, subok=True)
+ a = narray(a, copy=True, subok=True)
+ if axis is None:
+ a = a.flatten()
+ axis = 0
if fill_value is None:
if endwith:
filler = minimum_fill_value(a)
diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py
index 6324e3190..ada19ec98 100644
--- a/numpy/ma/extras.py
+++ b/numpy/ma/extras.py
@@ -11,15 +11,12 @@ __version__ = '1.0'
__revision__ = "$Revision: 3473 $"
__date__ = '$Date: 2007-10-29 17:18:13 +0200 (Mon, 29 Oct 2007) $'
-__all__ = [
-'apply_along_axis', 'atleast_1d', 'atleast_2d', 'atleast_3d', 'average',
-'vstack', 'hstack', 'dstack', 'row_stack', 'column_stack',
-'compress_rowcols', 'compress_rows', 'compress_cols', 'count_masked',
-'dot', 'hsplit',
-'mask_rowcols','mask_rows','mask_cols','masked_all','masked_all_like',
-'mediff1d', 'mr_',
-'notmasked_edges','notmasked_contiguous',
-'stdu', 'varu',
+__all__ = ['apply_along_axis', 'atleast_1d', 'atleast_2d', 'atleast_3d',
+ 'average', 'vstack', 'hstack', 'dstack', 'row_stack', 'column_stack',
+ 'compress_rowcols', 'compress_rows', 'compress_cols', 'count_masked',
+ 'dot', 'hsplit', 'mask_rowcols','mask_rows','mask_cols','masked_all',
+ 'masked_all_like', 'mediff1d', 'median', 'mr_', 'notmasked_edges',
+ 'notmasked_contiguous', 'stdu', 'varu',
]
from itertools import groupby
@@ -412,6 +409,84 @@ def average(a, axis=None, weights=None, returned=False):
else:
return result
+
+
+def median(a, axis=0, out=None, overwrite_input=False):
+ """Compute the median along the specified axis.
+
+ Returns the median of the array elements. The median is taken
+ over the first axis of the array by default, otherwise over
+ the specified axis.
+
+ Parameters
+ ----------
+ a : array-like
+ Input array or object that can be converted to an array
+ axis : {int, None}, optional
+ Axis along which the medians are computed. The default is to
+ compute the median along the first dimension. axis=None
+ returns the median of the flattened array
+
+ out : ndarray, optional
+ Alternative output array in which to place the result. It must
+ have the same shape and buffer length as the expected output
+ but the type will be cast if necessary.
+
+ overwrite_input : {False, True}, optional
+ If True, then allow use of memory of input array (a) for
+ calculations. The input array will be modified by the call to
+ median. This will save memory when you do not need to preserve
+ the contents of the input array. Treat the input as undefined,
+ but it will probably be fully or partially sorted. Default is
+ False. Note that, if overwrite_input is true, and the input
+ is not already an ndarray, an error will be raised.
+
+ Returns
+ -------
+ median : ndarray.
+ A new array holding the result is returned unless out is
+ specified, in which case a reference to out is returned.
+ Return datatype is float64 for ints and floats smaller than
+ float64, or the input datatype otherwise.
+
+ See Also
+ -------
+ mean
+
+ Notes
+ -----
+ Given a vector V length N, the median of V is the middle value of
+ a sorted copy of V (Vs) - i.e. Vs[(N-1)/2], when N is odd. It is
+ the mean of the two middle values of Vs, when N is even.
+
+ """
+ def _median1D(data):
+ counts = filled(count(data,axis),0)
+ (idx,rmd) = divmod(counts, 2)
+ if rmd:
+ choice = slice(idx,idx+1)
+ else:
+ choice = slice(idx-1,idx+1)
+ return data[choice].mean(0)
+ #
+ if overwrite_input:
+ if axis is None:
+ sorted = a.ravel()
+ sorted.sort()
+ else:
+ a.sort(axis=axis)
+ sorted = a
+ else:
+ sorted = sort(a, axis=axis)
+ if axis is None:
+ result = _median1D(sorted)
+ else:
+ result = apply_along_axis(_median1D, axis, sorted)
+ return result
+
+
+
+
#..............................................................................
def compress_rowcols(x, axis=None):
"""Suppress the rows and/or columns of a 2D array that contains
diff --git a/numpy/ma/morestats.py b/numpy/ma/morestats.py
index b9e77a3c9..9816608b7 100644
--- a/numpy/ma/morestats.py
+++ b/numpy/ma/morestats.py
@@ -23,8 +23,8 @@ from numpy.core.numeric import concatenate
import numpy.ma as MA
from numpy.ma.core import masked, nomask, MaskedArray, masked_array
-from numpy.ma.extras import apply_along_axis, dot
-from numpy.ma.mstats import trim_both, trimmed_stde, mquantiles, mmedian, stde_median
+from numpy.ma.extras import apply_along_axis, dot, median
+from numpy.ma.mstats import trim_both, trimmed_stde, mquantiles, stde_median
from scipy.stats.distributions import norm, beta, t, binom
from scipy.stats.morestats import find_repeats
@@ -328,7 +328,7 @@ Returns
A (p,) array of comparison values.
"""
- (med_1, med_2) = (mmedian(group_1, axis=axis), mmedian(group_2, axis=axis))
+ (med_1, med_2) = (median(group_1, axis=axis), median(group_2, axis=axis))
(std_1, std_2) = (stde_median(group_1, axis=axis),
stde_median(group_2, axis=axis))
W = abs(med_1 - med_2) / sqrt(std_1**2 + std_2**2)
diff --git a/numpy/ma/mrecords.py b/numpy/ma/mrecords.py
index 4c0bdfa76..f60f46c82 100644
--- a/numpy/ma/mrecords.py
+++ b/numpy/ma/mrecords.py
@@ -163,8 +163,13 @@ class MaskedRecords(MaskedArray, object):
_fieldmask = getattr(obj, '_fieldmask', None)
if _fieldmask is None:
mdescr = [(n,'|b1') for (n,_) in self.dtype.descr]
- _fieldmask = numpy.empty(self.shape, dtype=mdescr).view(recarray)
- _fieldmask.flat = tuple([False]*len(mdescr))
+ _mask = getattr(obj, '_mask', nomask)
+ if _mask is nomask:
+ _fieldmask = numpy.empty(self.shape, dtype=mdescr).view(recarray)
+ _fieldmask.flat = tuple([False]*len(mdescr))
+ else:
+ _fieldmask = narray([tuple([m]*len(mdescr)) for m in _mask],
+ dtype=mdescr).view(recarray)
# Update some of the attributes
attrdict = dict(_fieldmask=_fieldmask,
_hardmask=getattr(obj,'_hardmask',False),
@@ -181,7 +186,7 @@ class MaskedRecords(MaskedArray, object):
#......................................................
def _getdata(self):
"Returns the data as a recarray."
- return self.view(recarray)
+ return ndarray.view(self,recarray)
_data = property(fget=_getdata)
#......................................................
def __setmask__(self, mask):
@@ -469,7 +474,7 @@ The fieldname base is either `_data` or `_mask`."""
result = narray(self.filled().tolist(), dtype=object)
mask = narray(self._fieldmask.tolist())
result[mask] = None
- return [tuple(r) for r in result]
+ return result.tolist()
#--------------------------------------------
# Pickling
def __getstate__(self):
@@ -787,5 +792,30 @@ set to 'fi', where `i` is the number of existing fields.
return newdata
###############################################################################
-
-
+#
+#if 1:
+# from numpy.ma.testutils import assert_equal
+# if 1:
+# ilist = [1,2,3,4,5]
+# flist = [1.1,2.2,3.3,4.4,5.5]
+# slist = ['one','two','three','four','five']
+# ddtype = [('a',int_),('b',float_),('c','|S8')]
+# mask = [0,1,0,0,1]
+# self_base = masked_array(zip(ilist,flist,slist), mask=mask, dtype=ddtype)
+# if 1:
+# base = self_base.copy()
+# mbase = base.view(mrecarray)
+# mbase = mbase.copy()
+# mbase.fill_value = (999999,1e20,'N/A')
+#
+# print mbase.a
+#
+# # Change the data, the mask should be conserved
+# mbase.a._data[:] = 5
+# assert_equal(mbase['a']._data, [5,5,5,5,5])
+# assert_equal(mbase['a']._mask, [0,1,0,0,1])
+# #
+# z = mbase.a
+# print z.tolist()
+# z = base[:2]
+# print z.view(mrecarray) \ No newline at end of file
diff --git a/numpy/ma/mstats.py b/numpy/ma/mstats.py
index cd2c93c78..07974e486 100644
--- a/numpy/ma/mstats.py
+++ b/numpy/ma/mstats.py
@@ -13,7 +13,7 @@ __date__ = '$Date: 2007-10-29 17:18:13 +0200 (Mon, 29 Oct 2007) $'
import numpy
-from numpy import bool_, float_, int_, \
+from numpy import bool_, float_, int_, ndarray, \
sqrt
from numpy import array as narray
import numpy.core.numeric as numeric
@@ -21,9 +21,9 @@ from numpy.core.numeric import concatenate
import numpy.ma
from numpy.ma.core import masked, nomask, MaskedArray, masked_array
-from numpy.ma.extras import apply_along_axis, dot
+from numpy.ma.extras import apply_along_axis, dot, median as mmedian
-__all__ = ['cov','meppf','plotting_positions','meppf','mmedian','mquantiles',
+__all__ = ['cov','meppf','plotting_positions','meppf','mquantiles',
'stde_median','trim_tail','trim_both','trimmed_mean','trimmed_stde',
'winsorize']
@@ -352,24 +352,6 @@ Parameters
meppf = plotting_positions
-def mmedian(data, axis=None):
- """Returns the median of data along the given axis.
-
- Missing data are discarded.
-
- """
- def _median1D(data):
- x = numpy.sort(data.compressed())
- if x.size == 0:
- return masked
- return numpy.median(x)
- data = masked_array(data, subok=True, copy=True)
- if axis is None:
- return _median1D(data)
- else:
- return apply_along_axis(_median1D, axis, data)
-
-
def cov(x, y=None, rowvar=True, bias=False, strict=False):
"""Estimates the covariance matrix.
diff --git a/numpy/ma/tests/test_extras.py b/numpy/ma/tests/test_extras.py
index 5a52aeeee..2e1eebb04 100644
--- a/numpy/ma/tests/test_extras.py
+++ b/numpy/ma/tests/test_extras.py
@@ -324,6 +324,36 @@ class TestApplyAlongAxis(NumpyTestCase):
return b[1]
xa = apply_along_axis(myfunc,2,a)
assert_equal(xa,[[1,4],[7,10]])
+
+class TestMedian(NumpyTestCase):
+ def __init__(self, *args, **kwds):
+ NumpyTestCase.__init__(self, *args, **kwds)
+ #
+ def test_2d(self):
+ "Tests median w/ 2D"
+ (n,p) = (101,30)
+ x = masked_array(numpy.linspace(-1.,1.,n),)
+ x[:10] = x[-10:] = masked
+ z = masked_array(numpy.empty((n,p), dtype=numpy.float_))
+ z[:,0] = x[:]
+ idx = numpy.arange(len(x))
+ for i in range(1,p):
+ numpy.random.shuffle(idx)
+ z[:,i] = x[idx]
+ assert_equal(median(z[:,0]), 0)
+ assert_equal(median(z), numpy.zeros((p,)))
+ #
+ def test_3d(self):
+ "Tests median w/ 3D"
+ x = numpy.ma.arange(24).reshape(3,4,2)
+ x[x%3==0] = masked
+ assert_equal(median(x,0), [[12,9],[6,15],[12,9],[18,15]])
+ x.shape = (4,3,2)
+ assert_equal(median(x,0),[[99,10],[11,99],[13,14]])
+ x = numpy.ma.arange(24).reshape(4,3,2)
+ x[x%5==0] = masked
+ assert_equal(median(x,0), [[12,10],[8,9],[16,17]])
+
###############################################################################
#------------------------------------------------------------------------------
diff --git a/numpy/ma/tests/test_mstats.py b/numpy/ma/tests/test_mstats.py
index e4657a58f..6fb5732e7 100644
--- a/numpy/ma/tests/test_mstats.py
+++ b/numpy/ma/tests/test_mstats.py
@@ -19,6 +19,7 @@ import numpy.ma.testutils
from numpy.ma.testutils import *
from numpy.ma.mstats import *
+from numpy.ma import median
#..............................................................................
class TestQuantiles(NumpyTestCase):
@@ -96,19 +97,19 @@ class TestMedian(NumpyTestCase):
for i in range(1,p):
numpy.random.shuffle(idx)
z[:,i] = x[idx]
- assert_equal(mmedian(z[:,0]), 0)
- assert_equal(mmedian(z), numpy.zeros((p,)))
+ assert_equal(median(z[:,0]), 0)
+ assert_equal(median(z), numpy.zeros((p,)))
def test_3d(self):
"Tests median w/ 3D"
x = numpy.ma.arange(24).reshape(3,4,2)
x[x%3==0] = masked
- assert_equal(mmedian(x,0), [[12,9],[6,15],[12,9],[18,15]])
+ assert_equal(median(x,0), [[12,9],[6,15],[12,9],[18,15]])
x.shape = (4,3,2)
- assert_equal(mmedian(x,0),[[99,10],[11,99],[13,14]])
+ assert_equal(median(x,0),[[99,10],[11,99],[13,14]])
x = numpy.ma.arange(24).reshape(4,3,2)
x[x%5==0] = masked
- assert_equal(mmedian(x,0), [[12,10],[8,9],[16,17]])
+ assert_equal(median(x,0), [[12,10],[8,9],[16,17]])
#..............................................................................
class TestTrimming(NumpyTestCase):