diff options
author | pierregm <pierregm@localhost> | 2008-03-22 20:03:03 +0000 |
---|---|---|
committer | pierregm <pierregm@localhost> | 2008-03-22 20:03:03 +0000 |
commit | 5cb370e9234582f62231384c26a426301dad3331 (patch) | |
tree | 8d95cfba8219304ec211fb53f0d829468d458624 /numpy/ma | |
parent | a70012ade70ba9912061ad2bac3e2a8196c72823 (diff) | |
download | numpy-5cb370e9234582f62231384c26a426301dad3331.tar.gz |
core : fixed sort when axis is None
mstats : fixed mmedian, renamed to median and moved to numpy.ma.extras for compatibility
extras : introduced median from mstats
mrecords: * fixed __array_finalize__ to keep the mask of a masked array when viewing it as a mrecarray
* simplified _getdata
Diffstat (limited to 'numpy/ma')
-rw-r--r-- | numpy/ma/core.py | 5 | ||||
-rw-r--r-- | numpy/ma/extras.py | 93 | ||||
-rw-r--r-- | numpy/ma/morestats.py | 6 | ||||
-rw-r--r-- | numpy/ma/mrecords.py | 42 | ||||
-rw-r--r-- | numpy/ma/mstats.py | 24 | ||||
-rw-r--r-- | numpy/ma/tests/test_extras.py | 30 | ||||
-rw-r--r-- | numpy/ma/tests/test_mstats.py | 11 |
7 files changed, 166 insertions, 45 deletions
diff --git a/numpy/ma/core.py b/numpy/ma/core.py index 027410295..c38e887ba 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -2856,7 +2856,10 @@ argmin.__doc__ = MaskedArray.argmax.__doc__ def sort(a, axis=-1, kind='quicksort', order=None, endwith=True, fill_value=None): "Function version of the eponymous method." - a = narray(a, copy=False, subok=True) + a = narray(a, copy=True, subok=True) + if axis is None: + a = a.flatten() + axis = 0 if fill_value is None: if endwith: filler = minimum_fill_value(a) diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py index 6324e3190..ada19ec98 100644 --- a/numpy/ma/extras.py +++ b/numpy/ma/extras.py @@ -11,15 +11,12 @@ __version__ = '1.0' __revision__ = "$Revision: 3473 $" __date__ = '$Date: 2007-10-29 17:18:13 +0200 (Mon, 29 Oct 2007) $' -__all__ = [ -'apply_along_axis', 'atleast_1d', 'atleast_2d', 'atleast_3d', 'average', -'vstack', 'hstack', 'dstack', 'row_stack', 'column_stack', -'compress_rowcols', 'compress_rows', 'compress_cols', 'count_masked', -'dot', 'hsplit', -'mask_rowcols','mask_rows','mask_cols','masked_all','masked_all_like', -'mediff1d', 'mr_', -'notmasked_edges','notmasked_contiguous', -'stdu', 'varu', +__all__ = ['apply_along_axis', 'atleast_1d', 'atleast_2d', 'atleast_3d', + 'average', 'vstack', 'hstack', 'dstack', 'row_stack', 'column_stack', + 'compress_rowcols', 'compress_rows', 'compress_cols', 'count_masked', + 'dot', 'hsplit', 'mask_rowcols','mask_rows','mask_cols','masked_all', + 'masked_all_like', 'mediff1d', 'median', 'mr_', 'notmasked_edges', + 'notmasked_contiguous', 'stdu', 'varu', ] from itertools import groupby @@ -412,6 +409,84 @@ def average(a, axis=None, weights=None, returned=False): else: return result + + +def median(a, axis=0, out=None, overwrite_input=False): + """Compute the median along the specified axis. + + Returns the median of the array elements. The median is taken + over the first axis of the array by default, otherwise over + the specified axis. + + Parameters + ---------- + a : array-like + Input array or object that can be converted to an array + axis : {int, None}, optional + Axis along which the medians are computed. The default is to + compute the median along the first dimension. axis=None + returns the median of the flattened array + + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output + but the type will be cast if necessary. + + overwrite_input : {False, True}, optional + If True, then allow use of memory of input array (a) for + calculations. The input array will be modified by the call to + median. This will save memory when you do not need to preserve + the contents of the input array. Treat the input as undefined, + but it will probably be fully or partially sorted. Default is + False. Note that, if overwrite_input is true, and the input + is not already an ndarray, an error will be raised. + + Returns + ------- + median : ndarray. + A new array holding the result is returned unless out is + specified, in which case a reference to out is returned. + Return datatype is float64 for ints and floats smaller than + float64, or the input datatype otherwise. + + See Also + ------- + mean + + Notes + ----- + Given a vector V length N, the median of V is the middle value of + a sorted copy of V (Vs) - i.e. Vs[(N-1)/2], when N is odd. It is + the mean of the two middle values of Vs, when N is even. + + """ + def _median1D(data): + counts = filled(count(data,axis),0) + (idx,rmd) = divmod(counts, 2) + if rmd: + choice = slice(idx,idx+1) + else: + choice = slice(idx-1,idx+1) + return data[choice].mean(0) + # + if overwrite_input: + if axis is None: + sorted = a.ravel() + sorted.sort() + else: + a.sort(axis=axis) + sorted = a + else: + sorted = sort(a, axis=axis) + if axis is None: + result = _median1D(sorted) + else: + result = apply_along_axis(_median1D, axis, sorted) + return result + + + + #.............................................................................. def compress_rowcols(x, axis=None): """Suppress the rows and/or columns of a 2D array that contains diff --git a/numpy/ma/morestats.py b/numpy/ma/morestats.py index b9e77a3c9..9816608b7 100644 --- a/numpy/ma/morestats.py +++ b/numpy/ma/morestats.py @@ -23,8 +23,8 @@ from numpy.core.numeric import concatenate import numpy.ma as MA from numpy.ma.core import masked, nomask, MaskedArray, masked_array -from numpy.ma.extras import apply_along_axis, dot -from numpy.ma.mstats import trim_both, trimmed_stde, mquantiles, mmedian, stde_median +from numpy.ma.extras import apply_along_axis, dot, median +from numpy.ma.mstats import trim_both, trimmed_stde, mquantiles, stde_median from scipy.stats.distributions import norm, beta, t, binom from scipy.stats.morestats import find_repeats @@ -328,7 +328,7 @@ Returns A (p,) array of comparison values. """ - (med_1, med_2) = (mmedian(group_1, axis=axis), mmedian(group_2, axis=axis)) + (med_1, med_2) = (median(group_1, axis=axis), median(group_2, axis=axis)) (std_1, std_2) = (stde_median(group_1, axis=axis), stde_median(group_2, axis=axis)) W = abs(med_1 - med_2) / sqrt(std_1**2 + std_2**2) diff --git a/numpy/ma/mrecords.py b/numpy/ma/mrecords.py index 4c0bdfa76..f60f46c82 100644 --- a/numpy/ma/mrecords.py +++ b/numpy/ma/mrecords.py @@ -163,8 +163,13 @@ class MaskedRecords(MaskedArray, object): _fieldmask = getattr(obj, '_fieldmask', None) if _fieldmask is None: mdescr = [(n,'|b1') for (n,_) in self.dtype.descr] - _fieldmask = numpy.empty(self.shape, dtype=mdescr).view(recarray) - _fieldmask.flat = tuple([False]*len(mdescr)) + _mask = getattr(obj, '_mask', nomask) + if _mask is nomask: + _fieldmask = numpy.empty(self.shape, dtype=mdescr).view(recarray) + _fieldmask.flat = tuple([False]*len(mdescr)) + else: + _fieldmask = narray([tuple([m]*len(mdescr)) for m in _mask], + dtype=mdescr).view(recarray) # Update some of the attributes attrdict = dict(_fieldmask=_fieldmask, _hardmask=getattr(obj,'_hardmask',False), @@ -181,7 +186,7 @@ class MaskedRecords(MaskedArray, object): #...................................................... def _getdata(self): "Returns the data as a recarray." - return self.view(recarray) + return ndarray.view(self,recarray) _data = property(fget=_getdata) #...................................................... def __setmask__(self, mask): @@ -469,7 +474,7 @@ The fieldname base is either `_data` or `_mask`.""" result = narray(self.filled().tolist(), dtype=object) mask = narray(self._fieldmask.tolist()) result[mask] = None - return [tuple(r) for r in result] + return result.tolist() #-------------------------------------------- # Pickling def __getstate__(self): @@ -787,5 +792,30 @@ set to 'fi', where `i` is the number of existing fields. return newdata ############################################################################### - - +# +#if 1: +# from numpy.ma.testutils import assert_equal +# if 1: +# ilist = [1,2,3,4,5] +# flist = [1.1,2.2,3.3,4.4,5.5] +# slist = ['one','two','three','four','five'] +# ddtype = [('a',int_),('b',float_),('c','|S8')] +# mask = [0,1,0,0,1] +# self_base = masked_array(zip(ilist,flist,slist), mask=mask, dtype=ddtype) +# if 1: +# base = self_base.copy() +# mbase = base.view(mrecarray) +# mbase = mbase.copy() +# mbase.fill_value = (999999,1e20,'N/A') +# +# print mbase.a +# +# # Change the data, the mask should be conserved +# mbase.a._data[:] = 5 +# assert_equal(mbase['a']._data, [5,5,5,5,5]) +# assert_equal(mbase['a']._mask, [0,1,0,0,1]) +# # +# z = mbase.a +# print z.tolist() +# z = base[:2] +# print z.view(mrecarray)
\ No newline at end of file diff --git a/numpy/ma/mstats.py b/numpy/ma/mstats.py index cd2c93c78..07974e486 100644 --- a/numpy/ma/mstats.py +++ b/numpy/ma/mstats.py @@ -13,7 +13,7 @@ __date__ = '$Date: 2007-10-29 17:18:13 +0200 (Mon, 29 Oct 2007) $' import numpy -from numpy import bool_, float_, int_, \ +from numpy import bool_, float_, int_, ndarray, \ sqrt from numpy import array as narray import numpy.core.numeric as numeric @@ -21,9 +21,9 @@ from numpy.core.numeric import concatenate import numpy.ma from numpy.ma.core import masked, nomask, MaskedArray, masked_array -from numpy.ma.extras import apply_along_axis, dot +from numpy.ma.extras import apply_along_axis, dot, median as mmedian -__all__ = ['cov','meppf','plotting_positions','meppf','mmedian','mquantiles', +__all__ = ['cov','meppf','plotting_positions','meppf','mquantiles', 'stde_median','trim_tail','trim_both','trimmed_mean','trimmed_stde', 'winsorize'] @@ -352,24 +352,6 @@ Parameters meppf = plotting_positions -def mmedian(data, axis=None): - """Returns the median of data along the given axis. - - Missing data are discarded. - - """ - def _median1D(data): - x = numpy.sort(data.compressed()) - if x.size == 0: - return masked - return numpy.median(x) - data = masked_array(data, subok=True, copy=True) - if axis is None: - return _median1D(data) - else: - return apply_along_axis(_median1D, axis, data) - - def cov(x, y=None, rowvar=True, bias=False, strict=False): """Estimates the covariance matrix. diff --git a/numpy/ma/tests/test_extras.py b/numpy/ma/tests/test_extras.py index 5a52aeeee..2e1eebb04 100644 --- a/numpy/ma/tests/test_extras.py +++ b/numpy/ma/tests/test_extras.py @@ -324,6 +324,36 @@ class TestApplyAlongAxis(NumpyTestCase): return b[1] xa = apply_along_axis(myfunc,2,a) assert_equal(xa,[[1,4],[7,10]]) + +class TestMedian(NumpyTestCase): + def __init__(self, *args, **kwds): + NumpyTestCase.__init__(self, *args, **kwds) + # + def test_2d(self): + "Tests median w/ 2D" + (n,p) = (101,30) + x = masked_array(numpy.linspace(-1.,1.,n),) + x[:10] = x[-10:] = masked + z = masked_array(numpy.empty((n,p), dtype=numpy.float_)) + z[:,0] = x[:] + idx = numpy.arange(len(x)) + for i in range(1,p): + numpy.random.shuffle(idx) + z[:,i] = x[idx] + assert_equal(median(z[:,0]), 0) + assert_equal(median(z), numpy.zeros((p,))) + # + def test_3d(self): + "Tests median w/ 3D" + x = numpy.ma.arange(24).reshape(3,4,2) + x[x%3==0] = masked + assert_equal(median(x,0), [[12,9],[6,15],[12,9],[18,15]]) + x.shape = (4,3,2) + assert_equal(median(x,0),[[99,10],[11,99],[13,14]]) + x = numpy.ma.arange(24).reshape(4,3,2) + x[x%5==0] = masked + assert_equal(median(x,0), [[12,10],[8,9],[16,17]]) + ############################################################################### #------------------------------------------------------------------------------ diff --git a/numpy/ma/tests/test_mstats.py b/numpy/ma/tests/test_mstats.py index e4657a58f..6fb5732e7 100644 --- a/numpy/ma/tests/test_mstats.py +++ b/numpy/ma/tests/test_mstats.py @@ -19,6 +19,7 @@ import numpy.ma.testutils from numpy.ma.testutils import * from numpy.ma.mstats import * +from numpy.ma import median #.............................................................................. class TestQuantiles(NumpyTestCase): @@ -96,19 +97,19 @@ class TestMedian(NumpyTestCase): for i in range(1,p): numpy.random.shuffle(idx) z[:,i] = x[idx] - assert_equal(mmedian(z[:,0]), 0) - assert_equal(mmedian(z), numpy.zeros((p,))) + assert_equal(median(z[:,0]), 0) + assert_equal(median(z), numpy.zeros((p,))) def test_3d(self): "Tests median w/ 3D" x = numpy.ma.arange(24).reshape(3,4,2) x[x%3==0] = masked - assert_equal(mmedian(x,0), [[12,9],[6,15],[12,9],[18,15]]) + assert_equal(median(x,0), [[12,9],[6,15],[12,9],[18,15]]) x.shape = (4,3,2) - assert_equal(mmedian(x,0),[[99,10],[11,99],[13,14]]) + assert_equal(median(x,0),[[99,10],[11,99],[13,14]]) x = numpy.ma.arange(24).reshape(4,3,2) x[x%5==0] = masked - assert_equal(mmedian(x,0), [[12,10],[8,9],[16,17]]) + assert_equal(median(x,0), [[12,10],[8,9],[16,17]]) #.............................................................................. class TestTrimming(NumpyTestCase): |