diff options
author | Charles Harris <charlesr.harris@gmail.com> | 2010-07-07 04:32:00 +0000 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2010-07-07 04:32:00 +0000 |
commit | 8bb282307481e208f972a72c5745c63e2404cd66 (patch) | |
tree | 31d7f1f3ff41423e5d0d90ba20b655072dc414b9 /numpy | |
parent | b1c994b77d851e49a1c62248b09aeaea5645fbdf (diff) | |
download | numpy-8bb282307481e208f972a72c5745c63e2404cd66.tar.gz |
ENH: Add ddof keyword to masked versions of cov and corrcoef.
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/lib/function_base.py | 12 |
-rw-r--r-- | numpy/lib/tests/test_function_base.py | 51 |
-rw-r--r-- | numpy/ma/extras.py | 49 |
-rw-r--r-- | numpy/ma/tests/test_extras.py | 6 |
4 files changed, 83 insertions, 35 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 3f49af5f1..b2ec9bb5a 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -1846,8 +1846,8 @@ def cov(m, y=None, rowvar=1, bias=0, ddof=None): ddof : int, optional .. versionadded:: 1.5 If not ``None`` normalization is by ``(N - ddof)``, where ``N`` is - the number of observations. When defined, ``ddof`` overrides the - value implied by ``bias``. The default value is ``None``. + the number of observations; this overrides the value implied by + ``bias``. The default value is ``None``. Returns ------- @@ -1893,6 +1893,10 @@ def cov(m, y=None, rowvar=1, bias=0, ddof=None): 11.71 """ + # Check inputs + if ddof is not None and ddof != int(ddof): + raise ValueError("ddof must be integer") + X = array(m, ndmin=2, dtype=float) if X.shape[0] == 1: rowvar = 1 @@ -1961,8 +1965,8 @@ def corrcoef(x, y=None, rowvar=1, bias=0, ddof=None): ddof : {None, int}, optional .. versionadded:: 1.5 If not ``None`` normalization is by ``(N - ddof)``, where ``N`` is - the number of observations. When defined, ``ddof`` overrides the - value implied by ``bias``. The default value is ``None``. + the number of observations; this overrides the value implied by + ``bias``. The default value is ``None``. Returns ------- diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 5d0f8aa45..037e8043a 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -821,30 +821,35 @@ class TestNanFunctsIntTypes(TestCase): class TestCorrCoef(TestCase): + A = array([[ 0.15391142, 0.18045767, 0.14197213], + [ 0.70461506, 0.96474128, 0.27906989], + [ 0.9297531 , 0.32296769, 0.19267156]]) + B = array([[ 0.10377691, 0.5417086 , 0.49807457], + [ 0.82872117, 0.77801674, 0.39226705], + [ 0.9314666 , 0.66800209, 0.03538394]]) + res1 = array([[ 1. , 0.9379533 , -0.04931983], + [ 0.9379533 , 1. , 0.30007991], + [-0.04931983, 0.30007991, 1. 
]]) + res2 = array([[ 1. , 0.9379533 , -0.04931983, + 0.30151751, 0.66318558, 0.51532523], + [ 0.9379533 , 1. , 0.30007991, + - 0.04781421, 0.88157256, 0.78052386], + [-0.04931983, 0.30007991, 1. , + - 0.96717111, 0.71483595, 0.83053601], + [ 0.30151751, -0.04781421, -0.96717111, + 1. , -0.51366032, -0.66173113], + [ 0.66318558, 0.88157256, 0.71483595, + - 0.51366032, 1. , 0.98317823], + [ 0.51532523, 0.78052386, 0.83053601, + - 0.66173113, 0.98317823, 1. ]]) + def test_simple(self): - A = array([[ 0.15391142, 0.18045767, 0.14197213], - [ 0.70461506, 0.96474128, 0.27906989], - [ 0.9297531 , 0.32296769, 0.19267156]]) - B = array([[ 0.10377691, 0.5417086 , 0.49807457], - [ 0.82872117, 0.77801674, 0.39226705], - [ 0.9314666 , 0.66800209, 0.03538394]]) - assert_almost_equal(corrcoef(A), - array([[ 1. , 0.9379533 , -0.04931983], - [ 0.9379533 , 1. , 0.30007991], - [-0.04931983, 0.30007991, 1. ]])) - assert_almost_equal(corrcoef(A, B), - array([[ 1. , 0.9379533 , -0.04931983, - 0.30151751, 0.66318558, 0.51532523], - [ 0.9379533 , 1. , 0.30007991, - - 0.04781421, 0.88157256, 0.78052386], - [-0.04931983, 0.30007991, 1. , - - 0.96717111, 0.71483595, 0.83053601], - [ 0.30151751, -0.04781421, -0.96717111, - 1. , -0.51366032, -0.66173113], - [ 0.66318558, 0.88157256, 0.71483595, - - 0.51366032, 1. , 0.98317823], - [ 0.51532523, 0.78052386, 0.83053601, - - 0.66173113, 0.98317823, 1. 
]])) + assert_almost_equal(corrcoef(self.A), self.res1) + assert_almost_equal(corrcoef(self.A, self.B), self.res2) + + def test_ddof(self): + assert_almost_equal(corrcoef(self.A, ddof=-1), self.res1) + assert_almost_equal(corrcoef(self.A, self.B, ddof=-1), self.res2) class Test_i0(TestCase): diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py index 094478545..a84e81cb9 100644 --- a/numpy/ma/extras.py +++ b/numpy/ma/extras.py @@ -1299,7 +1299,7 @@ def _covhelper(x, y=None, rowvar=True, allow_masked=True): return (x, xnotmask, rowvar) -def cov(x, y=None, rowvar=True, bias=False, allow_masked=True): +def cov(x, y=None, rowvar=True, bias=False, allow_masked=True, ddof=None): """ Estimate the covariance matrix. @@ -1329,11 +1329,18 @@ def cov(x, y=None, rowvar=True, bias=False, allow_masked=True): bias : bool, optional Default normalization (False) is by ``(N-1)``, where ``N`` is the number of observations given (unbiased estimate). If `bias` is True, - then normalization is by ``N``. + then normalization is by ``N``. This keyword can be overridden by + the keyword ``ddof`` in numpy versions >= 1.5. allow_masked : bool, optional If True, masked values are propagated pair-wise: if a value is masked in `x`, the corresponding value is masked in `y`. If False, raises a `ValueError` exception when some values are missing. + ddof : {None, int}, optional + .. versionadded:: 1.5 + If not ``None`` normalization is by ``(N - ddof)``, where ``N`` is + the number of observations; this overrides the value implied by + ``bias``. The default value is ``None``. + Raises ------ @@ -1345,17 +1352,27 @@ def cov(x, y=None, rowvar=True, bias=False, allow_masked=True): numpy.cov """ + # Check inputs + if ddof is not None and ddof != int(ddof): + raise ValueError("ddof must be an integer") + # Set up ddof + if ddof is None: + if bias: + ddof = 0 + else: + ddof = 1 + (x, xnotmask, rowvar) = _covhelper(x, y, rowvar, allow_masked) if not rowvar: - fact = np.dot(xnotmask.T, xnotmask) * 1. 
- (1 - bool(bias)) + fact = np.dot(xnotmask.T, xnotmask) * 1. - ddof result = (dot(x.T, x.conj(), strict=False) / fact).squeeze() else: - fact = np.dot(xnotmask, xnotmask.T) * 1. - (1 - bool(bias)) + fact = np.dot(xnotmask, xnotmask.T) * 1. - ddof result = (dot(x, x.T.conj(), strict=False) / fact).squeeze() return result -def corrcoef(x, y=None, rowvar=True, bias=False, allow_masked=True): +def corrcoef(x, y=None, rowvar=True, bias=False, allow_masked=True, ddof=None): """ Return correlation coefficients of the input array. @@ -1379,11 +1396,17 @@ def corrcoef(x, y=None, rowvar=True, bias=False, allow_masked=True): bias : bool, optional Default normalization (False) is by ``(N-1)``, where ``N`` is the number of observations given (unbiased estimate). If `bias` is 1, - then normalization is by ``N``. + then normalization is by ``N``. This keyword can be overridden by + the keyword ``ddof`` in numpy versions >= 1.5. allow_masked : bool, optional If True, masked values are propagated pair-wise: if a value is masked in `x`, the corresponding value is masked in `y`. If False, raises an exception. + ddof : {None, int}, optional + .. versionadded:: 1.5 + If not ``None`` normalization is by ``(N - ddof)``, where ``N`` is + the number of observations; this overrides the value implied by + ``bias``. The default value is ``None``. See Also -------- @@ -1391,14 +1414,24 @@ def corrcoef(x, y=None, rowvar=True, bias=False, allow_masked=True): cov : Estimate the covariance matrix. """ + # Check inputs + if ddof is not None and ddof != int(ddof): + raise ValueError("ddof must be an integer") + # Set up ddof + if ddof is None: + if bias: + ddof = 0 + else: + ddof = 1 + # Get the data (x, xnotmask, rowvar) = _covhelper(x, y, rowvar, allow_masked) # Compute the covariance matrix if not rowvar: - fact = np.dot(xnotmask.T, xnotmask) * 1. - (1 - bool(bias)) + fact = np.dot(xnotmask.T, xnotmask) * 1. 
- ddof c = (dot(x.T, x.conj(), strict=False) / fact).squeeze() else: - fact = np.dot(xnotmask, xnotmask.T) * 1. - (1 - bool(bias)) + fact = np.dot(xnotmask, xnotmask.T) * 1. - ddof c = (dot(x, x.T.conj(), strict=False) / fact).squeeze() # Check whether we have a scalar try: diff --git a/numpy/ma/tests/test_extras.py b/numpy/ma/tests/test_extras.py index d6cda7e70..359e07446 100644 --- a/numpy/ma/tests/test_extras.py +++ b/numpy/ma/tests/test_extras.py @@ -528,6 +528,12 @@ class TestCorrcoef(TestCase): def setUp(self): self.data = array(np.random.rand(12)) + def test_ddof(self): + "Test ddof keyword" + x = self.data + assert_almost_equal(np.corrcoef(x, ddof=0), corrcoef(x, ddof=0)) + + def test_1d_wo_missing(self): "Test cov on 1D variable w/o missing values" x = self.data |