summary | refs | log | tree | commit | diff
path: root/numpy
diff options
context:
space:
mode:
author: Charles Harris <charlesr.harris@gmail.com> 2010-07-07 04:32:00 +0000
committer: Charles Harris <charlesr.harris@gmail.com> 2010-07-07 04:32:00 +0000
commit: 8bb282307481e208f972a72c5745c63e2404cd66 (patch)
tree: 31d7f1f3ff41423e5d0d90ba20b655072dc414b9 /numpy
parent: b1c994b77d851e49a1c62248b09aeaea5645fbdf (diff)
download: numpy-8bb282307481e208f972a72c5745c63e2404cd66.tar.gz
ENH: Add ddof keyword to masked versions of cov and corrcoef.
Diffstat (limited to 'numpy')
-rw-r--r-- numpy/lib/function_base.py | 12
-rw-r--r-- numpy/lib/tests/test_function_base.py | 51
-rw-r--r-- numpy/ma/extras.py | 49
-rw-r--r-- numpy/ma/tests/test_extras.py | 6
4 files changed, 83 insertions, 35 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 3f49af5f1..b2ec9bb5a 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -1846,8 +1846,8 @@ def cov(m, y=None, rowvar=1, bias=0, ddof=None):
ddof : int, optional
.. versionadded:: 1.5
If not ``None`` normalization is by ``(N - ddof)``, where ``N`` is
- the number of observations. When defined, ``ddof`` overrides the
- value implied by ``bias``. The default value is ``None``.
+ the number of observations; this overrides the value implied by
+ ``bias``. The default value is ``None``.
Returns
-------
@@ -1893,6 +1893,10 @@ def cov(m, y=None, rowvar=1, bias=0, ddof=None):
11.71
"""
+ # Check inputs
+ if ddof is not None and ddof != int(ddof):
+ raise ValueError("ddof must be integer")
+
X = array(m, ndmin=2, dtype=float)
if X.shape[0] == 1:
rowvar = 1
@@ -1961,8 +1965,8 @@ def corrcoef(x, y=None, rowvar=1, bias=0, ddof=None):
ddof : {None, int}, optional
.. versionadded:: 1.5
If not ``None`` normalization is by ``(N - ddof)``, where ``N`` is
- the number of observations. When defined, ``ddof`` overrides the
- value implied by ``bias``. The default value is ``None``.
+ the number of observations; this overrides the value implied by
+ ``bias``. The default value is ``None``.
Returns
-------
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index 5d0f8aa45..037e8043a 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -821,30 +821,35 @@ class TestNanFunctsIntTypes(TestCase):
class TestCorrCoef(TestCase):
+ A = array([[ 0.15391142, 0.18045767, 0.14197213],
+ [ 0.70461506, 0.96474128, 0.27906989],
+ [ 0.9297531 , 0.32296769, 0.19267156]])
+ B = array([[ 0.10377691, 0.5417086 , 0.49807457],
+ [ 0.82872117, 0.77801674, 0.39226705],
+ [ 0.9314666 , 0.66800209, 0.03538394]])
+ res1 = array([[ 1. , 0.9379533 , -0.04931983],
+ [ 0.9379533 , 1. , 0.30007991],
+ [-0.04931983, 0.30007991, 1. ]])
+ res2 = array([[ 1. , 0.9379533 , -0.04931983,
+ 0.30151751, 0.66318558, 0.51532523],
+ [ 0.9379533 , 1. , 0.30007991,
+ - 0.04781421, 0.88157256, 0.78052386],
+ [-0.04931983, 0.30007991, 1. ,
+ - 0.96717111, 0.71483595, 0.83053601],
+ [ 0.30151751, -0.04781421, -0.96717111,
+ 1. , -0.51366032, -0.66173113],
+ [ 0.66318558, 0.88157256, 0.71483595,
+ - 0.51366032, 1. , 0.98317823],
+ [ 0.51532523, 0.78052386, 0.83053601,
+ - 0.66173113, 0.98317823, 1. ]])
+
def test_simple(self):
- A = array([[ 0.15391142, 0.18045767, 0.14197213],
- [ 0.70461506, 0.96474128, 0.27906989],
- [ 0.9297531 , 0.32296769, 0.19267156]])
- B = array([[ 0.10377691, 0.5417086 , 0.49807457],
- [ 0.82872117, 0.77801674, 0.39226705],
- [ 0.9314666 , 0.66800209, 0.03538394]])
- assert_almost_equal(corrcoef(A),
- array([[ 1. , 0.9379533 , -0.04931983],
- [ 0.9379533 , 1. , 0.30007991],
- [-0.04931983, 0.30007991, 1. ]]))
- assert_almost_equal(corrcoef(A, B),
- array([[ 1. , 0.9379533 , -0.04931983,
- 0.30151751, 0.66318558, 0.51532523],
- [ 0.9379533 , 1. , 0.30007991,
- - 0.04781421, 0.88157256, 0.78052386],
- [-0.04931983, 0.30007991, 1. ,
- - 0.96717111, 0.71483595, 0.83053601],
- [ 0.30151751, -0.04781421, -0.96717111,
- 1. , -0.51366032, -0.66173113],
- [ 0.66318558, 0.88157256, 0.71483595,
- - 0.51366032, 1. , 0.98317823],
- [ 0.51532523, 0.78052386, 0.83053601,
- - 0.66173113, 0.98317823, 1. ]]))
+ assert_almost_equal(corrcoef(self.A), self.res1)
+ assert_almost_equal(corrcoef(self.A, self.B), self.res2)
+
+ def test_ddof(self):
+ assert_almost_equal(corrcoef(self.A, ddof=-1), self.res1)
+ assert_almost_equal(corrcoef(self.A, self.B, ddof=-1), self.res2)
class Test_i0(TestCase):
diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py
index 094478545..a84e81cb9 100644
--- a/numpy/ma/extras.py
+++ b/numpy/ma/extras.py
@@ -1299,7 +1299,7 @@ def _covhelper(x, y=None, rowvar=True, allow_masked=True):
return (x, xnotmask, rowvar)
-def cov(x, y=None, rowvar=True, bias=False, allow_masked=True):
+def cov(x, y=None, rowvar=True, bias=False, allow_masked=True, ddof=None):
"""
Estimate the covariance matrix.
@@ -1329,11 +1329,18 @@ def cov(x, y=None, rowvar=True, bias=False, allow_masked=True):
bias : bool, optional
Default normalization (False) is by ``(N-1)``, where ``N`` is the
number of observations given (unbiased estimate). If `bias` is True,
- then normalization is by ``N``.
+ then normalization is by ``N``. This keyword can be overridden by
+ the keyword ``ddof`` in numpy versions >= 1.5.
allow_masked : bool, optional
If True, masked values are propagated pair-wise: if a value is masked
in `x`, the corresponding value is masked in `y`.
If False, raises a `ValueError` exception when some values are missing.
+ ddof : {None, int}, optional
+ .. versionadded:: 1.5
+ If not ``None`` normalization is by ``(N - ddof)``, where ``N`` is
+ the number of observations; this overrides the value implied by
+ ``bias``. The default value is ``None``.
+
Raises
------
@@ -1345,17 +1352,27 @@ def cov(x, y=None, rowvar=True, bias=False, allow_masked=True):
numpy.cov
"""
+ # Check inputs
+ if ddof is not None and ddof != int(ddof):
+ raise ValueError("ddof must be an integer")
+ # Set up ddof
+ if ddof is None:
+ if bias:
+ ddof = 0
+ else:
+ ddof = 1
+
(x, xnotmask, rowvar) = _covhelper(x, y, rowvar, allow_masked)
if not rowvar:
- fact = np.dot(xnotmask.T, xnotmask) * 1. - (1 - bool(bias))
+ fact = np.dot(xnotmask.T, xnotmask) * 1. - ddof
result = (dot(x.T, x.conj(), strict=False) / fact).squeeze()
else:
- fact = np.dot(xnotmask, xnotmask.T) * 1. - (1 - bool(bias))
+ fact = np.dot(xnotmask, xnotmask.T) * 1. - ddof
result = (dot(x, x.T.conj(), strict=False) / fact).squeeze()
return result
-def corrcoef(x, y=None, rowvar=True, bias=False, allow_masked=True):
+def corrcoef(x, y=None, rowvar=True, bias=False, allow_masked=True, ddof=None):
"""
Return correlation coefficients of the input array.
@@ -1379,11 +1396,17 @@ def corrcoef(x, y=None, rowvar=True, bias=False, allow_masked=True):
bias : bool, optional
Default normalization (False) is by ``(N-1)``, where ``N`` is the
number of observations given (unbiased estimate). If `bias` is 1,
- then normalization is by ``N``.
+ then normalization is by ``N``. This keyword can be overridden by
+ the keyword ``ddof`` in numpy versions >= 1.5.
allow_masked : bool, optional
If True, masked values are propagated pair-wise: if a value is masked
in `x`, the corresponding value is masked in `y`.
If False, raises an exception.
+ ddof : {None, int}, optional
+ .. versionadded:: 1.5
+ If not ``None`` normalization is by ``(N - ddof)``, where ``N`` is
+ the number of observations; this overrides the value implied by
+ ``bias``. The default value is ``None``.
See Also
--------
@@ -1391,14 +1414,24 @@ def corrcoef(x, y=None, rowvar=True, bias=False, allow_masked=True):
cov : Estimate the covariance matrix.
"""
+ # Check inputs
+ if ddof is not None and ddof != int(ddof):
+ raise ValueError("ddof must be an integer")
+ # Set up ddof
+ if ddof is None:
+ if bias:
+ ddof = 0
+ else:
+ ddof = 1
+
# Get the data
(x, xnotmask, rowvar) = _covhelper(x, y, rowvar, allow_masked)
# Compute the covariance matrix
if not rowvar:
- fact = np.dot(xnotmask.T, xnotmask) * 1. - (1 - bool(bias))
+ fact = np.dot(xnotmask.T, xnotmask) * 1. - ddof
c = (dot(x.T, x.conj(), strict=False) / fact).squeeze()
else:
- fact = np.dot(xnotmask, xnotmask.T) * 1. - (1 - bool(bias))
+ fact = np.dot(xnotmask, xnotmask.T) * 1. - ddof
c = (dot(x, x.T.conj(), strict=False) / fact).squeeze()
# Check whether we have a scalar
try:
diff --git a/numpy/ma/tests/test_extras.py b/numpy/ma/tests/test_extras.py
index d6cda7e70..359e07446 100644
--- a/numpy/ma/tests/test_extras.py
+++ b/numpy/ma/tests/test_extras.py
@@ -528,6 +528,12 @@ class TestCorrcoef(TestCase):
def setUp(self):
self.data = array(np.random.rand(12))
+ def test_ddof(self):
+ "Test ddof keyword"
+ x = self.data
+ assert_almost_equal(np.corrcoef(x, ddof=0), corrcoef(x, ddof=0))
+
+
def test_1d_wo_missing(self):
"Test cov on 1D variable w/o missing values"
x = self.data