ENH: Add ddof keyword to masked versions of cov and corrcoef.

author: Charles Harris <charlesr.harris@gmail.com> 2010-07-07 04:32:00 +0000
committer: Charles Harris <charlesr.harris@gmail.com> 2010-07-07 04:32:00 +0000
commit: 8bb282307481e208f972a72c5745c63e2404cd66 (patch)
tree: 31d7f1f3ff41423e5d0d90ba20b655072dc414b9 /numpy
parent: b1c994b77d851e49a1c62248b09aeaea5645fbdf (diff)
download: numpy-8bb282307481e208f972a72c5745c63e2404cd66.tar.gz
4 files changed, 83 insertions, 35 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 3f49af5f1..b2ec9bb5a 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -1846,8 +1846,8 @@ def cov(m, y=None, rowvar=1, bias=0, ddof=None):
     ddof : int, optional
         .. versionadded:: 1.5
         If not ``None`` normalization is by ``(N - ddof)``, where ``N`` is
-        the number of observations. When defined, ``ddof`` overrides the
-        value implied by ``bias``. The default value is ``None``.
+        the number of observations; this overrides the value implied by
+        ``bias``. The default value is ``None``.
 
     Returns
     -------
@@ -1893,6 +1893,10 @@ def cov(m, y=None, rowvar=1, bias=0, ddof=None):
     11.71
 
     """
+    # Check inputs
+    if ddof is not None and ddof != int(ddof):
+        raise ValueError("ddof must be integer")
+
     X = array(m, ndmin=2, dtype=float)
     if X.shape[0] == 1:
         rowvar = 1
@@ -1961,8 +1965,8 @@ def corrcoef(x, y=None, rowvar=1, bias=0, ddof=None):
     ddof : {None, int}, optional
         .. versionadded:: 1.5
         If not ``None`` normalization is by ``(N - ddof)``, where ``N`` is
-        the number of observations. When defined, ``ddof`` overrides the
-        value implied by ``bias``. The default value is ``None``.
+        the number of observations; this overrides the value implied by
+        ``bias``. The default value is ``None``.
 
     Returns
     -------
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index 5d0f8aa45..037e8043a 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -821,30 +821,35 @@ class TestNanFunctsIntTypes(TestCase):
 
 
 class TestCorrCoef(TestCase):
+    A = array([[ 0.15391142, 0.18045767, 0.14197213],
+               [ 0.70461506, 0.96474128, 0.27906989],
+               [ 0.9297531 , 0.32296769, 0.19267156]])
+    B = array([[ 0.10377691, 0.5417086 , 0.49807457],
+               [ 0.82872117, 0.77801674, 0.39226705],
+               [ 0.9314666 , 0.66800209, 0.03538394]])
+    res1 = array([[ 1.        , 0.9379533 , -0.04931983],
+               [ 0.9379533 , 1.        , 0.30007991],
+               [-0.04931983, 0.30007991, 1.        ]])
+    res2 = array([[ 1.        , 0.9379533 , -0.04931983,
+                 0.30151751, 0.66318558, 0.51532523],
+               [ 0.9379533 , 1.        , 0.30007991,
+                 - 0.04781421, 0.88157256, 0.78052386],
+               [-0.04931983, 0.30007991, 1.        ,
+                 - 0.96717111, 0.71483595, 0.83053601],
+               [ 0.30151751, -0.04781421, -0.96717111,
+                 1.        , -0.51366032, -0.66173113],
+               [ 0.66318558, 0.88157256, 0.71483595,
+                 - 0.51366032, 1.        , 0.98317823],
+               [ 0.51532523, 0.78052386, 0.83053601,
+                 - 0.66173113, 0.98317823, 1.        ]])
+
     def test_simple(self):
-        A = array([[ 0.15391142, 0.18045767, 0.14197213],
-                   [ 0.70461506, 0.96474128, 0.27906989],
-                   [ 0.9297531 , 0.32296769, 0.19267156]])
-        B = array([[ 0.10377691, 0.5417086 , 0.49807457],
-                   [ 0.82872117, 0.77801674, 0.39226705],
-                   [ 0.9314666 , 0.66800209, 0.03538394]])
-        assert_almost_equal(corrcoef(A),
-                            array([[ 1.        , 0.9379533 , -0.04931983],
-                                   [ 0.9379533 , 1.        , 0.30007991],
-                                   [-0.04931983, 0.30007991, 1.        ]]))
-        assert_almost_equal(corrcoef(A, B),
-                            array([[ 1.        , 0.9379533 , -0.04931983,
-                                     0.30151751, 0.66318558, 0.51532523],
-                                   [ 0.9379533 , 1.        , 0.30007991,
-                                     - 0.04781421, 0.88157256, 0.78052386],
-                                   [-0.04931983, 0.30007991, 1.        ,
-                                     - 0.96717111, 0.71483595, 0.83053601],
-                                   [ 0.30151751, -0.04781421, -0.96717111,
-                                     1.        , -0.51366032, -0.66173113],
-                                   [ 0.66318558, 0.88157256, 0.71483595,
-                                     - 0.51366032, 1.        , 0.98317823],
-                                   [ 0.51532523, 0.78052386, 0.83053601,
-                                     - 0.66173113, 0.98317823, 1.        ]]))
+        assert_almost_equal(corrcoef(self.A), self.res1)
+        assert_almost_equal(corrcoef(self.A, self.B), self.res2)
+
+    def test_ddof(self):
+        assert_almost_equal(corrcoef(self.A, ddof=-1), self.res1)
+        assert_almost_equal(corrcoef(self.A, self.B, ddof=-1), self.res2)
 
 
 class Test_i0(TestCase):
diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py
index 094478545..a84e81cb9 100644
--- a/numpy/ma/extras.py
+++ b/numpy/ma/extras.py
@@ -1299,7 +1299,7 @@ def _covhelper(x, y=None, rowvar=True, allow_masked=True):
     return (x, xnotmask, rowvar)
 
 
-def cov(x, y=None, rowvar=True, bias=False, allow_masked=True):
+def cov(x, y=None, rowvar=True, bias=False, allow_masked=True, ddof=None):
     """
     Estimate the covariance matrix.
 
@@ -1329,11 +1329,18 @@ def cov(x, y=None, rowvar=True, bias=False, allow_masked=True):
     bias : bool, optional
         Default normalization (False) is by ``(N-1)``, where ``N`` is the
         number of observations given (unbiased estimate). If `bias` is True,
-        then normalization is by ``N``.
+        then normalization is by ``N``. This keyword can be overridden by
+        the keyword ``ddof`` in numpy versions >= 1.5.
     allow_masked : bool, optional
         If True, masked values are propagated pair-wise: if a value is masked
         in `x`, the corresponding value is masked in `y`.
         If False, raises a `ValueError` exception when some values are missing.
+    ddof : {None, int}, optional
+        .. versionadded:: 1.5
+        If not ``None``, normalization is by ``(N - ddof)``, where ``N`` is
+        the number of observations; ``ddof`` then overrides the value
+        implied by ``bias``. The default value is ``None``.
+
 
     Raises
     ------
@@ -1345,17 +1352,27 @@ def cov(x, y=None, rowvar=True, bias=False, allow_masked=True):
     numpy.cov
 
     """
+    # Check inputs
+    if ddof is not None and ddof != int(ddof):
+        raise ValueError("ddof must be an integer")
+    # Set up ddof
+    if ddof is None:
+        if bias:
+            ddof = 0
+        else:
+            ddof = 1
+
     (x, xnotmask, rowvar) = _covhelper(x, y, rowvar, allow_masked)
     if not rowvar:
-        fact = np.dot(xnotmask.T, xnotmask) * 1. - (1 - bool(bias))
+        fact = np.dot(xnotmask.T, xnotmask) * 1. - ddof
         result = (dot(x.T, x.conj(), strict=False) / fact).squeeze()
     else:
-        fact = np.dot(xnotmask, xnotmask.T) * 1. - (1 - bool(bias))
+        fact = np.dot(xnotmask, xnotmask.T) * 1. - ddof
         result = (dot(x, x.T.conj(), strict=False) / fact).squeeze()
     return result
 
 
-def corrcoef(x, y=None, rowvar=True, bias=False, allow_masked=True):
+def corrcoef(x, y=None, rowvar=True, bias=False, allow_masked=True, ddof=None):
     """
     Return correlation coefficients of the input array.
 
@@ -1379,11 +1396,17 @@ def corrcoef(x, y=None, rowvar=True, bias=False, allow_masked=True):
     bias : bool, optional
         Default normalization (False) is by ``(N-1)``, where ``N`` is the
         number of observations given (unbiased estimate). If `bias` is 1,
-        then normalization is by ``N``.
+        then normalization is by ``N``. This keyword can be overridden by
+        the keyword ``ddof`` in numpy versions >= 1.5.
     allow_masked : bool, optional
         If True, masked values are propagated pair-wise: if a value is masked
         in `x`, the corresponding value is masked in `y`.
         If False, raises an exception.
+    ddof : {None, int}, optional
+        .. versionadded:: 1.5
+        If not ``None``, normalization is by ``(N - ddof)``, where ``N`` is
+        the number of observations; ``ddof`` then overrides the value
+        implied by ``bias``. The default value is ``None``.
 
     See Also
     --------
@@ -1391,14 +1414,24 @@ def corrcoef(x, y=None, rowvar=True, bias=False, allow_masked=True):
     cov : Estimate the covariance matrix.
 
     """
+    # Check inputs
+    if ddof is not None and ddof != int(ddof):
+        raise ValueError("ddof must be an integer")
+    # Set up ddof
+    if ddof is None:
+        if bias:
+            ddof = 0
+        else:
+            ddof = 1
+
     # Get the data
     (x, xnotmask, rowvar) = _covhelper(x, y, rowvar, allow_masked)
     # Compute the covariance matrix
     if not rowvar:
-        fact = np.dot(xnotmask.T, xnotmask) * 1. - (1 - bool(bias))
+        fact = np.dot(xnotmask.T, xnotmask) * 1. - ddof
         c = (dot(x.T, x.conj(), strict=False) / fact).squeeze()
     else:
-        fact = np.dot(xnotmask, xnotmask.T) * 1. - (1 - bool(bias))
+        fact = np.dot(xnotmask, xnotmask.T) * 1. - ddof
         c = (dot(x, x.T.conj(), strict=False) / fact).squeeze()
     # Check whether we have a scalar
     try:
diff --git a/numpy/ma/tests/test_extras.py b/numpy/ma/tests/test_extras.py
index d6cda7e70..359e07446 100644
--- a/numpy/ma/tests/test_extras.py
+++ b/numpy/ma/tests/test_extras.py
@@ -528,6 +528,12 @@ class TestCorrcoef(TestCase):
     def setUp(self):
         self.data = array(np.random.rand(12))
 
+    def test_ddof(self):
+        "Test ddof keyword"
+        x = self.data
+        assert_almost_equal(np.corrcoef(x, ddof=0), corrcoef(x, ddof=0))
+
+
     def test_1d_wo_missing(self):
         "Test cov on 1D variable w/o missing values"
         x = self.data
author	Charles Harris <charlesr.harris@gmail.com>	2010-07-07 04:32:00 +0000
committer	Charles Harris <charlesr.harris@gmail.com>	2010-07-07 04:32:00 +0000
commit	8bb282307481e208f972a72c5745c63e2404cd66 (patch)
tree	31d7f1f3ff41423e5d0d90ba20b655072dc414b9 /numpy
parent	b1c994b77d851e49a1c62248b09aeaea5645fbdf (diff)
download	numpy-8bb282307481e208f972a72c5745c63e2404cd66.tar.gz