author    Travis E. Oliphant <teoliphant@gmail.com>  2011-09-13 00:11:32 -0500
committer Travis E. Oliphant <teoliphant@gmail.com>  2011-09-13 00:12:20 -0500
commit    af22fc43921e2e7f77e909a1acf77011f682ff7b (patch)
tree      f52908796c4ed7b629806c2e5b8d7b50c9be0ea3 /numpy
parent    073bc39c58a6788ffda6aaa7549955cc3d4fdc93 (diff)
download  numpy-af22fc43921e2e7f77e909a1acf77011f682ff7b.tar.gz
ENH: Add weights and covariance estimate to standard polyfit.
Diffstat (limited to 'numpy')
-rw-r--r--  numpy/lib/polynomial.py            | 65
-rw-r--r--  numpy/lib/tests/test_polynomial.py | 27
2 files changed, 72 insertions(+), 20 deletions(-)
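A minimal sketch of the new keywords in use (the sample values mirror the new
unit test below; the call pattern, not the numbers, is the point):

    import numpy as np

    x = np.linspace(0, 2, 7)
    y = np.polyval([3., 2., 1.], x) + np.array([1, -1, 1, -1, 1, -1, 1])

    # cov=True returns the coefficients and their covariance matrix.
    c, V = np.polyfit(x, y, 2, cov=True)
    print(np.sqrt(np.diag(V)))      # one-sigma estimate per coefficient

    # w weights the y-coordinates point by point.
    w = np.arange(8, 1, -1)**2 / 7.0
    cw, Vw = np.polyfit(x, y, 2, w=w, cov=True)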
diff --git a/numpy/lib/polynomial.py b/numpy/lib/polynomial.py
index 229fe020d..fad06f4df 100644
--- a/numpy/lib/polynomial.py
+++ b/numpy/lib/polynomial.py
@@ -10,11 +10,11 @@ import re
import warnings
import numpy.core.numeric as NX
-from numpy.core import isscalar, abs, finfo, atleast_1d, hstack
+from numpy.core import isscalar, abs, finfo, atleast_1d, hstack, dot
from numpy.lib.twodim_base import diag, vander
from numpy.lib.function_base import trim_zeros, sort_complex
from numpy.lib.type_check import iscomplex, real, imag
-from numpy.linalg import eigvals, lstsq
+from numpy.linalg import eigvals, lstsq, inv
 class RankWarning(UserWarning):
     """
@@ -391,7 +391,7 @@ def polyder(p, m=1):
     val = poly1d(val)
     return val
-def polyfit(x, y, deg, rcond=None, full=False):
+def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False):
"""
Least squares polynomial fit.
@@ -419,6 +419,11 @@ def polyfit(x, y, deg, rcond=None, full=False):
         False (the default) just the coefficients are returned, when True
         diagnostic information from the singular value decomposition is also
         returned.
+    w : array_like, shape (M,), optional
+        Weights to apply to the y-coordinates of the sample points.
+    cov : bool, optional
+        Return the estimate and the covariance matrix of the estimate.
+        If `full` is True, then `cov` is not returned.
     Returns
     -------
@@ -431,6 +436,12 @@ def polyfit(x, y, deg, rcond=None, full=False):
         Vandermonde coefficient matrix, its singular values, and the specified
         value of `rcond`. For more details, see `linalg.lstsq`.
+    V : ndarray, shape (M,M) or (M,M,K) : present only if `full` = False and `cov` = True
+        The covariance matrix of the polynomial coefficient estimates. The diagonal
+        of this matrix gives the variance estimate for each coefficient. If y is a
+        2-D array, the covariance matrix for the `k`-th data set is in ``V[:,:,k]``.
+
+
     Warns
     -----
     RankWarning
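The (M,M,K) case documented for `V` can be sanity-checked directly; a short
sketch (the sample arrays reuse the test data below, the variable names are
mine):

    import numpy as np

    x = np.linspace(0, 2, 7)
    err = np.array([1, -1, 1, -1, 1, -1, 1])
    y = np.polyval([3., 2., 1.], x)
    yy = np.column_stack([y + err, y + err])   # two data sets, shape (7, 2)

    m, V = np.polyfit(x, yy, 2, cov=True)
    print(m.shape, V.shape)                    # (3, 2) and (3, 3, 2)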
@@ -545,34 +556,52 @@ def polyfit(x, y, deg, rcond=None, full=False):
     if rcond is None :
         rcond = len(x)*finfo(x.dtype).eps
-    # scale x to improve condition number
-    scale = abs(x).max()
-    if scale != 0 :
-        x /= scale
+    # set up least squares equation for powers of x
+    lhs = vander(x, order)
+    rhs = y
+
+    # apply weighting
+    if w is not None:
+        w = NX.asarray(w) + 0.0
+        if w.ndim != 1:
+            raise TypeError("expected a 1-d array for weights")
+        if w.shape[0] != y.shape[0]:
+            raise TypeError("expected w and y to have the same length")
+        lhs *= w[:, NX.newaxis]
+        if rhs.ndim == 2:
+            rhs *= w[:, NX.newaxis]
+        else:
+            rhs *= w
-    # solve least squares equation for powers of x
-    v = vander(x, order)
-    c, resids, rank, s = lstsq(v, y, rcond)
+    # scale lhs to improve condition number and solve
+    scale = NX.sqrt((lhs*lhs).sum(axis=0))
+    lhs /= scale
+    c, resids, rank, s = lstsq(lhs, rhs, rcond)
+    c = (c.T/scale).T  # broadcast scale coefficients
     # warn on rank reduction, which indicates an ill conditioned matrix
     if rank != order and not full:
         msg = "Polyfit may be poorly conditioned"
         warnings.warn(msg, RankWarning)
-    # scale returned coefficients
-    if scale != 0 :
-        if c.ndim == 1 :
-            c /= vander([scale], order)[0]
-        else :
-            c /= vander([scale], order).T
-
     if full :
         return c, resids, rank, s, rcond
+    elif cov :
+        Vbase = inv(dot(lhs.T, lhs))
+        Vbase /= NX.outer(scale, scale)
+        # Some literature ignores the extra -2.0 factor in the denominator, but
+        # it is included here because the covariance of the multivariate Student-t
+        # distribution (which a Bayesian uncertainty analysis implies) includes it.
+        # It also gives a slightly more conservative estimate of the uncertainty.
+        fac = resids / (len(x) - order - 2.0)
+        if y.ndim == 1:
+            return c, Vbase * fac
+        else:
+            return c, Vbase[:,:,NX.newaxis] * fac
     else :
         return c
-
 def polyval(p, x):
     """
     Evaluate a polynomial at specific values.
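For reference, the covariance branch above can be reproduced outside of
polyfit; a standalone sketch for a 1-D y (the function and variable names are
mine, and later NumPy releases revisit the -2.0 scaling convention):

    import numpy as np

    def polyfit_cov_sketch(x, y, deg):
        order = deg + 1
        lhs = np.vander(x, order)
        # Column scaling, as in the patch, to improve the condition number.
        scale = np.sqrt((lhs*lhs).sum(axis=0))
        lhs_s = lhs / scale
        c, resids, rank, s = np.linalg.lstsq(lhs_s, y, rcond=None)
        c = c / scale
        # Covariance of the scaled problem, with the scaling undone.
        Vbase = np.linalg.inv(lhs_s.T @ lhs_s) / np.outer(scale, scale)
        # The -2.0 follows the patch's multivariate Student-t argument.
        fac = resids / (len(x) - order - 2.0)
        return c, Vbase * fac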
diff --git a/numpy/lib/tests/test_polynomial.py b/numpy/lib/tests/test_polynomial.py
index 8c3a6d62b..6860257e2 100644
--- a/numpy/lib/tests/test_polynomial.py
+++ b/numpy/lib/tests/test_polynomial.py
@@ -100,10 +100,27 @@ class TestDocs(TestCase):
     def test_polyfit(self) :
         c = np.array([3., 2., 1.])
-        x = np.linspace(0,2,5)
+        x = np.linspace(0,2,7)
         y = np.polyval(c,x)
+        err = [1,-1,1,-1,1,-1,1]
+        weights = np.arange(8,1,-1)**2/7.0
+
         # check 1D case
-        assert_almost_equal(c, np.polyfit(x,y,2))
+        m, cov = np.polyfit(x, y+err, 2, cov=True)
+        est = [3.8571, 0.2857, 1.619]
+        assert_almost_equal(est, m, decimal=4)
+        val0 = [[  2.9388,  -5.8776,   1.6327],
+                [ -5.8776,  12.7347,  -4.2449],
+                [  1.6327,  -4.2449,   2.3220]]
+        assert_almost_equal(val0, cov, decimal=4)
+
+        m2, cov2 = np.polyfit(x, y+err, 2, w=weights, cov=True)
+        assert_almost_equal([4.8927, -1.0177, 1.7768], m2, decimal=4)
+        val = [[  8.7929, -10.0103,   0.9756],
+               [-10.0103,  13.6134,  -1.8178],
+               [  0.9756,  -1.8178,   0.6674]]
+        assert_almost_equal(val, cov2, decimal=4)
+
         # check 2D (n,1) case
         y = y[:,np.newaxis]
         c = c[:,np.newaxis]
@@ -113,6 +130,12 @@ class TestDocs(TestCase):
         cc = np.concatenate((c,c), axis=1)
         assert_almost_equal(cc, np.polyfit(x,yy,2))
+        m, cov = np.polyfit(x, yy + np.array(err)[:,np.newaxis], 2, cov=True)
+        assert_almost_equal(est, m[:,0], decimal=4)
+        assert_almost_equal(est, m[:,1], decimal=4)
+        assert_almost_equal(val0, cov[:,:,0], decimal=4)
+        assert_almost_equal(val0, cov[:,:,1], decimal=4)
+
     def test_objects(self):
         from decimal import Decimal
         p = np.poly1d([Decimal('4.0'), Decimal('3.0'), Decimal('2.0')])
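One behavioral note worth recording: the patch multiplies both sides of the
least squares system by w, so the fit minimizes sum((w_i*(p(x_i) - y_i))**2),
i.e. the weights apply to the residuals directly, not squared. A quick
equivalence check against an explicit weighted solve (a sketch; the helper
arrays are mine):

    import numpy as np

    x = np.linspace(0, 2, 7)
    y = np.polyval([3., 2., 1.], x) + np.array([1, -1, 1, -1, 1, -1, 1])
    w = np.arange(8, 1, -1)**2 / 7.0

    c_fit = np.polyfit(x, y, 2, w=w)
    A = np.vander(x, 3) * w[:, np.newaxis]     # weighted design matrix
    c_ref = np.linalg.lstsq(A, y * w, rcond=None)[0]
    np.testing.assert_allclose(c_fit, c_ref)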