diff options
author | Charles Harris <charlesr.harris@gmail.com> | 2013-06-20 20:44:54 -0600 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2013-08-12 22:33:55 -0600 |
commit | fcb0fef5c673ed0a5442b18bcd8c391907b4f9a7 (patch) | |
tree | 24726ff3fbb7a167a8fdf89ac5cb74792c9cc6e7 /numpy/core/fromnumeric.py | |
parent | 777b6453e166df252298a47ef4f0e867614ac94a (diff) | |
download | numpy-fcb0fef5c673ed0a5442b18bcd8c391907b4f9a7.tar.gz |
MAINT: Separate nan functions into their own module.
New files lib/nanfunctions.py and lib/tests/test_nanfunctions.py are
added and both the previous and new nan functions and tests are moved
into them.
The existing nan functions moved from lib/function_base are:
nansum, nanmin, nanmax, nanargmin, nanargmax
The added nan functions moved from core/fromnumeric are:
nanmean, nanvar, nanstd
Diffstat (limited to 'numpy/core/fromnumeric.py')
-rw-r--r-- | numpy/core/fromnumeric.py | 279 |
1 files changed, 10 insertions, 269 deletions
diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py index 8686e0531..35d36d960 100644 --- a/numpy/core/fromnumeric.py +++ b/numpy/core/fromnumeric.py @@ -14,18 +14,16 @@ from . import _methods _dt_ = nt.sctype2char -# functions that are now methods -__all__ = ['take', 'reshape', 'choose', 'repeat', 'put', - 'swapaxes', 'transpose', 'sort', 'argsort', 'partition', 'argpartition', - 'argmax', 'argmin', - 'searchsorted', 'alen', - 'resize', 'diagonal', 'trace', 'ravel', 'nonzero', 'shape', - 'compress', 'clip', 'sum', 'product', 'prod', 'sometrue', 'alltrue', - 'any', 'all', 'cumsum', 'cumproduct', 'cumprod', 'ptp', 'ndim', - 'rank', 'size', 'around', 'round_', 'mean', 'nanmean', - 'std', 'nanstd', 'var', 'nanvar', 'squeeze', - 'amax', 'amin', - ] +# functions that are methods +__all__ = [ + 'alen', 'all', 'alltrue', 'amax', 'amin', 'any', 'argmax', + 'argmin', 'argpartition', 'argsort', 'around', 'choose', 'clip', + 'compress', 'cumprod', 'cumproduct', 'cumsum', 'diagonal', 'mean', + 'ndim', 'nonzero', 'partition', 'prod', 'product', 'ptp', 'put', + 'rank', 'ravel', 'repeat', 'reshape', 'resize', 'round_', + 'searchsorted', 'shape', 'size', 'sometrue', 'sort', 'squeeze', + 'std', 'sum', 'swapaxes', 'take', 'trace', 'transpose', 'var', + ] try: @@ -2714,81 +2712,6 @@ def mean(a, axis=None, dtype=None, out=None, keepdims=False): return _methods._mean(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims) -def nanmean(a, axis=None, dtype=None, out=None, keepdims=False): - """ - Compute the arithmetic mean along the specified axis, ignoring NaNs. - - Returns the average of the array elements. The average is taken over - the flattened array by default, otherwise over the specified axis. - `float64` intermediate and return values are used for integer inputs. - - Parameters - ---------- - a : array_like - Array containing numbers whose mean is desired. If `a` is not an - array, a conversion is attempted. 
- axis : int, optional - Axis along which the means are computed. The default is to compute - the mean of the flattened array. - dtype : data-type, optional - Type to use in computing the mean. For integer inputs, the default - is `float64`; for floating point inputs, it is the same as the - input dtype. - out : ndarray, optional - Alternate output array in which to place the result. The default - is ``None``; if provided, it must have the same shape as the - expected output, but the type will be cast if necessary. - See `doc.ufuncs` for details. - keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the original `arr`. - - Returns - ------- - m : ndarray, see dtype parameter above - If `out=None`, returns a new array containing the mean values, - otherwise a reference to the output array is returned. - - See Also - -------- - average : Weighted average - mean : Arithmetic mean taken while not ignoring NaNs - var, nanvar - - Notes - ----- - The arithmetic mean is the sum of the non-nan elements along the axis - divided by the number of non-nan elements. - - Note that for floating-point input, the mean is computed using the - same precision the input has. Depending on the input data, this can - cause the results to be inaccurate, especially for `float32`. - Specifying a higher-precision accumulator using the `dtype` keyword - can alleviate this issue. 
- - Examples - -------- - >>> a = np.array([[1, np.nan], [3, 4]]) - >>> np.nanmean(a) - 2.6666666666666665 - >>> np.nanmean(a, axis=0) - array([ 2., 4.]) - >>> np.nanmean(a, axis=1) - array([ 1., 3.5]) - - """ - if not (type(a) is mu.ndarray): - try: - mean = a.nanmean - return mean(axis=axis, dtype=dtype, out=out) - except AttributeError: - pass - - return _methods._nanmean(a, axis=axis, dtype=dtype, - out=out, keepdims=keepdims) - - def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): """ Compute the standard deviation along the specified axis. @@ -2891,97 +2814,6 @@ def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): return _methods._std(a, axis=axis, dtype=dtype, out=out, ddof=ddof, keepdims=keepdims) -def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): - """ - Compute the standard deviation along the specified axis, while - ignoring NaNs. - - Returns the standard deviation, a measure of the spread of a distribution, - of the non-NaN array elements. The standard deviation is computed for the - flattened array by default, otherwise over the specified axis. - - Parameters - ---------- - a : array_like - Calculate the standard deviation of the non-NaN values. - axis : int, optional - Axis along which the standard deviation is computed. The default is - to compute the standard deviation of the flattened array. - dtype : dtype, optional - Type to use in computing the standard deviation. For arrays of - integer type the default is float64, for arrays of float types it is - the same as the array type. - out : ndarray, optional - Alternative output array in which to place the result. It must have - the same shape as the expected output but the type (of the calculated - values) will be cast if necessary. - ddof : int, optional - Means Delta Degrees of Freedom. The divisor used in calculations - is ``N - ddof``, where ``N`` represents the number of elements. - By default `ddof` is zero. 
- keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the original `arr`. - - Returns - ------- - standard_deviation : ndarray, see dtype parameter above. - If `out` is None, return a new array containing the standard deviation, - otherwise return a reference to the output array. - - See Also - -------- - var, mean, std - nanvar, nanmean - numpy.doc.ufuncs : Section "Output arguments" - - Notes - ----- - The standard deviation is the square root of the average of the squared - deviations from the mean, i.e., ``std = sqrt(mean(abs(x - x.mean())**2))``. - - The average squared deviation is normally calculated as - ``x.sum() / N``, where ``N = len(x)``. If, however, `ddof` is specified, - the divisor ``N - ddof`` is used instead. In standard statistical - practice, ``ddof=1`` provides an unbiased estimator of the variance - of the infinite population. ``ddof=0`` provides a maximum likelihood - estimate of the variance for normally distributed variables. The - standard deviation computed in this function is the square root of - the estimated variance, so even with ``ddof=1``, it will not be an - unbiased estimate of the standard deviation per se. - - Note that, for complex numbers, `std` takes the absolute - value before squaring, so that the result is always real and nonnegative. - - For floating-point input, the *std* is computed using the same - precision the input has. Depending on the input data, this can cause - the results to be inaccurate, especially for float32 (see example below). - Specifying a higher-accuracy accumulator using the `dtype` keyword can - alleviate this issue. 
- - Examples - -------- - >>> a = np.array([[1, np.nan], [3, 4]]) - >>> np.nanstd(a) - 1.247219128924647 - >>> np.nanstd(a, axis=0) - array([ 1., 0.]) - >>> np.nanstd(a, axis=1) - array([ 0., 0.5]) - - """ - - if not (type(a) is mu.ndarray): - try: - nanstd = a.nanstd - return nanstd(axis=axis, dtype=dtype, out=out, ddof=ddof) - except AttributeError: - pass - - return _methods._nanstd(a, axis=axis, dtype=dtype, out=out, ddof=ddof, - keepdims=keepdims) - def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): """ @@ -3085,94 +2917,3 @@ def var(a, axis=None, dtype=None, out=None, ddof=0, return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof, keepdims=keepdims) - -def nanvar(a, axis=None, dtype=None, out=None, ddof=0, - keepdims=False): - """ - Compute the variance along the specified axis, while ignoring NaNs. - - Returns the variance of the array elements, a measure of the spread of a - distribution. The variance is computed for the flattened array by - default, otherwise over the specified axis. - - Parameters - ---------- - a : array_like - Array containing numbers whose variance is desired. If `a` is not an - array, a conversion is attempted. - axis : int, optional - Axis along which the variance is computed. The default is to compute - the variance of the flattened array. - dtype : data-type, optional - Type to use in computing the variance. For arrays of integer type - the default is `float32`; for arrays of float types it is the same as - the array type. - out : ndarray, optional - Alternate output array in which to place the result. It must have - the same shape as the expected output, but the type is cast if - necessary. - ddof : int, optional - "Delta Degrees of Freedom": the divisor used in the calculation is - ``N - ddof``, where ``N`` represents the number of elements. By - default `ddof` is zero. 
- keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the original `arr`. - - Returns - ------- - variance : ndarray, see dtype parameter above - If ``out=None``, returns a new array containing the variance; - otherwise, a reference to the output array is returned. - - See Also - -------- - std : Standard deviation - mean : Average - var : Variance while not ignoring NaNs - nanstd, nanmean - numpy.doc.ufuncs : Section "Output arguments" - - Notes - ----- - The variance is the average of the squared deviations from the mean, - i.e., ``var = mean(abs(x - x.mean())**2)``. - - The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``. - If, however, `ddof` is specified, the divisor ``N - ddof`` is used - instead. In standard statistical practice, ``ddof=1`` provides an - unbiased estimator of the variance of a hypothetical infinite population. - ``ddof=0`` provides a maximum likelihood estimate of the variance for - normally distributed variables. - - Note that for complex numbers, the absolute value is taken before - squaring, so that the result is always real and nonnegative. - - For floating-point input, the variance is computed using the same - precision the input has. Depending on the input data, this can cause - the results to be inaccurate, especially for `float32` (see example - below). Specifying a higher-accuracy accumulator using the ``dtype`` - keyword can alleviate this issue. 
- - Examples - -------- - >>> a = np.array([[1, np.nan], [3, 4]]) - >>> np.var(a) - 1.5555555555555554 - >>> np.nanvar(a, axis=0) - array([ 1., 0.]) - >>> np.nanvar(a, axis=1) - array([ 0., 0.25]) - - """ - - if not (type(a) is mu.ndarray): - try: - nanvar = a.nanvar - return nanvar(axis=axis, dtype=dtype, out=out, ddof=ddof) - except AttributeError: - pass - - return _methods._nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof, - keepdims=keepdims) |