diff options
author | abel <aoun@cerfacs.fr> | 2021-10-19 15:22:05 +0200 |
---|---|---|
committer | Sebastian Berg <sebastian@sipsolutions.net> | 2021-11-04 14:50:27 -0500 |
commit | d5e275b2bf65c1848203f6bff7606623793daeed (patch) | |
tree | 36d217d9b13f7ec64b468bbf663d8fbf021b983d /numpy/lib/function_base.py | |
parent | ab19ed256bf9b20340c92cebcfd6158241122c88 (diff) | |
download | numpy-d5e275b2bf65c1848203f6bff7606623793daeed.tar.gz |
DOC: Improve quantile documentation
Also removed unused imports
Diffstat (limited to 'numpy/lib/function_base.py')
-rw-r--r-- | numpy/lib/function_base.py | 87 |
1 files changed, 67 insertions, 20 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 353490fc2..a5ccc1189 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -9,8 +9,7 @@ import numpy.core.numeric as _nx from numpy.core import transpose from numpy.core.numeric import ( ones, zeros_like, arange, concatenate, array, asarray, asanyarray, empty, - ndarray, around, floor, ceil, take, dot, where, intp, - integer, isscalar, absolute + ndarray, take, dot, where, intp, integer, isscalar, absolute ) from numpy.core.umath import ( pi, add, arctan2, frompyfunc, cos, less_equal, sqrt, sin, @@ -51,7 +50,22 @@ __all__ = [ 'quantile' ] - +# _QuantileInterpolation is a dictionary listing all the supported +# interpolation methods to compute quantile/percentile. +# +# Below virtual_index refer to the index of the element where the percentile +# would be found in the sorted sample. +# When the sample contains exactly the percentile wanted, the virtual_index is +# an integer to the index of this element. +# When the percentile wanted is in between two elements, the virtual_index +# is made of a integer part (a.k.a 'i' or 'left') and a fractional part +# (a.k.a 'g' or 'gamma') +# +# Each _QuantileInterpolation have two properties +# get_virtual_index : Callable +# The function used to compute the virtual_index. +# fix_gamma : Callable +# A function used for discret methods to force the index to a specific value. _QuantileInterpolation = dict( # --- HYNDMAN and FAN methods # Discrete methods @@ -75,33 +89,33 @@ _QuantileInterpolation = dict( # Continuous methods interpolated_inverted_cdf=dict( get_virtual_index=lambda n, quantiles: - _virtual_index_formula(n, quantiles, 0, 1), + _compute_virtual_index(n, quantiles, 0, 1), fix_gamma=lambda gamma, _: gamma, ), hazen=dict( get_virtual_index=lambda n, quantiles: - _virtual_index_formula(n, quantiles, 0.5, 0.5), + _compute_virtual_index(n, quantiles, 0.5, 0.5), fix_gamma=lambda gamma, _: gamma, ), weibull=dict( get_virtual_index=lambda n, quantiles: - _virtual_index_formula(n, quantiles, 0, 0), + _compute_virtual_index(n, quantiles, 0, 0), fix_gamma=lambda gamma, _: gamma, ), # Default value linear=dict( get_virtual_index=lambda n, quantiles: - _virtual_index_formula(n, quantiles, 1, 1), + _compute_virtual_index(n, quantiles, 1, 1), fix_gamma=lambda gamma, _: gamma, ), median_unbiased=dict( get_virtual_index=lambda n, quantiles: - _virtual_index_formula(n, quantiles, 1 / 3.0, 1 / 3.0), + _compute_virtual_index(n, quantiles, 1 / 3.0, 1 / 3.0), fix_gamma=lambda gamma, _: gamma, ), normal_unbiased=dict( get_virtual_index=lambda n, quantiles: - _virtual_index_formula(n, quantiles, 3 / 8.0, 3 / 8.0), + _compute_virtual_index(n, quantiles, 3 / 8.0, 3 / 8.0), fix_gamma=lambda gamma, _: gamma, ), # --- OTHER METHODS fixme add deprecated ? @@ -3945,6 +3959,12 @@ def percentile(a, ``i < j``. If ``g`` is the fractional part of the index surrounded by ``i`` and alpha and beta are correction constants modifying i and j. + Below, 'q' is the quantile value, 'n' is the samle size and + alpha and beta are constants. + The following formula gives an interpolation "i + g" of where the quantile + would be in the sorted sample. + With 'i' being the floor and 'g' the fractional part of the result. + .. math:: i + g = (q - alpha) / ( n - alpha - beta + 1 ) @@ -4353,13 +4373,22 @@ def _quantile_is_valid(q): return True -def _virtual_index_formula(n: np.array, - quantiles: np.array, - alpha: float, - beta: float) -> np.array: +def _compute_virtual_index(n, quantiles, alpha: float, beta: float): """ Compute the floating point indexes of an array for the linear interpolation of quantiles. + n : array_like + The sample sizes. + quantiles : array_like + The quantiles values. + alpha : float + A constant used to correct the index computed. + beta : float + A constant used to correct the index computed. + + alpha and beta values depend on the chosen method + (see quantile documentation) + Reference: Hyndman&Fan paper "Sample Quantiles in Statistical Packages", DOI: 10.1080/00031305.1996.10473566 @@ -4369,14 +4398,24 @@ def _virtual_index_formula(n: np.array, ) - 1 -def _get_gamma(virtual_indexes: np.array, - previous_indexes: np.array, - interpolation: _QuantileInterpolation) -> np.array: +def _get_gamma(virtual_indexes, + previous_indexes, + interpolation: _QuantileInterpolation): """ - Compute the gamma (a.k.a 'm' or weight) for the linear interpolation + Compute gamma (a.k.a 'm' or 'weight') for the linear interpolation of quantiles. - When gamma == 0 the left bound will be picked, - When gamma == 1 the right bound will be. + + virtual_indexes : array_like + The indexes where the percentile is supposed to be found in the sorted + sample. + previous_indexes : array_like + The floor values of virtual_indexes. + interpolation : _QuantileInterpolation + The interpolation method chosen, which may have a specific rule + modifying gamma. + + gamma is usually the fractional part of virtual_indexes but can be modified + by the interpolation method. """ gamma = np.asanyarray(virtual_indexes - previous_indexes) gamma = interpolation["fix_gamma"](gamma, virtual_indexes) @@ -4387,8 +4426,16 @@ def _lerp(a, b, t, out=None): """ Compute the linear interpolation weighted by gamma on each point of two same shape array. + + a : array_like + Left bound. + b : array_like + Right bound. + t : array_like + The interpolation weight. + out : array_like + Output array. """ - # Equivalent to gamma * right + (1 - gamma) * left, see gh-14685 diff_b_a = subtract(b, a) # asanyarray is a stop-gap until gh-13105 lerp_interpolation = asanyarray(add(a, diff_b_a * t, out=out)) |