Diffstat (limited to 'numpy/lib')
 numpy/lib/function_base.py            | 593
 numpy/lib/nanfunctions.py             | 212
 numpy/lib/tests/test_function_base.py | 177
 3 files changed, 788 insertions(+), 194 deletions(-)
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 20e32a78d..67a34f6e1 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -52,6 +52,89 @@ __all__ = [
]
+_QuantileInterpolation = dict(
+ # --- HYNDMAN and FAN methods
+ # Discrete methods
+ inverted_cdf=dict(
+ get_virtual_index=lambda n, quantiles: _inverted_cdf(n, quantiles),
+ fix_gamma=lambda gamma, _: gamma, # should never be called
+ ),
+ averaged_inverted_cdf=dict(
+ get_virtual_index=lambda n, quantiles: (n * quantiles) - 1,
+ fix_gamma=lambda gamma, _: _get_gamma_mask(
+ shape=gamma.shape,
+ default_value=1.,
+ conditioned_value=0.5,
+ where=gamma == 0),
+ ),
+ closest_observation=dict(
+ get_virtual_index=lambda n, quantiles: _closest_observation(n,
+ quantiles),
+ fix_gamma=lambda gamma, _: gamma, # should never be called
+ ),
+ # Continuous methods
+ interpolated_inverted_cdf=dict(
+ get_virtual_index=lambda n, quantiles:
+ _virtual_index_formula(n, quantiles, 0, 1),
+ fix_gamma=lambda gamma, _: gamma,
+ ),
+ hazen=dict(
+ get_virtual_index=lambda n, quantiles:
+ _virtual_index_formula(n, quantiles, 0.5, 0.5),
+ fix_gamma=lambda gamma, _: gamma,
+ ),
+ weibull=dict(
+ get_virtual_index=lambda n, quantiles:
+ _virtual_index_formula(n, quantiles, 0, 0),
+ fix_gamma=lambda gamma, _: gamma,
+ ),
+ # Default value
+ linear=dict(
+ get_virtual_index=lambda n, quantiles:
+ _virtual_index_formula(n, quantiles, 1, 1),
+ fix_gamma=lambda gamma, _: gamma,
+ ),
+ median_unbiased=dict(
+ get_virtual_index=lambda n, quantiles:
+ _virtual_index_formula(n, quantiles, 1 / 3.0, 1 / 3.0),
+ fix_gamma=lambda gamma, _: gamma,
+ ),
+ normal_unbiased=dict(
+ get_virtual_index=lambda n, quantiles:
+ _virtual_index_formula(n, quantiles, 3 / 8.0, 3 / 8.0),
+ fix_gamma=lambda gamma, _: gamma,
+ ),
+ # --- OTHER METHODS fixme add deprecated ?
+ lower=dict(
+ get_virtual_index=lambda n, quantiles: np.floor(
+ (n - 1) * quantiles).astype(np.intp),
+ fix_gamma=lambda gamma, _: gamma,
+ # should never be called, index dtype is int
+ ),
+ higher=dict(
+ get_virtual_index=lambda n, quantiles: np.ceil(
+ (n - 1) * quantiles).astype(np.intp),
+ fix_gamma=lambda gamma, _: gamma,
+ # should never be called, index dtype is int
+ ),
+ midpoint=dict(
+ get_virtual_index=lambda n, quantiles: 0.5 * (
+ np.floor((n - 1) * quantiles)
+ + np.ceil((n - 1) * quantiles)),
+ fix_gamma=lambda gamma, index: _get_gamma_mask(
+ shape=gamma.shape,
+ default_value=0.5,
+ conditioned_value=0.,
+ where=index % 1 == 0),
+ ),
+ nearest=dict(
+ get_virtual_index=lambda n, quantiles: np.around(
+ (n - 1) * quantiles).astype(np.intp),
+ fix_gamma=lambda gamma, _: gamma,
+ # should never be called, index dtype is int
+ ))
+
+
def _rot90_dispatcher(m, k=None, axes=None):
return (m,)
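To make the table concrete, here is a minimal sketch of how one entry is consumed downstream (assuming the patched module is importable as nfb; the real plumbing lives in _quantile further below):

import numpy as np
import numpy.lib.function_base as nfb

n = 5                                  # number of sorted data points
quantiles = np.asanyarray([0.4])       # requested quantiles, in [0, 1]
method = nfb._QuantileInterpolation["median_unbiased"]

virtual_indexes = method["get_virtual_index"](n, quantiles)  # ~[1.4667]
previous_indexes = np.floor(virtual_indexes)                 # left neighbours
gamma = method["fix_gamma"](virtual_indexes - previous_indexes,
                            virtual_indexes)                 # ~[0.4667]
# The quantile is then left + gamma * (right - left) on the sorted values.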
@@ -3760,8 +3843,13 @@ def _percentile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
@array_function_dispatch(_percentile_dispatcher)
-def percentile(a, q, axis=None, out=None,
- overwrite_input=False, interpolation='linear', keepdims=False):
+def percentile(a,
+ q,
+ axis=None,
+ out=None,
+ overwrite_input=False,
+ interpolation="linear",
+ keepdims=False):
"""
Compute the q-th percentile of the data along the specified axis.
@@ -3790,20 +3878,75 @@ def percentile(a, q, axis=None, out=None,
calculations, to save memory. In this case, the contents of the input
`a` after this function completes is undefined.
- interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
+ interpolation : str
+ Possible values: 'linear' (default),
+ 'inverted_cdf', 'averaged_inverted_cdf',
+ 'closest_observation', 'interpolated_inverted_cdf',
+ 'hazen', 'weibull',
+ 'median_unbiased', 'normal_unbiased',
+ 'lower', 'higher',
+ 'midpoint', 'nearest'.
This optional parameter specifies the interpolation method to
- use when the desired percentile lies between two data points
- ``i < j``:
+        use when the desired percentile lies between two data points
+        ``i < j``. ``g`` is the fractional part of the virtual quantile
+        index, and ``alpha`` and ``beta`` are correction constants
+        modifying ``i`` and ``j``:
+
+            i + g = q * ( n - alpha - beta + 1 ) + alpha
+
+        * inverted_cdf:
+            method 1 of H&F.
+            This method gives discontinuous results:
+            if g > 0 ; then take j
+            if g = 0 ; then take i
+        * averaged_inverted_cdf:
+            method 2 of H&F.
+            This method gives discontinuous results:
+            if g > 0 ; then take j
+            if g = 0 ; then average between bounds
+        * closest_observation:
+            method 3 of H&F.
+            This method gives discontinuous results:
+            if g > 0 ; then take j
+            if g = 0 and index is odd ; then take j
+            if g = 0 and index is even ; then take i
+        * interpolated_inverted_cdf:
+            method 4 of H&F.
+            This method gives continuous results using:
+            alpha = 0
+            beta = 1
+        * hazen:
+            method 5 of H&F.
+            This method gives continuous results using:
+            alpha = 1/2
+            beta = 1/2
+        * weibull:
+            method 6 of H&F.
+            This method gives continuous results using:
+            alpha = 0
+            beta = 0
+        * linear:
+            Default method (method 7 of H&F).
+            This method gives continuous results using:
+            alpha = 1
+            beta = 1
+        * median_unbiased:
+            method 8 of H&F.
+            This is probably the best method if the sample distribution
+            function is unknown (see reference).
+            This method gives continuous results using:
+            alpha = 1/3
+            beta = 1/3
+        * normal_unbiased:
+            method 9 of H&F.
+            This is probably the best method if the sample distribution
+            function is known to be normal.
+            This method gives continuous results using:
+            alpha = 3/8
+            beta = 3/8
+        * lower: ``i``.
+        * higher: ``j``.
+        * nearest: ``i`` or ``j``, whichever is nearest.
+        * midpoint: ``(i + j) / 2``.
- * 'linear': ``i + (j - i) * fraction``, where ``fraction``
- is the fractional part of the index surrounded by ``i``
- and ``j``.
- * 'lower': ``i``.
- * 'higher': ``j``.
- * 'nearest': ``i`` or ``j``, whichever is nearest.
- * 'midpoint': ``(i + j) / 2``.
-
- .. versionadded:: 1.9.0
keepdims : bool, optional
If this is set to True, the axes which are reduced are left in
the result as dimensions with size one. With this option, the
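A short usage sketch of the new keyword values (assuming this patch is applied; the expected values mirror the new tests in test_function_base.py):

import numpy as np

a = np.array([15., 20., 35., 40., 50.])
np.testing.assert_almost_equal(np.percentile(a, 40), 29.0)  # default 'linear'
np.testing.assert_almost_equal(
    np.percentile(a, 40, interpolation="inverted_cdf"), 20.0)     # H&F method 1
np.testing.assert_almost_equal(
    np.percentile(a, 40, interpolation="median_unbiased"), 27.0)  # H&F method 8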
@@ -3897,6 +4040,11 @@ def percentile(a, q, axis=None, out=None,
ax.legend()
plt.show()
+ References
+ ----------
+ [Hyndman&Fan] Hyndman, R. J., & Fan, Y. (1996).
+ Sample quantiles in statistical packages.
+ The American Statistician, 50(4), 361-365.
"""
q = np.true_divide(q, 100)
q = asanyarray(q) # undo any decay that the ufunc performed (see gh-13105)
@@ -3912,8 +4060,13 @@ def _quantile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
@array_function_dispatch(_quantile_dispatcher)
-def quantile(a, q, axis=None, out=None,
- overwrite_input=False, interpolation='linear', keepdims=False):
+def quantile(a,
+ q,
+ axis=None,
+ out=None,
+ overwrite_input=False,
+ interpolation="linear",
+ keepdims=False):
"""
Compute the q-th quantile of the data along the specified axis.
@@ -3938,18 +4091,75 @@ def quantile(a, q, axis=None, out=None,
If True, then allow the input array `a` to be modified by intermediate
calculations, to save memory. In this case, the contents of the input
`a` after this function completes is undefined.
- interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
+ interpolation : str
+ Possible values: 'linear' (default),
+ 'inverted_cdf', 'averaged_inverted_cdf',
+ 'closest_observation', 'interpolated_inverted_cdf',
+ 'hazen', 'weibull',
+ 'median_unbiased', 'normal_unbiased',
+ 'lower', 'higher',
+ 'midpoint', 'nearest'.
This optional parameter specifies the interpolation method to
- use when the desired quantile lies between two data points
- ``i < j``:
-
- * linear: ``i + (j - i) * fraction``, where ``fraction``
- is the fractional part of the index surrounded by ``i``
- and ``j``.
- * lower: ``i``.
- * higher: ``j``.
- * nearest: ``i`` or ``j``, whichever is nearest.
- * midpoint: ``(i + j) / 2``.
+        use when the desired quantile lies between two data points
+        ``i < j``. ``g`` is the fractional part of the virtual quantile
+        index, and ``alpha`` and ``beta`` are correction constants
+        modifying ``i`` and ``j``:
+
+            i + g = q * ( n - alpha - beta + 1 ) + alpha
+
+        * inverted_cdf:
+            method 1 of H&F.
+            This method gives discontinuous results:
+            if g > 0 ; then take j
+            if g = 0 ; then take i
+        * averaged_inverted_cdf:
+            method 2 of H&F.
+            This method gives discontinuous results:
+            if g > 0 ; then take j
+            if g = 0 ; then average between bounds
+        * closest_observation:
+            method 3 of H&F.
+            This method gives discontinuous results:
+            if g > 0 ; then take j
+            if g = 0 and index is odd ; then take j
+            if g = 0 and index is even ; then take i
+        * interpolated_inverted_cdf:
+            method 4 of H&F.
+            This method gives continuous results using:
+            alpha = 0
+            beta = 1
+        * hazen:
+            method 5 of H&F.
+            This method gives continuous results using:
+            alpha = 1/2
+            beta = 1/2
+        * weibull:
+            method 6 of H&F.
+            This method gives continuous results using:
+            alpha = 0
+            beta = 0
+        * linear:
+            Default method (method 7 of H&F).
+            This method gives continuous results using:
+            alpha = 1
+            beta = 1
+        * median_unbiased:
+            method 8 of H&F.
+            This is probably the best method if the sample distribution
+            function is unknown (see reference).
+            This method gives continuous results using:
+            alpha = 1/3
+            beta = 1/3
+        * normal_unbiased:
+            method 9 of H&F.
+            This is probably the best method if the sample distribution
+            function is known to be normal.
+            This method gives continuous results using:
+            alpha = 3/8
+            beta = 3/8
+        * lower: ``i``.
+        * higher: ``j``.
+        * nearest: ``i`` or ``j``, whichever is nearest.
+        * midpoint: ``(i + j) / 2``.
+
keepdims : bool, optional
If this is set to True, the axes which are reduced are left in
the result as dimensions with size one. With this option, the
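The same data through np.quantile with two of the continuous Hyndman & Fan methods, as a sketch under the same assumption; the expected values match the corresponding percentile tests:

import numpy as np

a = np.array([15., 20., 35., 40., 50.])
np.testing.assert_almost_equal(
    np.quantile(a, 0.4, interpolation="weibull"), 26.0)  # H&F 6, alpha = beta = 0
np.testing.assert_almost_equal(
    np.quantile(a, 0.4, interpolation="hazen"), 27.5)    # H&F 5, alpha = beta = 1/2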
@@ -4010,6 +4220,12 @@ def quantile(a, q, axis=None, out=None,
>>> np.quantile(b, 0.5, axis=1, overwrite_input=True)
array([7., 2.])
>>> assert not np.all(a == b)
+
+ References
+ ----------
+ [Hyndman&Fan] Hyndman, R. J., & Fan, Y. (1996).
+ Sample quantiles in statistical packages.
+ The American Statistician, 50(4), 361-365.
"""
q = np.asanyarray(q)
if not _quantile_is_valid(q):
@@ -4018,10 +4234,19 @@ def quantile(a, q, axis=None, out=None,
a, q, axis, out, overwrite_input, interpolation, keepdims)
-def _quantile_unchecked(a, q, axis=None, out=None, overwrite_input=False,
- interpolation='linear', keepdims=False):
+def _quantile_unchecked(a,
+ q,
+ axis=None,
+ out=None,
+ overwrite_input=False,
+ interpolation="linear",
+ keepdims=False):
"""Assumes that q is in [0, 1], and is an ndarray"""
- r, k = _ureduce(a, func=_quantile_ureduce_func, q=q, axis=axis, out=out,
+ r, k = _ureduce(a,
+ func=_quantiles_ureduce_func,
+ q=q,
+ axis=axis,
+ out=out,
overwrite_input=overwrite_input,
interpolation=interpolation)
if keepdims:
@@ -4042,122 +4267,242 @@ def _quantile_is_valid(q):
return True
-def _lerp(a, b, t, out=None):
- """ Linearly interpolate from a to b by a factor of t """
- diff_b_a = subtract(b, a)
- # asanyarray is a stop-gap until gh-13105
- lerp_interpolation = asanyarray(add(a, diff_b_a*t, out=out))
- subtract(b, diff_b_a * (1 - t), out=lerp_interpolation, where=t>=0.5)
- if lerp_interpolation.ndim == 0 and out is None:
- lerp_interpolation = lerp_interpolation[()] # unpack 0d arrays
- return lerp_interpolation
+def _virtual_index_formula(n: np.ndarray,
+                           quantiles: np.ndarray,
+                           alpha: float,
+                           beta: float) -> np.ndarray:
+ """
+ Compute the floating point indexes of an array for the linear
+ interpolation of quantiles.
+ Reference:
+ Hyndman&Fan paper "Sample Quantiles in Statistical Packages",
+ DOI: 10.1080/00031305.1996.10473566
+ """
+ return n * quantiles + (
+ alpha + quantiles * (1 - alpha - beta)
+ ) - 1
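For alpha = beta = 1 this reduces to the familiar (n - 1) * quantiles of the old 'linear' path; a quick sanity sketch, assuming the patched module is importable as nfb:

import numpy as np
import numpy.lib.function_base as nfb

n = 5
q = np.array([0.0, 0.4, 1.0])
assert np.allclose(nfb._virtual_index_formula(n, q, 1, 1), (n - 1) * q)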
-def _quantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False,
- interpolation='linear', keepdims=False):
- a = asarray(a)
+def _get_gamma(virtual_indexes: np.ndarray,
+               previous_indexes: np.ndarray,
+               interpolation: dict) -> np.ndarray:
+    """
+    Compute the gamma (a.k.a 'm' or weight) for the linear interpolation
+    of quantiles.
+    When gamma == 0 the left bound is picked;
+    when gamma == 1 the right bound is picked.
+ """
+ gamma = np.asanyarray(virtual_indexes - previous_indexes)
+ gamma = interpolation["fix_gamma"](gamma, virtual_indexes)
+ return np.asanyarray(gamma)
- # ufuncs cause 0d array results to decay to scalars (see gh-13105), which
- # makes them problematic for __setitem__ and attribute access. As a
- # workaround, we call this on the result of every ufunc on a possibly-0d
- # array.
- not_scalar = np.asanyarray
- # prepare a for partitioning
- if overwrite_input:
- if axis is None:
- ap = a.ravel()
- else:
- ap = a
- else:
- if axis is None:
- ap = a.flatten()
- else:
- ap = a.copy()
+def _linear_interpolation_formula(
+    left: np.ndarray, right: np.ndarray, gamma: np.ndarray, out=None
+) -> np.ndarray:
+    """
+    Compute the linear interpolation weighted by gamma on each point of
+    two arrays with the same shape.
+    """
+ # Equivalent to gamma * right + (1 - gamma) * left
+ # see gh-14685
+ diff_right_left = subtract(right, left)
+ result = asanyarray(add(left, diff_right_left * gamma, out=out))
+ result = subtract(right,
+ diff_right_left * (1 - gamma),
+ out=result,
+ where=gamma >= 0.5)
+ return result
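Numerically this equals gamma * right + (1 - gamma) * left, but switching formulas at gamma >= 0.5 keeps the result monotonic and exactly equal to the right bound at gamma == 1 (gh-14685). A minimal check, assuming the patched module is importable as nfb:

import numpy as np
import numpy.lib.function_base as nfb

left, right = np.array(2.0), np.array(5.0)
for gamma in (0.0, 0.25, 0.5, 0.75, 1.0):
    got = nfb._linear_interpolation_formula(left, right, np.array(gamma))
    assert np.isclose(got, gamma * right + (1 - gamma) * left)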
- if axis is None:
- axis = 0
+def _get_gamma_mask(shape, default_value, conditioned_value, where):
+ out = np.full(shape, default_value)
+ out[where] = conditioned_value
+ return out
+
+
+def _discret_interpolation_to_boundaries(index, gamma_condition_fun):
+ previous = np.floor(index)
+ next = previous + 1
+ gamma = index - previous
+ return _get_gamma_mask(shape=index.shape,
+ default_value=next,
+ conditioned_value=previous,
+ where=gamma_condition_fun(gamma, index)
+ ).astype(np.intp)
+
+
+def _closest_observation(n, quantiles):
+ gamma_fun = lambda gamma, index: (gamma == 0) & (np.floor(index) % 2 == 0)
+ return _discret_interpolation_to_boundaries((n * quantiles) - 1 - 0.5,
+ gamma_fun)
+
+
+def _inverted_cdf(n, quantiles):
+ gamma_fun = lambda gamma, _: (gamma == 0)
+ return _discret_interpolation_to_boundaries((n * quantiles) - 1,
+ gamma_fun)
+
+
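A sketch of the discrete index selection of H&F method 1, assuming the patched module is importable as nfb: for n = 5 the virtual indexes n * q - 1 below are 0.0, 0.5 and 1.0, and only the exact hits keep the lower neighbour.

import numpy as np
import numpy.lib.function_base as nfb

n = 5
quantiles = np.array([0.2, 0.3, 0.4])
print(nfb._inverted_cdf(n, quantiles))  # -> [0 1 1]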
+def _quantiles_ureduce_func(
+        a: np.ndarray,
+        q: np.ndarray,
+        axis: int = None,
+        out=None,
+        overwrite_input: bool = False,
+        interpolation="linear",
+) -> np.ndarray:
if q.ndim > 2:
# The code below works fine for nd, but it might not have useful
# semantics. For now, keep the supported dimensions the same as it was
# before.
raise ValueError("q must be a scalar or 1d")
-
- Nx = ap.shape[axis]
- indices = not_scalar(q * (Nx - 1))
- # round fractional indices according to interpolation method
- if interpolation == 'lower':
- indices = floor(indices).astype(intp)
- elif interpolation == 'higher':
- indices = ceil(indices).astype(intp)
- elif interpolation == 'midpoint':
- indices = 0.5 * (floor(indices) + ceil(indices))
- elif interpolation == 'nearest':
- indices = around(indices).astype(intp)
- elif interpolation == 'linear':
- pass # keep index as fraction and interpolate
+ if overwrite_input:
+ if axis is None:
+ axis = 0
+ arr = a.ravel()
+ else:
+ arr = a
else:
- raise ValueError(
- "interpolation can only be 'linear', 'lower' 'higher', "
- "'midpoint', or 'nearest'")
+ if axis is None:
+ axis = 0
+ arr = a.flatten()
+ else:
+ arr = a.copy()
+ result = _quantile(arr,
+ quantiles=q,
+ axis=axis,
+ interpolation=interpolation,
+ out=out)
+ if result.ndim == 0 and out is None:
+ result = result[()] # unpack 0d arrays
+ elif result.size == 1 and out is None and q.ndim == 0:
+ result = result[0]
+ return result
- # The dimensions of `q` are prepended to the output shape, so we need the
- # axis being sampled from `ap` to be first.
- ap = np.moveaxis(ap, axis, 0)
- del axis
- if np.issubdtype(indices.dtype, np.integer):
- # take the points along axis
+def _get_indexes(arr, virtual_indexes, valid_values_count):
+ """
+ Get the valid indexes of arr neighbouring virtual_indexes.
+    Note
+    ----
+    This is a companion function to the linear interpolation of
+    quantiles.
- if np.issubdtype(a.dtype, np.inexact):
+ Returns
+ -------
+ (previous_indexes, next_indexes): Tuple
+        A tuple of the indexes neighbouring virtual_indexes
+ """
+ previous_indexes = np.asanyarray(np.floor(virtual_indexes))
+ next_indexes = np.asanyarray(previous_indexes + 1)
+ indexes_above_bounds = virtual_indexes >= valid_values_count - 1
+    # When indexes are above the max index, take the max value of the array
+ if indexes_above_bounds.any():
+ previous_indexes[indexes_above_bounds] = -1
+ next_indexes[indexes_above_bounds] = -1
+    # When indexes are below the min index, take the min value of the array
+ indexes_below_bounds = virtual_indexes < 0
+ if indexes_below_bounds.any():
+ previous_indexes[indexes_below_bounds] = 0
+ next_indexes[indexes_below_bounds] = 0
+ if np.issubdtype(arr.dtype, np.inexact):
+ # After the sort, slices having NaNs will have for last element a NaN
+        # After the sort, slices containing NaNs end with a NaN element
+ if virtual_indexes_nans.any():
+ previous_indexes[virtual_indexes_nans] = -1
+ next_indexes[virtual_indexes_nans] = -1
+ previous_indexes = previous_indexes.astype(np.intp)
+ next_indexes = next_indexes.astype(np.intp)
+ return previous_indexes, next_indexes
+
+
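A small sketch of the boundary handling, assuming the patched module is importable as nfb: out-of-range and NaN virtual indexes are redirected so that the later take() picks the first or last element of each sorted slice.

import numpy as np
import numpy.lib.function_base as nfb

arr = np.array([1.0, 2.0, 3.0])
virtual = np.array([-0.25, 1.5, 2.0, np.nan])
previous, nxt = nfb._get_indexes(arr, virtual, valid_values_count=3)
print(previous, nxt)  # -> [ 0  1 -1 -1] [ 0  2 -1 -1]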
+def _quantile(
+        arr: np.ndarray,
+        quantiles: np.ndarray,
+ axis: int = -1,
+ interpolation="linear",
+ out=None,
+):
+ """
+    Private function that doesn't support extended axis or keepdims.
+    These methods are extended to this function using _ureduce.
+    See nanpercentile for parameter usage.
+    It computes the quantiles of the array for the given axis.
+    A linear interpolation is performed based on the `interpolation`
+    parameter.
+
+    By default, the interpolation is "linear" where alpha == beta == 1,
+    which performs the 7th method of Hyndman & Fan.
+    With "median_unbiased" we get alpha == beta == 1/3, thus the 8th
+    method of Hyndman & Fan.
+ """
+ # --- Setup
+ arr = np.asanyarray(arr)
+ values_count = arr.shape[axis]
+ # The dimensions of `q` are prepended to the output shape, so we need the
+    # axis being sampled from `arr` to be first.
+ DATA_AXIS = 0
+ arr = np.moveaxis(arr, axis, destination=DATA_AXIS)
+ # --- Computation of indexes
+ # Index where to find the value in the sorted array.
+    # Virtual because it is a floating point value, not a valid index.
+    # The nearest neighbours are used for interpolation.
+ try:
+ interpolation = _QuantileInterpolation[interpolation]
+ except KeyError:
+ raise ValueError(
+ f"{interpolation!r} is not a valid interpolation. Use one of: "
+ f"{_QuantileInterpolation.keys()}")
+ virtual_indexes = interpolation["get_virtual_index"](values_count,
+ quantiles)
+ virtual_indexes = np.asanyarray(virtual_indexes)
+ if np.issubdtype(virtual_indexes.dtype, np.integer):
+ # No interpolation needed, take the points along axis
+ if np.issubdtype(arr.dtype, np.inexact):
# may contain nan, which would sort to the end
- ap.partition(concatenate((indices.ravel(), [-1])), axis=0)
- n = np.isnan(ap[-1])
+ arr.partition(concatenate((virtual_indexes.ravel(), [-1])), axis=0)
+ slices_having_nans = np.isnan(arr[-1])
else:
# cannot contain nan
- ap.partition(indices.ravel(), axis=0)
- n = np.array(False, dtype=bool)
-
- r = take(ap, indices, axis=0, out=out)
-
+ arr.partition(virtual_indexes.ravel(), axis=0)
+ slices_having_nans = np.array(False, dtype=bool)
+ result = np.asanyarray(take(arr, virtual_indexes, axis=0, out=out))
else:
- # weight the points above and below the indices
-
- indices_below = not_scalar(floor(indices)).astype(intp)
- indices_above = not_scalar(indices_below + 1)
- indices_above[indices_above > Nx - 1] = Nx - 1
-
- if np.issubdtype(a.dtype, np.inexact):
- # may contain nan, which would sort to the end
- ap.partition(concatenate((
- indices_below.ravel(), indices_above.ravel(), [-1]
- )), axis=0)
- n = np.isnan(ap[-1])
+ previous_indexes, next_indexes = _get_indexes(arr,
+ virtual_indexes,
+ values_count)
+ # --- Sorting
+ arr.partition(
+ np.unique(np.concatenate(([0, -1],
+ previous_indexes.ravel(),
+ next_indexes.ravel(),
+ ))),
+ axis=DATA_AXIS)
+ if np.issubdtype(arr.dtype, np.inexact):
+ slices_having_nans = np.isnan(
+ take(arr, indices=-1, axis=DATA_AXIS)
+ )
else:
- # cannot contain nan
- ap.partition(concatenate((
- indices_below.ravel(), indices_above.ravel()
- )), axis=0)
- n = np.array(False, dtype=bool)
-
- weights_shape = indices.shape + (1,) * (ap.ndim - 1)
- weights_above = not_scalar(indices - indices_below).reshape(weights_shape)
-
- x_below = take(ap, indices_below, axis=0)
- x_above = take(ap, indices_above, axis=0)
-
- r = _lerp(x_below, x_above, weights_above, out=out)
-
- # if any slice contained a nan, then all results on that slice are also nan
- if np.any(n):
- if r.ndim == 0 and out is None:
+ slices_having_nans = None
+ # --- Get values from indexes
+ previous = np.take(arr, previous_indexes, axis=DATA_AXIS)
+ next = np.take(arr, next_indexes, axis=DATA_AXIS)
+ # --- Linear interpolation
+ gamma = _get_gamma(virtual_indexes,
+ previous_indexes,
+ interpolation)
+ result_shape = virtual_indexes.shape + (1,) * (arr.ndim - 1)
+ gamma = gamma.reshape(result_shape)
+ result = _linear_interpolation_formula(previous,
+ next,
+ gamma,
+ out=out)
+ if np.any(slices_having_nans):
+ if result.ndim == 0 and out is None:
# can't write to a scalar
- r = a.dtype.type(np.nan)
+ result = np.array(np.nan, dtype=arr.dtype)
else:
- r[..., n] = a.dtype.type(np.nan)
-
- return r
+ result[..., slices_having_nans] = np.nan
+ return result
def _trapz_dispatcher(y, x=None, dx=None, axis=None):
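Putting the helpers together, the steps inside _quantile for a single 1-D slice look roughly as follows (a simplified walkthrough assuming the patched module is importable as nfb; the real function also handles the integer-index methods, NaN slices and the out parameter):

import numpy as np
import numpy.lib.function_base as nfb

arr = np.array([40., 15., 50., 20., 35.])
q = np.array([0.4])
n = arr.shape[0]

virtual = nfb._virtual_index_formula(n, q, alpha=1, beta=1)  # 'linear': ~1.6
previous_idx, next_idx = nfb._get_indexes(arr, virtual, n)   # 1 and 2
arr.partition(np.unique(np.concatenate(([0, -1],
                                        previous_idx.ravel(),
                                        next_idx.ravel()))))
gamma = nfb._get_gamma(virtual, previous_idx,
                       nfb._QuantileInterpolation["linear"])
result = nfb._linear_interpolation_formula(arr[previous_idx],
                                           arr[next_idx],
                                           gamma)
print(result)  # -> [29.]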
diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py
index 08d9b42bb..e90c19b4a 100644
--- a/numpy/lib/nanfunctions.py
+++ b/numpy/lib/nanfunctions.py
@@ -23,6 +23,7 @@ Functions
import functools
import warnings
import numpy as np
+from numpy.lib.function_base import _QuantileInterpolation
from numpy.lib import function_base
from numpy.core import overrides
@@ -1229,8 +1230,15 @@ def _nanpercentile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
@array_function_dispatch(_nanpercentile_dispatcher)
-def nanpercentile(a, q, axis=None, out=None, overwrite_input=False,
- interpolation='linear', keepdims=np._NoValue):
+def nanpercentile(
+ a,
+ q,
+ axis=None,
+ out=None,
+ overwrite_input=False,
+ interpolation="linear",
+ keepdims=np._NoValue,
+):
"""
Compute the qth percentile of the data along the specified axis,
while ignoring nan values.
@@ -1259,18 +1267,74 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False,
If True, then allow the input array `a` to be modified by intermediate
calculations, to save memory. In this case, the contents of the input
`a` after this function completes is undefined.
- interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
+ interpolation : str
+ Possible values: 'linear' (default),
+ 'inverted_cdf', 'averaged_inverted_cdf',
+ 'closest_observation', 'interpolated_inverted_cdf',
+ 'hazen', 'weibull',
+ 'median_unbiased', 'normal_unbiased',
+ 'lower', 'higher',
+ 'midpoint', 'nearest'.
This optional parameter specifies the interpolation method to
- use when the desired percentile lies between two data points
- ``i < j``:
-
- * 'linear': ``i + (j - i) * fraction``, where ``fraction``
- is the fractional part of the index surrounded by ``i``
- and ``j``.
- * 'lower': ``i``.
- * 'higher': ``j``.
- * 'nearest': ``i`` or ``j``, whichever is nearest.
- * 'midpoint': ``(i + j) / 2``.
+        use when the desired percentile lies between two data points
+        ``i < j``. ``g`` is the fractional part of the virtual quantile
+        index, and ``alpha`` and ``beta`` are correction constants
+        modifying ``i`` and ``j``:
+
+            i + g = q * ( n - alpha - beta + 1 ) + alpha
+
+        * inverted_cdf:
+            method 1 of H&F.
+            This method gives discontinuous results:
+            if g > 0 ; then take j
+            if g = 0 ; then take i
+        * averaged_inverted_cdf:
+            method 2 of H&F.
+            This method gives discontinuous results:
+            if g > 0 ; then take j
+            if g = 0 ; then average between bounds
+        * closest_observation:
+            method 3 of H&F.
+            This method gives discontinuous results:
+            if g > 0 ; then take j
+            if g = 0 and index is odd ; then take j
+            if g = 0 and index is even ; then take i
+        * interpolated_inverted_cdf:
+            method 4 of H&F.
+            This method gives continuous results using:
+            alpha = 0
+            beta = 1
+        * hazen:
+            method 5 of H&F.
+            This method gives continuous results using:
+            alpha = 1/2
+            beta = 1/2
+        * weibull:
+            method 6 of H&F.
+            This method gives continuous results using:
+            alpha = 0
+            beta = 0
+        * linear:
+            Default method (method 7 of H&F).
+            This method gives continuous results using:
+            alpha = 1
+            beta = 1
+        * median_unbiased:
+            method 8 of H&F.
+            This is probably the best method if the sample distribution
+            function is unknown (see reference).
+            This method gives continuous results using:
+            alpha = 1/3
+            beta = 1/3
+        * normal_unbiased:
+            method 9 of H&F.
+            This is probably the best method if the sample distribution
+            function is known to be normal.
+            This method gives continuous results using:
+            alpha = 3/8
+            beta = 3/8
+        * lower: ``i``.
+        * higher: ``j``.
+        * nearest: ``i`` or ``j``, whichever is nearest.
+        * midpoint: ``(i + j) / 2``.
keepdims : bool, optional
If this is set to True, the axes which are reduced are left in
the result as dimensions with size one. With this option, the
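NaNs are dropped before the quantile is computed, so the new methods combine with the existing NaN handling; a small sketch assuming this patch is applied:

import numpy as np

a = np.array([15., np.nan, 35., 40., 50.])
np.testing.assert_almost_equal(np.nanpercentile(a, 40), 36.0)  # 'linear'
np.testing.assert_almost_equal(
    np.nanpercentile(a, 40, interpolation="weibull"), 35.0)    # H&F method 6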
@@ -1342,7 +1406,9 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False,
"""
a = np.asanyarray(a)
- q = np.true_divide(q, 100.0) # handles the asarray for us too
+ q = np.true_divide(q, 100.0)
+ # undo any decay that the ufunc performed (see gh-13105)
+ q = np.asanyarray(q)
if not function_base._quantile_is_valid(q):
raise ValueError("Percentiles must be in the range [0, 100]")
return _nanquantile_unchecked(
@@ -1355,8 +1421,15 @@ def _nanquantile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
@array_function_dispatch(_nanquantile_dispatcher)
-def nanquantile(a, q, axis=None, out=None, overwrite_input=False,
- interpolation='linear', keepdims=np._NoValue):
+def nanquantile(
+ a,
+ q,
+ axis=None,
+ out=None,
+ overwrite_input=False,
+ interpolation="linear",
+ keepdims=np._NoValue,
+):
"""
Compute the qth quantile of the data along the specified axis,
while ignoring nan values.
@@ -1384,19 +1457,74 @@ def nanquantile(a, q, axis=None, out=None, overwrite_input=False,
If True, then allow the input array `a` to be modified by intermediate
calculations, to save memory. In this case, the contents of the input
`a` after this function completes is undefined.
- interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
+ interpolation : str
+ Possible values: 'linear' (default),
+ 'inverted_cdf', 'averaged_inverted_cdf',
+ 'closest_observation', 'interpolated_inverted_cdf',
+ 'hazen', 'weibull',
+ 'median_unbiased', 'normal_unbiased',
+ 'lower', 'higher',
+ 'midpoint', 'nearest'.
This optional parameter specifies the interpolation method to
- use when the desired quantile lies between two data points
- ``i < j``:
-
- * linear: ``i + (j - i) * fraction``, where ``fraction``
- is the fractional part of the index surrounded by ``i``
- and ``j``.
+        use when the desired quantile lies between two data points
+        ``i < j``. ``g`` is the fractional part of the virtual quantile
+        index, and ``alpha`` and ``beta`` are correction constants
+        modifying ``i`` and ``j``:
+
+            i + g = q * ( n - alpha - beta + 1 ) + alpha
+
+        * inverted_cdf:
+            method 1 of H&F.
+            This method gives discontinuous results:
+            if g > 0 ; then take j
+            if g = 0 ; then take i
+        * averaged_inverted_cdf:
+            method 2 of H&F.
+            This method gives discontinuous results:
+            if g > 0 ; then take j
+            if g = 0 ; then average between bounds
+        * closest_observation:
+            method 3 of H&F.
+            This method gives discontinuous results:
+            if g > 0 ; then take j
+            if g = 0 and index is odd ; then take j
+            if g = 0 and index is even ; then take i
+        * interpolated_inverted_cdf:
+            method 4 of H&F.
+            This method gives continuous results using:
+            alpha = 0
+            beta = 1
+        * hazen:
+            method 5 of H&F.
+            This method gives continuous results using:
+            alpha = 1/2
+            beta = 1/2
+        * weibull:
+            method 6 of H&F.
+            This method gives continuous results using:
+            alpha = 0
+            beta = 0
+        * linear:
+            Default method (method 7 of H&F).
+            This method gives continuous results using:
+            alpha = 1
+            beta = 1
+        * median_unbiased:
+            method 8 of H&F.
+            This is probably the best method if the sample distribution
+            function is unknown (see reference).
+            This method gives continuous results using:
+            alpha = 1/3
+            beta = 1/3
+        * normal_unbiased:
+            method 9 of H&F.
+            This is probably the best method if the sample distribution
+            function is known to be normal.
+            This method gives continuous results using:
+            alpha = 3/8
+            beta = 3/8
* lower: ``i``.
* higher: ``j``.
* nearest: ``i`` or ``j``, whichever is nearest.
* midpoint: ``(i + j) / 2``.
-
keepdims : bool, optional
If this is set to True, the axes which are reduced are left in
the result as dimensions with size one. With this option, the
@@ -1462,26 +1590,39 @@ def nanquantile(a, q, axis=None, out=None, overwrite_input=False,
a, q, axis, out, overwrite_input, interpolation, keepdims)
-def _nanquantile_unchecked(a, q, axis=None, out=None, overwrite_input=False,
- interpolation='linear', keepdims=np._NoValue):
+def _nanquantile_unchecked(
+ a,
+ q,
+ axis=None,
+ out=None,
+ overwrite_input=False,
+ interpolation="linear",
+ keepdims=np._NoValue,
+):
"""Assumes that q is in [0, 1], and is an ndarray"""
# apply_along_axis in _nanpercentile doesn't handle empty arrays well,
# so deal them upfront
if a.size == 0:
return np.nanmean(a, axis, out=out, keepdims=keepdims)
-
- r, k = function_base._ureduce(
- a, func=_nanquantile_ureduce_func, q=q, axis=axis, out=out,
- overwrite_input=overwrite_input, interpolation=interpolation
- )
+ r, k = function_base._ureduce(a,
+ func=_nanquantile_ureduce_func,
+ q=q,
+ axis=axis,
+ out=out,
+ overwrite_input=overwrite_input,
+ interpolation=interpolation)
if keepdims and keepdims is not np._NoValue:
return r.reshape(q.shape + k)
else:
return r
-def _nanquantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False,
- interpolation='linear'):
+def _nanquantile_ureduce_func(a,
+ q,
+ axis=None,
+ out=None,
+ overwrite_input=False,
+                              interpolation="linear"):
"""
Private function that doesn't support extended axis or keepdims.
These methods are extended to this function using _ureduce
@@ -1504,7 +1645,10 @@ def _nanquantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False,
return result
-def _nanquantile_1d(arr1d, q, overwrite_input=False, interpolation='linear'):
+def _nanquantile_1d(arr1d,
+ q,
+ overwrite_input=False,
+                    interpolation="linear"):
"""
Private function for rank 1 arrays. Compute quantile ignoring NaNs.
See nanpercentile for parameter usage
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index c7dfe5673..4f0db7bdb 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -2903,36 +2903,134 @@ class TestPercentile:
[1, 1, 1]])
assert_array_equal(np.percentile(x, 50, axis=0), [1, 1, 1])
- def test_linear(self):
-
+ @pytest.mark.parametrize("dtype", np.typecodes["AllFloat"])
+ def test_linear_nan_1D(self, dtype):
+        # NaNs in the input should propagate to the result
+ arr = np.asarray([15.0, np.NAN, 35.0, 40.0, 50.0], dtype=dtype)
+ res = np.percentile(
+ arr,
+ 40.0,
+ interpolation="linear")
+ np.testing.assert_equal(res, np.NAN)
+ np.testing.assert_equal(res.dtype, arr.dtype)
+
+ TYPE_CODES = np.typecodes["AllInteger"] + np.typecodes["AllFloat"] + "O"
+
+ @pytest.mark.parametrize("dtype", TYPE_CODES)
+ def test_linear_inverted_cdf(self, dtype):
+ # METHOD 1 of H&F
+ arr = np.asarray([15.0, 20.0, 35.0, 40.0, 50.0], dtype=dtype)
+ res = np.percentile(
+ arr,
+ 40.0,
+ interpolation="inverted_cdf")
+ np.testing.assert_almost_equal(res, 20, 15)
+
+ @pytest.mark.parametrize("dtype", TYPE_CODES)
+ def test_linear_averaged_inverted_cdf(self, dtype):
+ # METHOD 2 of H&F
+ arr = np.asarray([15.0, 20.0, 35.0, 40.0, 50.0], dtype=dtype)
+ res = np.percentile(
+ arr,
+ 40.0,
+ interpolation="averaged_inverted_cdf")
+ np.testing.assert_almost_equal(res, 27.5, 15)
+
+ @pytest.mark.parametrize("dtype", TYPE_CODES)
+ def test_linear_closest_observation(self, dtype):
+ # METHOD 3 of H&F
+ arr = np.asarray([15.0, 20.0, 35.0, 40.0, 50.0], dtype=dtype)
+ res = np.percentile(
+ arr,
+ 40.0,
+ interpolation="closest_observation")
+ np.testing.assert_almost_equal(res, 20, 15)
+
+ @pytest.mark.parametrize("dtype", TYPE_CODES)
+ def test_linear_interpolated_inverted_cdf(self, dtype):
+ # METHOD 4 of H&F
+ arr = np.asarray([15.0, 20.0, 35.0, 40.0, 50.0], dtype=dtype)
+ res = np.percentile(
+ arr,
+ 40.0,
+ interpolation="interpolated_inverted_cdf")
+ np.testing.assert_almost_equal(res, 20, 15)
+
+ @pytest.mark.parametrize("dtype", TYPE_CODES)
+ def test_linear_hazen(self, dtype):
+ # METHOD 5 of H&F
+ arr = np.asarray([15.0, 20.0, 35.0, 40.0, 50.0], dtype=dtype)
+ res = np.percentile(
+ arr,
+ 40.0,
+ interpolation="hazen")
+ np.testing.assert_almost_equal(res, 27.5, 15)
+
+ @pytest.mark.parametrize("dtype", TYPE_CODES)
+ def test_linear_weibull(self, dtype):
+ # METHOD 6 of H&F
+ arr = np.asarray([15.0, 20.0, 35.0, 40.0, 50.0], dtype=dtype)
+ res = np.percentile(
+ arr,
+ 40.0,
+ interpolation="weibull")
+ np.testing.assert_almost_equal(res, 26, 15)
+
+ @pytest.mark.parametrize("dtype", TYPE_CODES)
+ def test_linear_linear(self, dtype):
+ # METHOD 7 of H&F
# Test defaults
assert_equal(np.percentile(range(10), 50), 4.5)
-
- # explicitly specify interpolation_method 'linear' (the default)
- assert_equal(np.percentile(range(10), 50,
- interpolation='linear'), 4.5)
-
- def test_lower_higher(self):
-
- # interpolation_method 'lower'/'higher'
- assert_equal(np.percentile(range(10), 50,
+        # explicitly specify interpolation 'linear' (the default)
+ res = np.percentile([15.0, 20.0, 35.0, 40.0, 50.0],
+ 40,
+ interpolation="linear")
+ np.testing.assert_almost_equal(res, 29, 15)
+
+ @pytest.mark.parametrize("dtype", TYPE_CODES)
+ def test_linear_median_unbiased(self, dtype):
+ # METHOD 8 of H&F
+ arr = np.asarray([15.0, 20.0, 35.0, 40.0, 50.0], dtype=dtype)
+ res = np.percentile(
+ arr,
+ 40.0,
+ interpolation="median_unbiased")
+ np.testing.assert_almost_equal(res, 27, 14)
+
+ @pytest.mark.parametrize("dtype", TYPE_CODES)
+ def test_linear_normal_unbiased(self, dtype):
+ # METHOD 9 of H&F
+ arr = np.asarray([15.0, 20.0, 35.0, 40.0, 50.0], dtype=dtype)
+ res = np.percentile(
+ arr,
+ 40.0,
+ interpolation="normal_unbiased")
+ np.testing.assert_almost_equal(res, 27.125, 15)
+
+ @pytest.mark.parametrize("dtype", TYPE_CODES)
+ def test_lower_higher(self, dtype):
+ assert_equal(np.percentile(np.arange(10, dtype=dtype), 50,
interpolation='lower'), 4)
- assert_equal(np.percentile(range(10), 50,
+ assert_equal(np.percentile(np.arange(10, dtype=dtype), 50,
interpolation='higher'), 5)
- def test_midpoint(self):
- assert_equal(np.percentile(range(10), 51,
+ @pytest.mark.parametrize("dtype", TYPE_CODES)
+ def test_midpoint(self, dtype):
+ assert_equal(np.percentile(np.arange(10, dtype=dtype), 51,
interpolation='midpoint'), 4.5)
- assert_equal(np.percentile(range(11), 51,
+ assert_equal(np.percentile(np.arange(9, dtype=dtype) + 1, 50,
+ interpolation='midpoint'), 5)
+ assert_equal(np.percentile(np.arange(11, dtype=dtype), 51,
interpolation='midpoint'), 5.5)
- assert_equal(np.percentile(range(11), 50,
+ assert_equal(np.percentile(np.arange(11, dtype=dtype), 50,
interpolation='midpoint'), 5)
- def test_nearest(self):
- assert_equal(np.percentile(range(10), 51,
+ @pytest.mark.parametrize("dtype", TYPE_CODES)
+ def test_nearest(self, dtype):
+ assert_equal(np.percentile(np.arange(10, dtype=dtype), 51,
interpolation='nearest'), 5)
- assert_equal(np.percentile(range(10), 49,
- interpolation='nearest'), 4)
+ assert_equal(np.percentile(np.arange(10, dtype=dtype), 49,
+ interpolation='nearest'), 4)
def test_sequence(self):
x = np.arange(8) * 0.5
@@ -3038,18 +3136,18 @@ class TestPercentile:
y = np.zeros((3,))
p = (1, 2, 3)
np.percentile(x, p, out=y)
- assert_equal(y, np.percentile(x, p))
+ assert_equal(np.percentile(x, p), y)
x = np.array([[1, 2, 3],
[4, 5, 6]])
y = np.zeros((3, 3))
np.percentile(x, p, axis=0, out=y)
- assert_equal(y, np.percentile(x, p, axis=0))
+ assert_equal(np.percentile(x, p, axis=0), y)
y = np.zeros((3, 2))
np.percentile(x, p, axis=1, out=y)
- assert_equal(y, np.percentile(x, p, axis=1))
+ assert_equal(np.percentile(x, p, axis=1), y)
x = np.arange(12).reshape(3, 4)
# q.dim > 1, float
@@ -3293,6 +3391,7 @@ class TestPercentile:
with pytest.raises(ValueError, match="Percentiles must be in"):
np.percentile([1, 2, 3, 4.0], q)
+
class TestQuantile:
# most of this is already tested by TestPercentile
@@ -3302,6 +3401,7 @@ class TestQuantile:
assert_equal(np.quantile(x, 1), 3.5)
assert_equal(np.quantile(x, 0.5), 1.75)
+ @pytest.mark.xfail(reason="See gh-19154")
def test_correct_quantile_value(self):
a = np.array([True])
tf_quant = np.quantile(True, False)
@@ -3310,12 +3410,11 @@ class TestQuantile:
a = np.array([False, True, True])
quant_res = np.quantile(a, a)
assert_array_equal(quant_res, a)
- assert_equal(a.dtype, quant_res.dtype)
+ assert_equal(quant_res.dtype, a.dtype)
def test_fraction(self):
# fractional input, integral quantile
x = [Fraction(i, 2) for i in range(8)]
-
q = np.quantile(x, 0)
assert_equal(q, 0)
assert_equal(type(q), Fraction)
@@ -3352,6 +3451,12 @@ class TestQuantile:
np.quantile(np.arange(100.), p, interpolation="midpoint")
assert_array_equal(p, p0)
+ @pytest.mark.parametrize("dtype", np.typecodes["AllInteger"])
+ def test_quantile_preserve_int_type(self, dtype):
+ res = np.quantile(np.array([1, 2], dtype=dtype), [0.5],
+ interpolation="nearest")
+ assert res.dtype == dtype
+
def test_quantile_monotonic(self):
# GH 14685
# test that the return value of quantile is monotonic if p0 is ordered
@@ -3380,9 +3485,9 @@ class TestLerp:
min_value=-1e300, max_value=1e300),
b = st.floats(allow_nan=False, allow_infinity=False,
min_value=-1e300, max_value=1e300))
- def test_lerp_monotonic(self, t0, t1, a, b):
- l0 = np.lib.function_base._lerp(a, b, t0)
- l1 = np.lib.function_base._lerp(a, b, t1)
+ def test_linear_interpolation_formula_monotonic(self, t0, t1, a, b):
+ l0 = nfb._linear_interpolation_formula(a, b, t0)
+ l1 = nfb._linear_interpolation_formula(a, b, t1)
if t0 == t1 or a == b:
assert l0 == l1 # uninteresting
elif (t0 < t1) == (a < b):
@@ -3396,11 +3501,11 @@ class TestLerp:
min_value=-1e300, max_value=1e300),
b=st.floats(allow_nan=False, allow_infinity=False,
min_value=-1e300, max_value=1e300))
- def test_lerp_bounded(self, t, a, b):
+ def test_linear_interpolation_formula_bounded(self, t, a, b):
if a <= b:
- assert a <= np.lib.function_base._lerp(a, b, t) <= b
+ assert a <= nfb._linear_interpolation_formula(a, b, t) <= b
else:
- assert b <= np.lib.function_base._lerp(a, b, t) <= a
+ assert b <= nfb._linear_interpolation_formula(a, b, t) <= a
@hypothesis.given(t=st.floats(allow_nan=False, allow_infinity=False,
min_value=0, max_value=1),
@@ -3408,17 +3513,17 @@ class TestLerp:
min_value=-1e300, max_value=1e300),
b=st.floats(allow_nan=False, allow_infinity=False,
min_value=-1e300, max_value=1e300))
- def test_lerp_symmetric(self, t, a, b):
+ def test_linear_interpolation_formula_symmetric(self, t, a, b):
# double subtraction is needed to remove the extra precision of t < 0.5
- left = np.lib.function_base._lerp(a, b, 1 - (1 - t))
- right = np.lib.function_base._lerp(b, a, 1 - t)
+ left = nfb._linear_interpolation_formula(a, b, 1 - (1 - t))
+ right = nfb._linear_interpolation_formula(b, a, 1 - t)
assert left == right
- def test_lerp_0d_inputs(self):
+ def test_linear_interpolation_formula_0d_inputs(self):
a = np.array(2)
b = np.array(5)
t = np.array(0.2)
- assert np.lib.function_base._lerp(a, b, t) == 2.6
+ assert nfb._linear_interpolation_formula(a, b, t) == 2.6
class TestMedian: