-rw-r--r--  numpy/lib/tests/test_function_base.py  105
1 file changed, 100 insertions(+), 5 deletions(-)
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index 09d1195ad..b0944ec85 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -3538,9 +3538,20 @@ class TestPercentile:
             np.percentile([1, 2, 3, 4.0], q)
 
 
+quantile_methods = [
+    'inverted_cdf', 'averaged_inverted_cdf', 'closest_observation',
+    'interpolated_inverted_cdf', 'hazen', 'weibull', 'linear',
+    'median_unbiased', 'normal_unbiased', 'nearest', 'lower', 'higher',
+    'midpoint']
+
+
 class TestQuantile:
     # most of this is already tested by TestPercentile
 
+    def V(self, x, y, alpha):
+        # Identification function used in several tests.
+        return (x >= y) - alpha
+
     def test_max_ulp(self):
         x = [0.0, 0.2, 0.4]
         a = np.quantile(x, 0.45)
@@ -3619,11 +3630,7 @@ class TestQuantile:
method="nearest")
assert res.dtype == dtype
- @pytest.mark.parametrize("method",
- ['inverted_cdf', 'averaged_inverted_cdf', 'closest_observation',
- 'interpolated_inverted_cdf', 'hazen', 'weibull', 'linear',
- 'median_unbiased', 'normal_unbiased',
- 'nearest', 'lower', 'higher', 'midpoint'])
+ @pytest.mark.parametrize("method", quantile_methods)
def test_quantile_monotonic(self, method):
# GH 14685
# test that the return value of quantile is monotonic if p0 is ordered
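The monotonicity property exercised by test_quantile_monotonic can be reproduced standalone: for every supported method, quantiles evaluated at ordered probability levels are nondecreasing. A minimal sketch (the sample data, seed, and subset of methods are illustrative choices, not taken from this patch):

    import numpy as np

    # Quantiles at sorted probability levels must be nondecreasing for
    # every supported method (GH 14685).
    y = np.random.default_rng(0).random(50)
    p0 = np.linspace(0, 1, 11)  # ordered probability levels
    for method in ["linear", "inverted_cdf", "closest_observation"]:
        q = np.quantile(y, p0, method=method)
        assert np.all(np.diff(q) >= 0), method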
@@ -3654,6 +3661,94 @@ class TestQuantile:
         assert np.isscalar(actual)
         assert_equal(np.quantile(a, 0.5), np.nan)
 
+    @pytest.mark.parametrize("method", quantile_methods)
+    @pytest.mark.parametrize("alpha", [0.2, 0.5, 0.9])
+    def test_quantile_identification_equation(self, method, alpha):
+        # Test that the identification equation holds for the empirical
+        # CDF:
+        #   E[V(x, Y)] = 0  <=>  x is the alpha-quantile
+        # with Y the random variable for which we have observed values and
+        # V(x, y) the canonical identification function for the quantile (at
+        # level alpha), see
+        # https://doi.org/10.48550/arXiv.0912.0902
+        rng = np.random.default_rng(4321)
+        # We choose n and alpha such that we cover 3 cases:
+        #  - n * alpha is an integer
+        #  - n * alpha is a float that gets rounded down
+        #  - n * alpha is a float that gets rounded up
+        n = 102  # n * alpha = 20.4, 51. , 91.8
+        y = rng.random(n)
+        x = np.quantile(y, alpha, method=method)
+        if method in ("higher",):
+            # These methods do not fulfill the identification equation.
+            assert np.abs(np.mean(self.V(x, y, alpha))) > 0.1 / n
+        elif int(n * alpha) == n * alpha:
+            # We can expect exact results, up to machine precision.
+            assert_allclose(np.mean(self.V(x, y, alpha)), 0, atol=1e-14)
+        else:
+            # V = (x >= y) - alpha cannot average to zero exactly, but it
+            # does so to within "sample precision".
+            assert_allclose(np.mean(self.V(x, y, alpha)), 0,
+                            atol=1 / n / np.amin([alpha, 1 - alpha]))
+
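The identification-equation check above can be observed in isolation: the empirical mean of V(x, y) = (x >= y) - alpha vanishes, up to machine precision, when x is the alpha-quantile and n * alpha is an integer. A minimal sketch under those assumptions (seed and sample size match the test; the method choice is illustrative):

    import numpy as np

    rng = np.random.default_rng(4321)
    alpha = 0.5
    y = rng.random(102)  # n * alpha = 51 is an integer -> exact case
    x = np.quantile(y, alpha, method="linear")
    # Empirical E[V(x, Y)] with V(x, y) = (x >= y) - alpha
    v_mean = np.mean((x >= y) - alpha)
    assert abs(v_mean) < 1e-14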
+    @pytest.mark.parametrize("method", quantile_methods)
+    @pytest.mark.parametrize("alpha", [0.2, 0.5, 0.9])
+    def test_quantile_add_and_multiply_constant(self, method, alpha):
+        # Test that
+        #  1. quantile(c + x) = c + quantile(x)
+        #  2. quantile(c * x) = c * quantile(x)
+        #  3. quantile(-x) = -quantile(x, 1 - alpha)
+        # On empirical quantiles, these equations do not hold exactly.
+        # Koenker (2005) "Quantile Regression" Chapter 2.2.3 calls these
+        # properties equivariance.
+        rng = np.random.default_rng(4321)
+        # We choose n and alpha such that we cover 3 cases:
+        #  - n * alpha is an integer
+        #  - n * alpha is a float that gets rounded down
+        #  - n * alpha is a float that gets rounded up
+        n = 102  # n * alpha = 20.4, 51. , 91.8
+        y = rng.random(n)
+        q = np.quantile(y, alpha, method=method)
+        c = 13.5
+
+        # 1
+        assert_allclose(np.quantile(c + y, alpha, method=method), c + q)
+        # 2
+        assert_allclose(np.quantile(c * y, alpha, method=method), c * q)
+        # 3
+        q = -np.quantile(-y, 1 - alpha, method=method)
+        if method == "inverted_cdf":
+            if (
+                n * alpha == int(n * alpha)
+                or np.round(n * alpha) == int(n * alpha) + 1
+            ):
+                assert_allclose(q, np.quantile(y, alpha, method="higher"))
+            else:
+                assert_allclose(q, np.quantile(y, alpha, method="lower"))
+        elif method == "closest_observation":
+            if n * alpha == int(n * alpha):
+                assert_allclose(q, np.quantile(y, alpha, method="higher"))
+            elif np.round(n * alpha) == int(n * alpha) + 1:
+                assert_allclose(
+                    q, np.quantile(y, alpha + 1/n, method="higher"))
+            else:
+                assert_allclose(q, np.quantile(y, alpha, method="lower"))
+        elif method == "interpolated_inverted_cdf":
+            assert_allclose(q, np.quantile(y, alpha + 1/n, method=method))
+        elif method == "nearest":
+            if n * alpha == int(n * alpha):
+                assert_allclose(q, np.quantile(y, alpha + 1/n, method=method))
+            else:
+                assert_allclose(q, np.quantile(y, alpha, method=method))
+        elif method == "lower":
+            assert_allclose(q, np.quantile(y, alpha, method="higher"))
+        elif method == "higher":
+            assert_allclose(q, np.quantile(y, alpha, method="lower"))
+        else:
+            # "averaged_inverted_cdf", "hazen", "weibull", "linear",
+            # "median_unbiased", "normal_unbiased", "midpoint"
+            assert_allclose(q, np.quantile(y, alpha, method=method))
+
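The equivariance properties (1)-(3) can likewise be checked directly for a method that satisfies all three, such as "linear", which falls into the final else branch above. A minimal sketch (constants are illustrative):

    import numpy as np

    rng = np.random.default_rng(4321)
    y = rng.random(102)
    alpha, c = 0.2, 13.5
    q = np.quantile(y, alpha, method="linear")
    # 1. translation equivariance
    assert np.allclose(np.quantile(c + y, alpha, method="linear"), c + q)
    # 2. scale equivariance (for c > 0)
    assert np.allclose(np.quantile(c * y, alpha, method="linear"), c * q)
    # 3. reflection: quantile(-y, 1 - alpha) == -quantile(y, alpha)
    assert np.allclose(-np.quantile(-y, 1 - alpha, method="linear"), q)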
 
 class TestLerp:
     @hypothesis.given(t0=st.floats(allow_nan=False, allow_infinity=False,