-rw-r--r--  numpy/lib/tests/test_function_base.py  105
1 file changed, 100 insertions(+), 5 deletions(-)
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index 09d1195ad..b0944ec85 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -3538,9 +3538,20 @@ class TestPercentile:
             np.percentile([1, 2, 3, 4.0], q)
 
 
+quantile_methods = [
+    'inverted_cdf', 'averaged_inverted_cdf', 'closest_observation',
+    'interpolated_inverted_cdf', 'hazen', 'weibull', 'linear',
+    'median_unbiased', 'normal_unbiased', 'nearest', 'lower', 'higher',
+    'midpoint']
+
+
 class TestQuantile:
     # most of this is already tested by TestPercentile
 
+    def V(self, x, y, alpha):
+        # Identification function used in several tests.
+        return (x >= y) - alpha
+
     def test_max_ulp(self):
         x = [0.0, 0.2, 0.4]
         a = np.quantile(x, 0.45)
@@ -3619,11 +3630,7 @@ class TestQuantile:
                          method="nearest")
         assert res.dtype == dtype
 
-    @pytest.mark.parametrize("method",
-        ['inverted_cdf', 'averaged_inverted_cdf', 'closest_observation',
-         'interpolated_inverted_cdf', 'hazen', 'weibull', 'linear',
-         'median_unbiased', 'normal_unbiased',
-         'nearest', 'lower', 'higher', 'midpoint'])
+    @pytest.mark.parametrize("method", quantile_methods)
     def test_quantile_monotonic(self, method):
         # GH 14685
         # test that the return value of quantile is monotonic if p0 is ordered
@@ -3654,6 +3661,94 @@ class TestQuantile:
         assert np.isscalar(actual)
         assert_equal(np.quantile(a, 0.5), np.nan)
 
+    @pytest.mark.parametrize("method", quantile_methods)
+    @pytest.mark.parametrize("alpha", [0.2, 0.5, 0.9])
+    def test_quantile_identification_equation(self, method, alpha):
+        # Test that the identification equation holds for the empirical
+        # CDF:
+        #   E[V(x, Y)] = 0  <=>  x is quantile
+        # with Y the random variable for which we have observed values and
+        # V(x, y) the canonical identification function for the quantile (at
+        # level alpha), see
+        # https://doi.org/10.48550/arXiv.0912.0902
+        rng = np.random.default_rng(4321)
+        # We choose n and alpha such that we cover 3 cases:
+        #  - n * alpha is an integer
+        #  - n * alpha is a float that gets rounded down
+        #  - n * alpha is a float that gets rounded up
+        n = 102  # n * alpha = 20.4, 51. , 91.8
+        y = rng.random(n)
+        x = np.quantile(y, alpha, method=method)
+        if method in ("higher",):
+            # These methods do not fulfill the identification equation.
+            assert np.abs(np.mean(self.V(x, y, alpha))) > 0.1 / n
+        elif int(n * alpha) == n * alpha:
+            # We can expect exact results, up to machine precision.
+            assert_allclose(np.mean(self.V(x, y, alpha)), 0, atol=1e-14)
+        else:
+            # V = (x >= y) - alpha cannot sum to zero exactly but within
+            # "sample precision".
+            assert_allclose(np.mean(self.V(x, y, alpha)), 0,
+                            atol=1 / n / np.amin([alpha, 1 - alpha]))
+
+    @pytest.mark.parametrize("method", quantile_methods)
+    @pytest.mark.parametrize("alpha", [0.2, 0.5, 0.9])
+    def test_quantile_add_and_multiply_constant(self, method, alpha):
+        # Test that
+        #  1. quantile(c + x) = c + quantile(x)
+        #  2. quantile(c * x) = c * quantile(x)
+        #  3. quantile(-x) = -quantile(x, 1 - alpha)
+        #     On empirical quantiles, this equation does not hold exactly.
+        # Koenker (2005) "Quantile Regression" Chapter 2.2.3 calls these
+        # properties equivariance.
+        rng = np.random.default_rng(4321)
+        # We choose n and alpha such that we have cases for
+        #  - n * alpha is an integer
+        #  - n * alpha is a float that gets rounded down
+        #  - n * alpha is a float that gets rounded up
+        n = 102  # n * alpha = 20.4, 51. , 91.8
+        y = rng.random(n)
+        q = np.quantile(y, alpha, method=method)
+        c = 13.5
+
+        # 1
+        assert_allclose(np.quantile(c + y, alpha, method=method), c + q)
+        # 2
+        assert_allclose(np.quantile(c * y, alpha, method=method), c * q)
+        # 3
+        q = -np.quantile(-y, 1 - alpha, method=method)
+        if method == "inverted_cdf":
+            if (
+                n * alpha == int(n * alpha)
+                or np.round(n * alpha) == int(n * alpha) + 1
+            ):
+                assert_allclose(q, np.quantile(y, alpha, method="higher"))
+            else:
+                assert_allclose(q, np.quantile(y, alpha, method="lower"))
+        elif method == "closest_observation":
+            if n * alpha == int(n * alpha):
+                assert_allclose(q, np.quantile(y, alpha, method="higher"))
+            elif np.round(n * alpha) == int(n * alpha) + 1:
+                assert_allclose(
+                    q, np.quantile(y, alpha + 1/n, method="higher"))
+            else:
+                assert_allclose(q, np.quantile(y, alpha, method="lower"))
+        elif method == "interpolated_inverted_cdf":
+            assert_allclose(q, np.quantile(y, alpha + 1/n, method=method))
+        elif method == "nearest":
+            if n * alpha == int(n * alpha):
+                assert_allclose(q, np.quantile(y, alpha + 1/n, method=method))
+            else:
+                assert_allclose(q, np.quantile(y, alpha, method=method))
+        elif method == "lower":
+            assert_allclose(q, np.quantile(y, alpha, method="higher"))
+        elif method == "higher":
+            assert_allclose(q, np.quantile(y, alpha, method="lower"))
+        else:
+            # "averaged_inverted_cdf", "hazen", "weibull", "linear",
+            # "median_unbiased", "normal_unbiased", "midpoint"
+            assert_allclose(q, np.quantile(y, alpha, method=method))
+
 
 class TestLerp:
     @hypothesis.given(t0=st.floats(allow_nan=False,
                                    allow_infinity=False,
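The identification equation that test_quantile_identification_equation checks
can be reproduced outside the test suite. A minimal standalone sketch,
assuming NumPy >= 1.22 (where np.quantile accepts the `method` keyword);
the seed, n, and alpha are taken from the diff:

    import numpy as np

    rng = np.random.default_rng(4321)
    n, alpha = 102, 0.2              # n * alpha = 20.4, not an integer
    y = rng.random(n)
    x = np.quantile(y, alpha, method="linear")
    # V(x, y) = (x >= y) - alpha; its sample mean should vanish at the
    # level-alpha quantile, up to "sample precision" of order 1 / n.
    v_mean = np.mean((x >= y) - alpha)
    assert abs(v_mean) <= 1 / n / min(alpha, 1 - alpha)

For a method such as "higher", by contrast, the sample mean of V stays
bounded away from zero, which is what the first branch of the test asserts.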
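The equivariance properties from Koenker (2005) exercised by
test_quantile_add_and_multiply_constant can likewise be checked directly.
A short sketch under the same assumptions, using "hazen" as an example of a
method for which, per the final else branch of the test, all three
properties hold without remapping to another method:

    import numpy as np
    from numpy.testing import assert_allclose

    rng = np.random.default_rng(4321)
    y = rng.random(102)
    alpha, c = 0.9, 13.5
    q = np.quantile(y, alpha, method="hazen")
    # 1. translation equivariance: quantile(c + y) = c + quantile(y)
    assert_allclose(np.quantile(c + y, alpha, method="hazen"), c + q)
    # 2. scale equivariance (c > 0): quantile(c * y) = c * quantile(y)
    assert_allclose(np.quantile(c * y, alpha, method="hazen"), c * q)
    # 3. reflection: -quantile(-y, 1 - alpha) = quantile(y, alpha); for
    #    methods like "inverted_cdf" or "nearest" the test instead compares
    #    against a shifted alpha or a different method.
    assert_allclose(-np.quantile(-y, 1 - alpha, method="hazen"), q)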