diff options
author | Cheahuychou Mao <mao.cheahuychou@gmail.com> | 2022-09-19 23:54:14 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-09-20 19:14:39 +0000 |
commit | 0baa1bacf682849154e1df565f0bd63ee203c8ca (patch) | |
tree | ba481de5a12cebe8cde8d21aae11a6e71875b1d2 | |
parent | 0b96a8f6f85bb2ec55c2f96b98344105d3a92d24 (diff) | |
download | mongo-0baa1bacf682849154e1df565f0bd63ee203c8ca.tar.gz |
SERVER-69583 Add boost math/statistics library to boost_get_sources
11 files changed, 3276 insertions, 1 deletions
diff --git a/src/third_party/boost/boost/math/statistics/anderson_darling.hpp b/src/third_party/boost/boost/math/statistics/anderson_darling.hpp new file mode 100644 index 00000000000..f892f27e0f6 --- /dev/null +++ b/src/third_party/boost/boost/math/statistics/anderson_darling.hpp @@ -0,0 +1,112 @@ +/* + * Copyright Nick Thompson, 2019 + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + */ + +#ifndef BOOST_MATH_STATISTICS_ANDERSON_DARLING_HPP +#define BOOST_MATH_STATISTICS_ANDERSON_DARLING_HPP + +#include <cmath> +#include <algorithm> +#include <boost/math/statistics/univariate_statistics.hpp> +#include <boost/math/special_functions/erf.hpp> + +namespace boost { namespace math { namespace statistics { + +template<class RandomAccessContainer> +auto anderson_darling_normality_statistic(RandomAccessContainer const & v, + typename RandomAccessContainer::value_type mu = std::numeric_limits<typename RandomAccessContainer::value_type>::quiet_NaN(), + typename RandomAccessContainer::value_type sd = std::numeric_limits<typename RandomAccessContainer::value_type>::quiet_NaN()) +{ + using Real = typename RandomAccessContainer::value_type; + using std::log; + using std::sqrt; + using boost::math::erfc; + + if (std::isnan(mu)) { + mu = boost::math::statistics::mean(v); + } + if (std::isnan(sd)) { + sd = sqrt(boost::math::statistics::sample_variance(v)); + } + + typedef boost::math::policies::policy< + boost::math::policies::promote_float<false>, + boost::math::policies::promote_double<false> > + no_promote_policy; + + // This is where Knuth's literate programming could really come in handy! + // I need some LaTeX. The idea is that before any observation, the ecdf is identically zero. + // So we need to compute: + // \int_{-\infty}^{v_0} \frac{F(x)F'(x)}{1- F(x)} \, \mathrm{d}x, where F(x) := \frac{1}{2}[1+\erf(\frac{x-\mu}{\sigma \sqrt{2}})] + // Astonishingly, there is an analytic evaluation to this integral, as you can validate with the following Mathematica command: + // Integrate[(1/2 (1 + Erf[(x - mu)/Sqrt[2*sigma^2]])*Exp[-(x - mu)^2/(2*sigma^2)]*1/Sqrt[2*\[Pi]*sigma^2])/(1 - 1/2 (1 + Erf[(x - mu)/Sqrt[2*sigma^2]])), + // {x, -Infinity, x0}, Assumptions -> {x0 \[Element] Reals && mu \[Element] Reals && sigma > 0}] + // This gives (for s = x-mu/sqrt(2sigma^2)) + // -1/2 + erf(s) + log(2/(1+erf(s))) + + + Real inv_var_scale = 1/(sd*sqrt(Real(2))); + Real s0 = (v[0] - mu)*inv_var_scale; + Real erfcs0 = erfc(s0, no_promote_policy()); + // Note that if erfcs0 == 0, then left_tail = inf (numerically), and hence the entire integral is numerically infinite: + if (erfcs0 <= 0) { + return std::numeric_limits<Real>::infinity(); + } + + // Note that we're going to add erfcs0/2 when we compute the integral over [x_0, x_1], so drop it here: + Real left_tail = -1 + log(Real(2)); + + + // For the right tail, the ecdf is identically 1. + // Hence we need the integral: + // \int_{v_{n-1}}^{\infty} \frac{(1-F(x))F'(x)}{F(x)} \, \mathrm{d}x + // This also has an analytic evaluation! It can be found via the following Mathematica command: + // Integrate[(E^(-(z^2/2)) *(1 - 1/2 (1 + Erf[z/Sqrt[2]])))/(Sqrt[2 \[Pi]] (1/2 (1 + Erf[z/Sqrt[2]]))), + // {z, zn, \[Infinity]}, Assumptions -> {zn \[Element] Reals && mu \[Element] Reals}] + // This gives (for sf = xf-mu/sqrt(2sigma^2)) + // -1/2 + erf(sf)/2 + 2log(2/(1+erf(sf))) + + Real sf = (v[v.size()-1] - mu)*inv_var_scale; + //Real erfcsf = erfc<Real>(sf, no_promote_policy()); + // This is the actual value of the tail integral. However, the -erfcsf/2 cancels from the integral over [v_{n-2}, v_{n-1}]: + //Real right_tail = -erfcsf/2 + log(Real(2)) - log(2-erfcsf); + + // Use erfc(-x) = 2 - erfc(x) + Real erfcmsf = erfc<Real>(-sf, no_promote_policy()); + // Again if this is precisely zero then the integral is numerically infinite: + if (erfcmsf == 0) { + return std::numeric_limits<Real>::infinity(); + } + Real right_tail = log(2/erfcmsf); + + // Now we need each integral: + // \int_{v_i}^{v_{i+1}} \frac{(i+1/n - F(x))^2F'(x)}{F(x)(1-F(x))} \, \mathrm{d}x + // Again we get an analytical evaluation via the following Mathematica command: + // Integrate[((E^(-(z^2/2))/Sqrt[2 \[Pi]])*(k1 - F[z])^2)/(F[z]*(1 - F[z])), + // {z, z1, z2}, Assumptions -> {z1 \[Element] Reals && z2 \[Element] Reals &&k1 \[Element] Reals}] // FullSimplify + + Real integrals = 0; + int64_t N = v.size(); + for (int64_t i = 0; i < N - 1; ++i) { + if (v[i] > v[i+1]) { + throw std::domain_error("Input data must be sorted in increasing order v[0] <= v[1] <= . . . <= v[n-1]"); + } + + Real k = (i+1)/Real(N); + Real s1 = (v[i+1]-mu)*inv_var_scale; + Real erfcs1 = erfc<Real>(s1, no_promote_policy()); + Real term = k*(k*log(erfcs0*(-2 + erfcs1)/(erfcs1*(-2 + erfcs0))) + 2*log(erfcs1/erfcs0)); + + integrals += term; + s0 = s1; + erfcs0 = erfcs1; + } + integrals -= log(erfcs0); + return v.size()*(left_tail + right_tail + integrals); +} + +}}} +#endif diff --git a/src/third_party/boost/boost/math/statistics/bivariate_statistics.hpp b/src/third_party/boost/boost/math/statistics/bivariate_statistics.hpp new file mode 100644 index 00000000000..1854898139b --- /dev/null +++ b/src/third_party/boost/boost/math/statistics/bivariate_statistics.hpp @@ -0,0 +1,473 @@ +// (C) Copyright Nick Thompson 2018. +// (C) Copyright Matt Borland 2021. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_STATISTICS_BIVARIATE_STATISTICS_HPP +#define BOOST_MATH_STATISTICS_BIVARIATE_STATISTICS_HPP + +#include <iterator> +#include <tuple> +#include <type_traits> +#include <stdexcept> +#include <vector> +#include <algorithm> +#include <cmath> +#include <cstddef> +#include <boost/math/tools/assert.hpp> +#include <boost/math/tools/config.hpp> + +// Support compilers with P0024R2 implemented without linking TBB +// https://en.cppreference.com/w/cpp/compiler_support +#if !defined(BOOST_NO_CXX17_HDR_EXECUTION) && defined(BOOST_HAS_THREADS) +#include <execution> +#include <future> +#include <thread> +#define EXEC_COMPATIBLE +#endif + +namespace boost{ namespace math{ namespace statistics { namespace detail { + +// See Equation III.9 of "Numerically Stable, Single-Pass, Parallel Statistics Algorithms", Bennet et al. +template<typename ReturnType, typename ForwardIterator> +ReturnType means_and_covariance_seq_impl(ForwardIterator u_begin, ForwardIterator u_end, ForwardIterator v_begin, ForwardIterator v_end) +{ + using Real = typename std::tuple_element<0, ReturnType>::type; + + Real cov = 0; + ForwardIterator u_it = u_begin; + ForwardIterator v_it = v_begin; + Real mu_u = *u_it++; + Real mu_v = *v_it++; + std::size_t i = 1; + + while(u_it != u_end && v_it != v_end) + { + Real u_temp = (*u_it++ - mu_u)/(i+1); + Real v_temp = *v_it++ - mu_v; + cov += i*u_temp*v_temp; + mu_u = mu_u + u_temp; + mu_v = mu_v + v_temp/(i+1); + i = i + 1; + } + + if(u_it != u_end || v_it != v_end) + { + throw std::domain_error("The size of each sample set must be the same to compute covariance"); + } + + return std::make_tuple(mu_u, mu_v, cov/i, Real(i)); +} + +#ifdef EXEC_COMPATIBLE + +// Numerically stable parallel computation of (co-)variance +// https://dl.acm.org/doi/10.1145/3221269.3223036 +template<typename ReturnType, typename ForwardIterator> +ReturnType means_and_covariance_parallel_impl(ForwardIterator u_begin, ForwardIterator u_end, ForwardIterator v_begin, ForwardIterator v_end) +{ + using Real = typename std::tuple_element<0, ReturnType>::type; + + const auto u_elements = std::distance(u_begin, u_end); + const auto v_elements = std::distance(v_begin, v_end); + + if(u_elements != v_elements) + { + throw std::domain_error("The size of each sample set must be the same to compute covariance"); + } + + const unsigned max_concurrency = std::thread::hardware_concurrency() == 0 ? 2u : std::thread::hardware_concurrency(); + unsigned num_threads = 2u; + + // 5.16 comes from benchmarking. See boost/math/reporting/performance/bivariate_statistics_performance.cpp + // Threading is faster for: 10 + 5.16e-3 N/j <= 5.16e-3N => N >= 10^4j/5.16(j-1). + const auto parallel_lower_bound = 10e4*max_concurrency/(5.16*(max_concurrency-1)); + const auto parallel_upper_bound = 10e4*2/5.16; // j = 2 + + // https://lemire.me/blog/2020/01/30/cost-of-a-thread-in-c-under-linux/ + if(u_elements < parallel_lower_bound) + { + return means_and_covariance_seq_impl<ReturnType>(u_begin, u_end, v_begin, v_end); + } + else if(u_elements >= parallel_upper_bound) + { + num_threads = max_concurrency; + } + else + { + for(unsigned i = 3; i < max_concurrency; ++i) + { + if(parallel_lower_bound < 10e4*i/(5.16*(i-1))) + { + num_threads = i; + break; + } + } + } + + std::vector<std::future<ReturnType>> future_manager; + const auto elements_per_thread = std::ceil(static_cast<double>(u_elements)/num_threads); + + ForwardIterator u_it = u_begin; + ForwardIterator v_it = v_begin; + + for(std::size_t i = 0; i < num_threads - 1; ++i) + { + future_manager.emplace_back(std::async(std::launch::async | std::launch::deferred, [u_it, v_it, elements_per_thread]() -> ReturnType + { + return means_and_covariance_seq_impl<ReturnType>(u_it, std::next(u_it, elements_per_thread), v_it, std::next(v_it, elements_per_thread)); + })); + u_it = std::next(u_it, elements_per_thread); + v_it = std::next(v_it, elements_per_thread); + } + + future_manager.emplace_back(std::async(std::launch::async | std::launch::deferred, [u_it, u_end, v_it, v_end]() -> ReturnType + { + return means_and_covariance_seq_impl<ReturnType>(u_it, u_end, v_it, v_end); + })); + + ReturnType temp = future_manager[0].get(); + Real mu_u_a = std::get<0>(temp); + Real mu_v_a = std::get<1>(temp); + Real cov_a = std::get<2>(temp); + Real n_a = std::get<3>(temp); + + for(std::size_t i = 1; i < future_manager.size(); ++i) + { + temp = future_manager[i].get(); + Real mu_u_b = std::get<0>(temp); + Real mu_v_b = std::get<1>(temp); + Real cov_b = std::get<2>(temp); + Real n_b = std::get<3>(temp); + + const Real n_ab = n_a + n_b; + const Real delta_u = mu_u_b - mu_u_a; + const Real delta_v = mu_v_b - mu_v_a; + + cov_a = cov_a + cov_b + (-delta_u)*(-delta_v)*((n_a*n_b)/n_ab); + mu_u_a = mu_u_a + delta_u*(n_b/n_ab); + mu_v_a = mu_v_a + delta_v*(n_b/n_ab); + n_a = n_ab; + } + + return std::make_tuple(mu_u_a, mu_v_a, cov_a, n_a); +} + +#endif // EXEC_COMPATIBLE + +template<typename ReturnType, typename ForwardIterator> +ReturnType correlation_coefficient_seq_impl(ForwardIterator u_begin, ForwardIterator u_end, ForwardIterator v_begin, ForwardIterator v_end) +{ + using Real = typename std::tuple_element<0, ReturnType>::type; + using std::sqrt; + + Real cov = 0; + ForwardIterator u_it = u_begin; + ForwardIterator v_it = v_begin; + Real mu_u = *u_it++; + Real mu_v = *v_it++; + Real Qu = 0; + Real Qv = 0; + std::size_t i = 1; + + while(u_it != u_end && v_it != v_end) + { + Real u_tmp = *u_it++ - mu_u; + Real v_tmp = *v_it++ - mu_v; + Qu = Qu + (i*u_tmp*u_tmp)/(i+1); + Qv = Qv + (i*v_tmp*v_tmp)/(i+1); + cov += i*u_tmp*v_tmp/(i+1); + mu_u = mu_u + u_tmp/(i+1); + mu_v = mu_v + v_tmp/(i+1); + ++i; + } + + + // If one dataset is constant, then the correlation coefficient is undefined. + // See https://stats.stackexchange.com/questions/23676/normalized-correlation-with-a-constant-vector + // Thanks to zbjornson for pointing this out. + if (Qu == 0 || Qv == 0) + { + return std::make_tuple(mu_u, Qu, mu_v, Qv, cov, std::numeric_limits<Real>::quiet_NaN(), Real(i)); + } + + // Make sure rho in [-1, 1], even in the presence of numerical noise. + Real rho = cov/sqrt(Qu*Qv); + if (rho > 1) { + rho = 1; + } + if (rho < -1) { + rho = -1; + } + + return std::make_tuple(mu_u, Qu, mu_v, Qv, cov, rho, Real(i)); +} + +#ifdef EXEC_COMPATIBLE + +// Numerically stable parallel computation of (co-)variance: +// https://dl.acm.org/doi/10.1145/3221269.3223036 +// +// Parallel computation of variance: +// http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf +template<typename ReturnType, typename ForwardIterator> +ReturnType correlation_coefficient_parallel_impl(ForwardIterator u_begin, ForwardIterator u_end, ForwardIterator v_begin, ForwardIterator v_end) +{ + using Real = typename std::tuple_element<0, ReturnType>::type; + + const auto u_elements = std::distance(u_begin, u_end); + const auto v_elements = std::distance(v_begin, v_end); + + if(u_elements != v_elements) + { + throw std::domain_error("The size of each sample set must be the same to compute covariance"); + } + + const unsigned max_concurrency = std::thread::hardware_concurrency() == 0 ? 2u : std::thread::hardware_concurrency(); + unsigned num_threads = 2u; + + // 3.25 comes from benchmarking. See boost/math/reporting/performance/bivariate_statistics_performance.cpp + // Threading is faster for: 10 + 3.25e-3 N/j <= 3.25e-3N => N >= 10^4j/3.25(j-1). + const auto parallel_lower_bound = 10e4*max_concurrency/(3.25*(max_concurrency-1)); + const auto parallel_upper_bound = 10e4*2/3.25; // j = 2 + + // https://lemire.me/blog/2020/01/30/cost-of-a-thread-in-c-under-linux/ + if(u_elements < parallel_lower_bound) + { + return correlation_coefficient_seq_impl<ReturnType>(u_begin, u_end, v_begin, v_end); + } + else if(u_elements >= parallel_upper_bound) + { + num_threads = max_concurrency; + } + else + { + for(unsigned i = 3; i < max_concurrency; ++i) + { + if(parallel_lower_bound < 10e4*i/(3.25*(i-1))) + { + num_threads = i; + break; + } + } + } + + std::vector<std::future<ReturnType>> future_manager; + const auto elements_per_thread = std::ceil(static_cast<double>(u_elements)/num_threads); + + ForwardIterator u_it = u_begin; + ForwardIterator v_it = v_begin; + + for(std::size_t i = 0; i < num_threads - 1; ++i) + { + future_manager.emplace_back(std::async(std::launch::async | std::launch::deferred, [u_it, v_it, elements_per_thread]() -> ReturnType + { + return correlation_coefficient_seq_impl<ReturnType>(u_it, std::next(u_it, elements_per_thread), v_it, std::next(v_it, elements_per_thread)); + })); + u_it = std::next(u_it, elements_per_thread); + v_it = std::next(v_it, elements_per_thread); + } + + future_manager.emplace_back(std::async(std::launch::async | std::launch::deferred, [u_it, u_end, v_it, v_end]() -> ReturnType + { + return correlation_coefficient_seq_impl<ReturnType>(u_it, u_end, v_it, v_end); + })); + + ReturnType temp = future_manager[0].get(); + Real mu_u_a = std::get<0>(temp); + Real Qu_a = std::get<1>(temp); + Real mu_v_a = std::get<2>(temp); + Real Qv_a = std::get<3>(temp); + Real cov_a = std::get<4>(temp); + Real n_a = std::get<6>(temp); + + for(std::size_t i = 1; i < future_manager.size(); ++i) + { + temp = future_manager[i].get(); + Real mu_u_b = std::get<0>(temp); + Real Qu_b = std::get<1>(temp); + Real mu_v_b = std::get<2>(temp); + Real Qv_b = std::get<3>(temp); + Real cov_b = std::get<4>(temp); + Real n_b = std::get<6>(temp); + + const Real n_ab = n_a + n_b; + const Real delta_u = mu_u_b - mu_u_a; + const Real delta_v = mu_v_b - mu_v_a; + + cov_a = cov_a + cov_b + (-delta_u)*(-delta_v)*((n_a*n_b)/n_ab); + mu_u_a = mu_u_a + delta_u*(n_b/n_ab); + mu_v_a = mu_v_a + delta_v*(n_b/n_ab); + Qu_a = Qu_a + Qu_b + delta_u*delta_u*((n_a*n_b)/n_ab); + Qv_b = Qv_a + Qv_b + delta_v*delta_v*((n_a*n_b)/n_ab); + n_a = n_ab; + } + + // If one dataset is constant, then the correlation coefficient is undefined. + // See https://stats.stackexchange.com/questions/23676/normalized-correlation-with-a-constant-vector + // Thanks to zbjornson for pointing this out. + if (Qu_a == 0 || Qv_a == 0) + { + return std::make_tuple(mu_u_a, Qu_a, mu_v_a, Qv_a, cov_a, std::numeric_limits<Real>::quiet_NaN(), n_a); + } + + // Make sure rho in [-1, 1], even in the presence of numerical noise. + Real rho = cov_a/sqrt(Qu_a*Qv_a); + if (rho > 1) { + rho = 1; + } + if (rho < -1) { + rho = -1; + } + + return std::make_tuple(mu_u_a, Qu_a, mu_v_a, Qv_a, cov_a, rho, n_a); +} + +#endif // EXEC_COMPATIBLE + +} // namespace detail + +#ifdef EXEC_COMPATIBLE + +template<typename ExecutionPolicy, typename Container, typename Real = typename Container::value_type> +inline auto means_and_covariance(ExecutionPolicy&& exec, Container const & u, Container const & v) +{ + if constexpr (std::is_same_v<std::remove_reference_t<decltype(exec)>, decltype(std::execution::seq)>) + { + if constexpr (std::is_integral_v<Real>) + { + using ReturnType = std::tuple<double, double, double, double>; + ReturnType temp = detail::means_and_covariance_seq_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v)); + return std::make_tuple(std::get<0>(temp), std::get<1>(temp), std::get<2>(temp)); + } + else + { + using ReturnType = std::tuple<Real, Real, Real, Real>; + ReturnType temp = detail::means_and_covariance_seq_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v)); + return std::make_tuple(std::get<0>(temp), std::get<1>(temp), std::get<2>(temp)); + } + } + else + { + if constexpr (std::is_integral_v<Real>) + { + using ReturnType = std::tuple<double, double, double, double>; + ReturnType temp = detail::means_and_covariance_parallel_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v)); + return std::make_tuple(std::get<0>(temp), std::get<1>(temp), std::get<2>(temp)); + } + else + { + using ReturnType = std::tuple<Real, Real, Real, Real>; + ReturnType temp = detail::means_and_covariance_parallel_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v)); + return std::make_tuple(std::get<0>(temp), std::get<1>(temp), std::get<2>(temp)); + } + } +} + +template<typename Container> +inline auto means_and_covariance(Container const & u, Container const & v) +{ + return means_and_covariance(std::execution::seq, u, v); +} + +template<typename ExecutionPolicy, typename Container> +inline auto covariance(ExecutionPolicy&& exec, Container const & u, Container const & v) +{ + return std::get<2>(means_and_covariance(exec, u, v)); +} + +template<typename Container> +inline auto covariance(Container const & u, Container const & v) +{ + return covariance(std::execution::seq, u, v); +} + +template<typename ExecutionPolicy, typename Container, typename Real = typename Container::value_type> +inline auto correlation_coefficient(ExecutionPolicy&& exec, Container const & u, Container const & v) +{ + if constexpr (std::is_same_v<std::remove_reference_t<decltype(exec)>, decltype(std::execution::seq)>) + { + if constexpr (std::is_integral_v<Real>) + { + using ReturnType = std::tuple<double, double, double, double, double, double, double>; + return std::get<5>(detail::correlation_coefficient_seq_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v))); + } + else + { + using ReturnType = std::tuple<Real, Real, Real, Real, Real, Real, Real>; + return std::get<5>(detail::correlation_coefficient_seq_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v))); + } + } + else + { + if constexpr (std::is_integral_v<Real>) + { + using ReturnType = std::tuple<double, double, double, double, double, double, double>; + return std::get<5>(detail::correlation_coefficient_parallel_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v))); + } + else + { + using ReturnType = std::tuple<Real, Real, Real, Real, Real, Real, Real>; + return std::get<5>(detail::correlation_coefficient_parallel_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v))); + } + } +} + +template<typename Container, typename Real = typename Container::value_type> +inline auto correlation_coefficient(Container const & u, Container const & v) +{ + return correlation_coefficient(std::execution::seq, u, v); +} + +#else // C++11 and single threaded bindings + +template<typename Container, typename Real = typename Container::value_type, typename std::enable_if<std::is_integral<Real>::value, bool>::type = true> +inline auto means_and_covariance(Container const & u, Container const & v) -> std::tuple<double, double, double> +{ + using ReturnType = std::tuple<double, double, double, double>; + ReturnType temp = detail::means_and_covariance_seq_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v)); + return std::make_tuple(std::get<0>(temp), std::get<1>(temp), std::get<2>(temp)); +} + +template<typename Container, typename Real = typename Container::value_type, typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true> +inline auto means_and_covariance(Container const & u, Container const & v) -> std::tuple<Real, Real, Real> +{ + using ReturnType = std::tuple<Real, Real, Real, Real>; + ReturnType temp = detail::means_and_covariance_seq_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v)); + return std::make_tuple(std::get<0>(temp), std::get<1>(temp), std::get<2>(temp)); +} + +template<typename Container, typename Real = typename Container::value_type, typename std::enable_if<std::is_integral<Real>::value, bool>::type = true> +inline double covariance(Container const & u, Container const & v) +{ + using ReturnType = std::tuple<double, double, double, double>; + return std::get<2>(detail::means_and_covariance_seq_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v))); +} + +template<typename Container, typename Real = typename Container::value_type, typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true> +inline Real covariance(Container const & u, Container const & v) +{ + using ReturnType = std::tuple<Real, Real, Real, Real>; + return std::get<2>(detail::means_and_covariance_seq_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v))); +} + +template<typename Container, typename Real = typename Container::value_type, typename std::enable_if<std::is_integral<Real>::value, bool>::type = true> +inline double correlation_coefficient(Container const & u, Container const & v) +{ + using ReturnType = std::tuple<double, double, double, double, double, double, double>; + return std::get<5>(detail::correlation_coefficient_seq_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v))); +} + +template<typename Container, typename Real = typename Container::value_type, typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true> +inline Real correlation_coefficient(Container const & u, Container const & v) +{ + using ReturnType = std::tuple<Real, Real, Real, Real, Real, Real, Real>; + return std::get<5>(detail::correlation_coefficient_seq_impl<ReturnType>(std::begin(u), std::end(u), std::begin(v), std::end(v))); +} + +#endif + +}}} // namespace boost::math::statistics + +#endif diff --git a/src/third_party/boost/boost/math/statistics/detail/single_pass.hpp b/src/third_party/boost/boost/math/statistics/detail/single_pass.hpp new file mode 100644 index 00000000000..7388509658d --- /dev/null +++ b/src/third_party/boost/boost/math/statistics/detail/single_pass.hpp @@ -0,0 +1,401 @@ +// (C) Copyright Nick Thompson 2018 +// (C) Copyright Matt Borland 2020 +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_STATISTICS_UNIVARIATE_STATISTICS_DETAIL_SINGLE_PASS_HPP +#define BOOST_MATH_STATISTICS_UNIVARIATE_STATISTICS_DETAIL_SINGLE_PASS_HPP + +#include <boost/math/tools/config.hpp> +#include <boost/math/tools/assert.hpp> +#include <tuple> +#include <iterator> +#include <type_traits> +#include <cmath> +#include <algorithm> +#include <valarray> +#include <stdexcept> +#include <functional> +#include <vector> + +#ifdef BOOST_HAS_THREADS +#include <future> +#include <thread> +#endif + +namespace boost { namespace math { namespace statistics { namespace detail { + +template<typename ReturnType, typename ForwardIterator> +ReturnType mean_sequential_impl(ForwardIterator first, ForwardIterator last) +{ + const std::size_t elements {static_cast<std::size_t>(std::distance(first, last))}; + std::valarray<ReturnType> mu {0, 0, 0, 0}; + std::valarray<ReturnType> temp {0, 0, 0, 0}; + ReturnType i {1}; + const ForwardIterator end {std::next(first, elements - (elements % 4))}; + ForwardIterator it {first}; + + while(it != end) + { + const ReturnType inv {ReturnType(1) / i}; + temp = {static_cast<ReturnType>(*it++), static_cast<ReturnType>(*it++), static_cast<ReturnType>(*it++), static_cast<ReturnType>(*it++)}; + temp -= mu; + mu += (temp *= inv); + i += 1; + } + + const ReturnType num1 {ReturnType(elements - (elements % 4))/ReturnType(4)}; + const ReturnType num2 {num1 + ReturnType(elements % 4)}; + + while(it != last) + { + mu[3] += (*it-mu[3])/i; + i += 1; + ++it; + } + + return (num1 * std::valarray<ReturnType>(mu[std::slice(0,3,1)]).sum() + num2 * mu[3]) / ReturnType(elements); +} + +// Higham, Accuracy and Stability, equation 1.6a and 1.6b: +// Calculates Mean, M2, and variance +template<typename ReturnType, typename ForwardIterator> +ReturnType variance_sequential_impl(ForwardIterator first, ForwardIterator last) +{ + using Real = typename std::tuple_element<0, ReturnType>::type; + + Real M = *first; + Real Q = 0; + Real k = 2; + Real M2 = 0; + std::size_t n = 1; + + for(auto it = std::next(first); it != last; ++it) + { + Real tmp = (*it - M) / k; + Real delta_1 = *it - M; + Q += k*(k-1)*tmp*tmp; + M += tmp; + k += 1; + Real delta_2 = *it - M; + M2 += delta_1 * delta_2; + ++n; + } + + return std::make_tuple(M, M2, Q/(k-1), Real(n)); +} + +// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Higher-order_statistics +template<typename ReturnType, typename ForwardIterator> +ReturnType first_four_moments_sequential_impl(ForwardIterator first, ForwardIterator last) +{ + using Real = typename std::tuple_element<0, ReturnType>::type; + using Size = typename std::tuple_element<4, ReturnType>::type; + + Real M1 = *first; + Real M2 = 0; + Real M3 = 0; + Real M4 = 0; + Size n = 2; + for (auto it = std::next(first); it != last; ++it) + { + Real delta21 = *it - M1; + Real tmp = delta21/n; + M4 = M4 + tmp*(tmp*tmp*delta21*((n-1)*(n*n-3*n+3)) + 6*tmp*M2 - 4*M3); + M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2); + M2 = M2 + tmp*(n-1)*delta21; + M1 = M1 + tmp; + n += 1; + } + + return std::make_tuple(M1, M2, M3, M4, n-1); +} + +#ifdef BOOST_HAS_THREADS + +// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Higher-order_statistics +// EQN 3.1: https://www.osti.gov/servlets/purl/1426900 +template<typename ReturnType, typename ForwardIterator> +ReturnType first_four_moments_parallel_impl(ForwardIterator first, ForwardIterator last) +{ + using Real = typename std::tuple_element<0, ReturnType>::type; + + const auto elements = std::distance(first, last); + const unsigned max_concurrency = std::thread::hardware_concurrency() == 0 ? 2u : std::thread::hardware_concurrency(); + unsigned num_threads = 2u; + + // Threading is faster for: 10 + 5.13e-3 N/j <= 5.13e-3N => N >= 10^4j/5.13(j-1). + const auto parallel_lower_bound = 10e4*max_concurrency/(5.13*(max_concurrency-1)); + const auto parallel_upper_bound = 10e4*2/5.13; // j = 2 + + // https://lemire.me/blog/2020/01/30/cost-of-a-thread-in-c-under-linux/ + if(elements < parallel_lower_bound) + { + return detail::first_four_moments_sequential_impl<ReturnType>(first, last); + } + else if(elements >= parallel_upper_bound) + { + num_threads = max_concurrency; + } + else + { + for(unsigned i = 3; i < max_concurrency; ++i) + { + if(parallel_lower_bound < 10e4*i/(5.13*(i-1))) + { + num_threads = i; + break; + } + } + } + + std::vector<std::future<ReturnType>> future_manager; + const auto elements_per_thread = std::ceil(static_cast<double>(elements) / num_threads); + + auto it = first; + for(std::size_t i {}; i < num_threads - 1; ++i) + { + future_manager.emplace_back(std::async(std::launch::async | std::launch::deferred, [it, elements_per_thread]() -> ReturnType + { + return first_four_moments_sequential_impl<ReturnType>(it, std::next(it, elements_per_thread)); + })); + it = std::next(it, elements_per_thread); + } + + future_manager.emplace_back(std::async(std::launch::async | std::launch::deferred, [it, last]() -> ReturnType + { + return first_four_moments_sequential_impl<ReturnType>(it, last); + })); + + auto temp = future_manager[0].get(); + Real M1_a = std::get<0>(temp); + Real M2_a = std::get<1>(temp); + Real M3_a = std::get<2>(temp); + Real M4_a = std::get<3>(temp); + Real range_a = std::get<4>(temp); + + for(std::size_t i = 1; i < future_manager.size(); ++i) + { + temp = future_manager[i].get(); + Real M1_b = std::get<0>(temp); + Real M2_b = std::get<1>(temp); + Real M3_b = std::get<2>(temp); + Real M4_b = std::get<3>(temp); + Real range_b = std::get<4>(temp); + + const Real n_ab = range_a + range_b; + const Real delta = M1_b - M1_a; + + M1_a = (range_a * M1_a + range_b * M1_b) / n_ab; + M2_a = M2_a + M2_b + delta * delta * (range_a * range_b / n_ab); + M3_a = M3_a + M3_b + (delta * delta * delta) * range_a * range_b * (range_a - range_b) / (n_ab * n_ab) + + Real(3) * delta * (range_a * M2_b - range_b * M2_a) / n_ab; + M4_a = M4_a + M4_b + (delta * delta * delta * delta) * range_a * range_b * (range_a * range_a - range_a * range_b + range_b * range_b) / (n_ab * n_ab * n_ab) + + Real(6) * delta * delta * (range_a * range_a * M2_b + range_b * range_b * M2_a) / (n_ab * n_ab) + + Real(4) * delta * (range_a * M3_b - range_b * M3_a) / n_ab; + range_a = n_ab; + } + + return std::make_tuple(M1_a, M2_a, M3_a, M4_a, elements); +} + +#endif // BOOST_HAS_THREADS + +// Follows equation 1.5 of: +// https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf +template<typename ReturnType, typename ForwardIterator> +ReturnType skewness_sequential_impl(ForwardIterator first, ForwardIterator last) +{ + using std::sqrt; + BOOST_MATH_ASSERT_MSG(first != last, "At least one sample is required to compute skewness."); + + ReturnType M1 = *first; + ReturnType M2 = 0; + ReturnType M3 = 0; + ReturnType n = 2; + + for (auto it = std::next(first); it != last; ++it) + { + ReturnType delta21 = *it - M1; + ReturnType tmp = delta21/n; + M3 += tmp*((n-1)*(n-2)*delta21*tmp - 3*M2); + M2 += tmp*(n-1)*delta21; + M1 += tmp; + n += 1; + } + + ReturnType var = M2/(n-1); + + if (var == 0) + { + // The limit is technically undefined, but the interpretation here is clear: + // A constant dataset has no skewness. + return ReturnType(0); + } + + ReturnType skew = M3/(M2*sqrt(var)); + return skew; +} + +template<typename ReturnType, typename ForwardIterator> +ReturnType gini_coefficient_sequential_impl(ForwardIterator first, ForwardIterator last) +{ + ReturnType i = 1; + ReturnType num = 0; + ReturnType denom = 0; + + for(auto it = first; it != last; ++it) + { + num += *it*i; + denom += *it; + ++i; + } + + // If the l1 norm is zero, all elements are zero, so every element is the same. + if(denom == 0) + { + return ReturnType(0); + } + else + { + return ((2*num)/denom - i)/(i-1); + } +} + +template<typename ReturnType, typename ForwardIterator> +ReturnType gini_range_fraction(ForwardIterator first, ForwardIterator last, std::size_t starting_index) +{ + using Real = typename std::tuple_element<0, ReturnType>::type; + + std::size_t i = starting_index + 1; + Real num = 0; + Real denom = 0; + + for(auto it = first; it != last; ++it) + { + num += *it*i; + denom += *it; + ++i; + } + + return std::make_tuple(num, denom, i); +} + +#ifdef BOOST_HAS_THREADS + +template<typename ReturnType, typename ExecutionPolicy, typename ForwardIterator> +ReturnType gini_coefficient_parallel_impl(ExecutionPolicy&&, ForwardIterator first, ForwardIterator last) +{ + using range_tuple = std::tuple<ReturnType, ReturnType, std::size_t>; + + const auto elements = std::distance(first, last); + const unsigned max_concurrency = std::thread::hardware_concurrency() == 0 ? 2u : std::thread::hardware_concurrency(); + unsigned num_threads = 2u; + + // Threading is faster for: 10 + 10.12e-3 N/j <= 10.12e-3N => N >= 10^4j/10.12(j-1). + const auto parallel_lower_bound = 10e4*max_concurrency/(10.12*(max_concurrency-1)); + const auto parallel_upper_bound = 10e4*2/10.12; // j = 2 + + // https://lemire.me/blog/2020/01/30/cost-of-a-thread-in-c-under-linux/ + if(elements < parallel_lower_bound) + { + return gini_coefficient_sequential_impl<ReturnType>(first, last); + } + else if(elements >= parallel_upper_bound) + { + num_threads = max_concurrency; + } + else + { + for(unsigned i = 3; i < max_concurrency; ++i) + { + if(parallel_lower_bound < 10e4*i/(10.12*(i-1))) + { + num_threads = i; + break; + } + } + } + + std::vector<std::future<range_tuple>> future_manager; + const auto elements_per_thread = std::ceil(static_cast<double>(elements) / num_threads); + + auto it = first; + for(std::size_t i {}; i < num_threads - 1; ++i) + { + future_manager.emplace_back(std::async(std::launch::async | std::launch::deferred, [it, elements_per_thread, i]() -> range_tuple + { + return gini_range_fraction<range_tuple>(it, std::next(it, elements_per_thread), i*elements_per_thread); + })); + it = std::next(it, elements_per_thread); + } + + future_manager.emplace_back(std::async(std::launch::async | std::launch::deferred, [it, last, num_threads, elements_per_thread]() -> range_tuple + { + return gini_range_fraction<range_tuple>(it, last, (num_threads - 1)*elements_per_thread); + })); + + ReturnType num = 0; + ReturnType denom = 0; + + for(std::size_t i = 0; i < future_manager.size(); ++i) + { + auto temp = future_manager[i].get(); + num += std::get<0>(temp); + denom += std::get<1>(temp); + } + + // If the l1 norm is zero, all elements are zero, so every element is the same. + if(denom == 0) + { + return ReturnType(0); + } + else + { + return ((2*num)/denom - elements)/(elements-1); + } +} + +#endif // BOOST_HAS_THREADS + +template<typename ForwardIterator, typename OutputIterator> +OutputIterator mode_impl(ForwardIterator first, ForwardIterator last, OutputIterator output) +{ + using Z = typename std::iterator_traits<ForwardIterator>::value_type; + using Size = typename std::iterator_traits<ForwardIterator>::difference_type; + + std::vector<Z> modes {}; + modes.reserve(16); + Size max_counter {0}; + + while(first != last) + { + Size current_count {0}; + ForwardIterator end_it {first}; + while(end_it != last && *end_it == *first) + { + ++current_count; + ++end_it; + } + + if(current_count > max_counter) + { + modes.resize(1); + modes[0] = *first; + max_counter = current_count; + } + + else if(current_count == max_counter) + { + modes.emplace_back(*first); + } + + first = end_it; + } + + return std::move(modes.begin(), modes.end(), output); +} +}}}} + +#endif // BOOST_MATH_STATISTICS_UNIVARIATE_STATISTICS_DETAIL_SINGLE_PASS_HPP diff --git a/src/third_party/boost/boost/math/statistics/linear_regression.hpp b/src/third_party/boost/boost/math/statistics/linear_regression.hpp new file mode 100644 index 00000000000..2def7ab7ea7 --- /dev/null +++ b/src/third_party/boost/boost/math/statistics/linear_regression.hpp @@ -0,0 +1,133 @@ +/* + * Copyright Nick Thompson, 2019 + * Copyright Matt Borland, 2021 + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + */ + +#ifndef BOOST_MATH_STATISTICS_LINEAR_REGRESSION_HPP +#define BOOST_MATH_STATISTICS_LINEAR_REGRESSION_HPP + +#include <cmath> +#include <algorithm> +#include <utility> +#include <tuple> +#include <stdexcept> +#include <type_traits> +#include <boost/math/statistics/univariate_statistics.hpp> +#include <boost/math/statistics/bivariate_statistics.hpp> + +namespace boost { namespace math { namespace statistics { namespace detail { + + +template<class ReturnType, class RandomAccessContainer> +ReturnType simple_ordinary_least_squares_impl(RandomAccessContainer const & x, + RandomAccessContainer const & y) +{ + using Real = typename std::tuple_element<0, ReturnType>::type; + if (x.size() <= 1) + { + throw std::domain_error("At least 2 samples are required to perform a linear regression."); + } + + if (x.size() != y.size()) + { + throw std::domain_error("The same number of samples must be in the independent and dependent variable."); + } + std::tuple<Real, Real, Real> temp = boost::math::statistics::means_and_covariance(x, y); + Real mu_x = std::get<0>(temp); + Real mu_y = std::get<1>(temp); + Real cov_xy = std::get<2>(temp); + + Real var_x = boost::math::statistics::variance(x); + + if (var_x <= 0) { + throw std::domain_error("Independent variable has no variance; this breaks linear regression."); + } + + + Real c1 = cov_xy/var_x; + Real c0 = mu_y - c1*mu_x; + + return std::make_pair(c0, c1); +} + +template<class ReturnType, class RandomAccessContainer> +ReturnType simple_ordinary_least_squares_with_R_squared_impl(RandomAccessContainer const & x, + RandomAccessContainer const & y) +{ + using Real = typename std::tuple_element<0, ReturnType>::type; + if (x.size() <= 1) + { + throw std::domain_error("At least 2 samples are required to perform a linear regression."); + } + + if (x.size() != y.size()) + { + throw std::domain_error("The same number of samples must be in the independent and dependent variable."); + } + std::tuple<Real, Real, Real> temp = boost::math::statistics::means_and_covariance(x, y); + Real mu_x = std::get<0>(temp); + Real mu_y = std::get<1>(temp); + Real cov_xy = std::get<2>(temp); + + Real var_x = boost::math::statistics::variance(x); + + if (var_x <= 0) { + throw std::domain_error("Independent variable has no variance; this breaks linear regression."); + } + + + Real c1 = cov_xy/var_x; + Real c0 = mu_y - c1*mu_x; + + Real squared_residuals = 0; + Real squared_mean_deviation = 0; + for(decltype(y.size()) i = 0; i < y.size(); ++i) { + squared_mean_deviation += (y[i] - mu_y)*(y[i]-mu_y); + Real ei = (c0 + c1*x[i]) - y[i]; + squared_residuals += ei*ei; + } + + Real Rsquared; + if (squared_mean_deviation == 0) { + // Then y = constant, so the linear regression is perfect. + Rsquared = 1; + } else { + Rsquared = 1 - squared_residuals/squared_mean_deviation; + } + + return std::make_tuple(c0, c1, Rsquared); +} +} // namespace detail + +template<typename RandomAccessContainer, typename Real = typename RandomAccessContainer::value_type, + typename std::enable_if<std::is_integral<Real>::value, bool>::type = true> +inline auto simple_ordinary_least_squares(RandomAccessContainer const & x, RandomAccessContainer const & y) -> std::pair<double, double> +{ + return detail::simple_ordinary_least_squares_impl<std::pair<double, double>>(x, y); +} + +template<typename RandomAccessContainer, typename Real = typename RandomAccessContainer::value_type, + typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true> +inline auto simple_ordinary_least_squares(RandomAccessContainer const & x, RandomAccessContainer const & y) -> std::pair<Real, Real> +{ + return detail::simple_ordinary_least_squares_impl<std::pair<Real, Real>>(x, y); +} + +template<typename RandomAccessContainer, typename Real = typename RandomAccessContainer::value_type, + typename std::enable_if<std::is_integral<Real>::value, bool>::type = true> +inline auto simple_ordinary_least_squares_with_R_squared(RandomAccessContainer const & x, RandomAccessContainer const & y) -> std::tuple<double, double, double> +{ + return detail::simple_ordinary_least_squares_with_R_squared_impl<std::tuple<double, double, double>>(x, y); +} + +template<typename RandomAccessContainer, typename Real = typename RandomAccessContainer::value_type, + typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true> +inline auto simple_ordinary_least_squares_with_R_squared(RandomAccessContainer const & x, RandomAccessContainer const & y) -> std::tuple<Real, Real, Real> +{ + return detail::simple_ordinary_least_squares_with_R_squared_impl<std::tuple<Real, Real, Real>>(x, y); +} +}}} // namespace boost::math::statistics +#endif diff --git a/src/third_party/boost/boost/math/statistics/ljung_box.hpp b/src/third_party/boost/boost/math/statistics/ljung_box.hpp new file mode 100644 index 00000000000..6417665ce4b --- /dev/null +++ b/src/third_party/boost/boost/math/statistics/ljung_box.hpp @@ -0,0 +1,70 @@ +// (C) Copyright Nick Thompson 2019. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_STATISTICS_LJUNG_BOX_HPP +#define BOOST_MATH_STATISTICS_LJUNG_BOX_HPP + +#include <cmath> +#include <iterator> +#include <utility> +#include <boost/math/distributions/chi_squared.hpp> +#include <boost/math/statistics/univariate_statistics.hpp> + +namespace boost::math::statistics { + +template<class RandomAccessIterator> +auto ljung_box(RandomAccessIterator begin, RandomAccessIterator end, int64_t lags = -1, int64_t fit_dof = 0) { + using Real = typename std::iterator_traits<RandomAccessIterator>::value_type; + int64_t n = std::distance(begin, end); + if (lags >= n) { + throw std::domain_error("Number of lags must be < number of elements in array."); + } + + if (lags == -1) { + // This is the same default as Mathematica; it seems sensible enough . . . + lags = static_cast<int64_t>(std::ceil(std::log(Real(n)))); + } + + if (lags <= 0) { + throw std::domain_error("Must have at least one lag."); + } + + auto mu = boost::math::statistics::mean(begin, end); + + std::vector<Real> r(lags + 1, Real(0)); + for (size_t i = 0; i < r.size(); ++i) { + for (auto it = begin + i; it != end; ++it) { + Real ak = *(it) - mu; + Real akml = *(it-i) - mu; + r[i] += ak*akml; + } + } + + Real Q = 0; + + for (size_t k = 1; k < r.size(); ++k) { + Q += r[k]*r[k]/(r[0]*r[0]*(n-k)); + } + Q *= n*(n+2); + + typedef boost::math::policies::policy< + boost::math::policies::promote_float<false>, + boost::math::policies::promote_double<false> > + no_promote_policy; + + auto chi = boost::math::chi_squared_distribution<Real, no_promote_policy>(Real(lags - fit_dof)); + + Real pvalue = 1 - boost::math::cdf(chi, Q); + return std::make_pair(Q, pvalue); +} + + +template<class RandomAccessContainer> +auto ljung_box(RandomAccessContainer const & v, int64_t lags = -1, int64_t fit_dof = 0) { + return ljung_box(v.begin(), v.end(), lags, fit_dof); +} + +} +#endif diff --git a/src/third_party/boost/boost/math/statistics/runs_test.hpp b/src/third_party/boost/boost/math/statistics/runs_test.hpp new file mode 100644 index 00000000000..dc1a6ecf68c --- /dev/null +++ b/src/third_party/boost/boost/math/statistics/runs_test.hpp @@ -0,0 +1,121 @@ +/* + * Copyright Nick Thompson, 2019 + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + */ + +#ifndef BOOST_MATH_STATISTICS_RUNS_TEST_HPP +#define BOOST_MATH_STATISTICS_RUNS_TEST_HPP + +#include <cmath> +#include <algorithm> +#include <utility> +#include <boost/math/statistics/univariate_statistics.hpp> +#include <boost/math/distributions/normal.hpp> + +namespace boost::math::statistics { + +template<class RandomAccessContainer> +auto runs_above_and_below_threshold(RandomAccessContainer const & v, + typename RandomAccessContainer::value_type threshold) +{ + using Real = typename RandomAccessContainer::value_type; + using std::sqrt; + using std::abs; + if (v.size() <= 1) + { + throw std::domain_error("At least 2 samples are required to get number of runs."); + } + typedef boost::math::policies::policy< + boost::math::policies::promote_float<false>, + boost::math::policies::promote_double<false> > + no_promote_policy; + + decltype(v.size()) nabove = 0; + decltype(v.size()) nbelow = 0; + + decltype(v.size()) imin = 0; + + // Take care of the case that v[0] == threshold: + while (imin < v.size() && v[imin] == threshold) { + ++imin; + } + + // Take care of the constant vector case: + if (imin == v.size()) { + return std::make_pair(std::numeric_limits<Real>::quiet_NaN(), Real(0)); + } + + bool run_up = (v[imin] > threshold); + if (run_up) { + ++nabove; + } else { + ++nbelow; + } + decltype(v.size()) runs = 1; + for (decltype(v.size()) i = imin + 1; i < v.size(); ++i) { + if (v[i] == threshold) { + // skip values precisely equal to threshold (following R's randtests package) + continue; + } + bool above = (v[i] > threshold); + if (above) { + ++nabove; + } else { + ++nbelow; + } + if (run_up == above) { + continue; + } + else { + run_up = above; + runs++; + } + } + + // If you make n an int, the subtraction is gonna be bad in the variance: + Real n = nabove + nbelow; + + Real expected_runs = Real(1) + Real(2*nabove*nbelow)/Real(n); + Real variance = 2*nabove*nbelow*(2*nabove*nbelow-n)/Real(n*n*(n-1)); + + // Bizarre, pathological limits: + if (variance == 0) + { + if (runs == expected_runs) + { + Real statistic = 0; + Real pvalue = 1; + return std::make_pair(statistic, pvalue); + } + else + { + return std::make_pair(std::numeric_limits<Real>::quiet_NaN(), Real(0)); + } + } + + Real sd = sqrt(variance); + Real statistic = (runs - expected_runs)/sd; + + auto normal = boost::math::normal_distribution<Real, no_promote_policy>(0,1); + Real pvalue = 2*boost::math::cdf(normal, -abs(statistic)); + return std::make_pair(statistic, pvalue); +} + +template<class RandomAccessContainer> +auto runs_above_and_below_median(RandomAccessContainer const & v) +{ + using Real = typename RandomAccessContainer::value_type; + using std::log; + using std::sqrt; + + // We have to memcpy v because the median does a partial sort, + // and that would be catastrophic for the runs test. + auto w = v; + Real median = boost::math::statistics::median(w); + return runs_above_and_below_threshold(v, median); +} + +} +#endif diff --git a/src/third_party/boost/boost/math/statistics/signal_statistics.hpp b/src/third_party/boost/boost/math/statistics/signal_statistics.hpp new file mode 100644 index 00000000000..c11e802c302 --- /dev/null +++ b/src/third_party/boost/boost/math/statistics/signal_statistics.hpp @@ -0,0 +1,343 @@ +// (C) Copyright Nick Thompson 2018. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_TOOLS_SIGNAL_STATISTICS_HPP +#define BOOST_MATH_TOOLS_SIGNAL_STATISTICS_HPP + +#include <algorithm> +#include <iterator> +#include <boost/math/tools/assert.hpp> +#include <boost/math/tools/complex.hpp> +#include <boost/math/tools/roots.hpp> +#include <boost/math/statistics/univariate_statistics.hpp> + + +namespace boost::math::statistics { + +template<class ForwardIterator> +auto absolute_gini_coefficient(ForwardIterator first, ForwardIterator last) +{ + using std::abs; + using RealOrComplex = typename std::iterator_traits<ForwardIterator>::value_type; + BOOST_MATH_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Gini coefficient requires at least two samples."); + + std::sort(first, last, [](RealOrComplex a, RealOrComplex b) { return abs(b) > abs(a); }); + + + decltype(abs(*first)) i = 1; + decltype(abs(*first)) num = 0; + decltype(abs(*first)) denom = 0; + for (auto it = first; it != last; ++it) + { + decltype(abs(*first)) tmp = abs(*it); + num += tmp*i; + denom += tmp; + ++i; + } + + // If the l1 norm is zero, all elements are zero, so every element is the same. + if (denom == 0) + { + decltype(abs(*first)) zero = 0; + return zero; + } + return ((2*num)/denom - i)/(i-1); +} + +template<class RandomAccessContainer> +inline auto absolute_gini_coefficient(RandomAccessContainer & v) +{ + return boost::math::statistics::absolute_gini_coefficient(v.begin(), v.end()); +} + +template<class ForwardIterator> +auto sample_absolute_gini_coefficient(ForwardIterator first, ForwardIterator last) +{ + size_t n = std::distance(first, last); + return n*boost::math::statistics::absolute_gini_coefficient(first, last)/(n-1); +} + +template<class RandomAccessContainer> +inline auto sample_absolute_gini_coefficient(RandomAccessContainer & v) +{ + return boost::math::statistics::sample_absolute_gini_coefficient(v.begin(), v.end()); +} + + +// The Hoyer sparsity measure is defined in: +// https://arxiv.org/pdf/0811.4706.pdf +template<class ForwardIterator> +auto hoyer_sparsity(const ForwardIterator first, const ForwardIterator last) +{ + using T = typename std::iterator_traits<ForwardIterator>::value_type; + using std::abs; + using std::sqrt; + BOOST_MATH_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Hoyer sparsity requires at least two samples."); + + if constexpr (std::is_unsigned<T>::value) + { + T l1 = 0; + T l2 = 0; + size_t n = 0; + for (auto it = first; it != last; ++it) + { + l1 += *it; + l2 += (*it)*(*it); + n += 1; + } + + double rootn = sqrt(n); + return (rootn - l1/sqrt(l2) )/ (rootn - 1); + } + else { + decltype(abs(*first)) l1 = 0; + decltype(abs(*first)) l2 = 0; + // We wouldn't need to count the elements if it was a random access iterator, + // but our only constraint is that it's a forward iterator. + size_t n = 0; + for (auto it = first; it != last; ++it) + { + decltype(abs(*first)) tmp = abs(*it); + l1 += tmp; + l2 += tmp*tmp; + n += 1; + } + if constexpr (std::is_integral<T>::value) + { + double rootn = sqrt(n); + return (rootn - l1/sqrt(l2) )/ (rootn - 1); + } + else + { + decltype(abs(*first)) rootn = sqrt(static_cast<decltype(abs(*first))>(n)); + return (rootn - l1/sqrt(l2) )/ (rootn - 1); + } + } +} + +template<class Container> +inline auto hoyer_sparsity(Container const & v) +{ + return boost::math::statistics::hoyer_sparsity(v.cbegin(), v.cend()); +} + + +template<class Container> +auto oracle_snr(Container const & signal, Container const & noisy_signal) +{ + using Real = typename Container::value_type; + BOOST_MATH_ASSERT_MSG(signal.size() == noisy_signal.size(), + "Signal and noisy_signal must be have the same number of elements."); + if constexpr (std::is_integral<Real>::value) + { + double numerator = 0; + double denominator = 0; + for (size_t i = 0; i < signal.size(); ++i) + { + numerator += signal[i]*signal[i]; + denominator += (noisy_signal[i] - signal[i])*(noisy_signal[i] - signal[i]); + } + if (numerator == 0 && denominator == 0) + { + return std::numeric_limits<double>::quiet_NaN(); + } + if (denominator == 0) + { + return std::numeric_limits<double>::infinity(); + } + return numerator/denominator; + } + else if constexpr (boost::math::tools::is_complex_type<Real>::value) + + { + using std::norm; + typename Real::value_type numerator = 0; + typename Real::value_type denominator = 0; + for (size_t i = 0; i < signal.size(); ++i) + { + numerator += norm(signal[i]); + denominator += norm(noisy_signal[i] - signal[i]); + } + if (numerator == 0 && denominator == 0) + { + return std::numeric_limits<typename Real::value_type>::quiet_NaN(); + } + if (denominator == 0) + { + return std::numeric_limits<typename Real::value_type>::infinity(); + } + + return numerator/denominator; + } + else + { + Real numerator = 0; + Real denominator = 0; + for (size_t i = 0; i < signal.size(); ++i) + { + numerator += signal[i]*signal[i]; + denominator += (signal[i] - noisy_signal[i])*(signal[i] - noisy_signal[i]); + } + if (numerator == 0 && denominator == 0) + { + return std::numeric_limits<Real>::quiet_NaN(); + } + if (denominator == 0) + { + return std::numeric_limits<Real>::infinity(); + } + + return numerator/denominator; + } +} + +template<class Container> +auto mean_invariant_oracle_snr(Container const & signal, Container const & noisy_signal) +{ + using Real = typename Container::value_type; + BOOST_MATH_ASSERT_MSG(signal.size() == noisy_signal.size(), "Signal and noisy signal must be have the same number of elements."); + + Real mu = boost::math::statistics::mean(signal); + Real numerator = 0; + Real denominator = 0; + for (size_t i = 0; i < signal.size(); ++i) + { + Real tmp = signal[i] - mu; + numerator += tmp*tmp; + denominator += (signal[i] - noisy_signal[i])*(signal[i] - noisy_signal[i]); + } + if (numerator == 0 && denominator == 0) + { + return std::numeric_limits<Real>::quiet_NaN(); + } + if (denominator == 0) + { + return std::numeric_limits<Real>::infinity(); + } + + return numerator/denominator; + +} + +template<class Container> +auto mean_invariant_oracle_snr_db(Container const & signal, Container const & noisy_signal) +{ + using std::log10; + return 10*log10(boost::math::statistics::mean_invariant_oracle_snr(signal, noisy_signal)); +} + + +// Follows the definition of SNR given in Mallat, A Wavelet Tour of Signal Processing, equation 11.16. +template<class Container> +auto oracle_snr_db(Container const & signal, Container const & noisy_signal) +{ + using std::log10; + return 10*log10(boost::math::statistics::oracle_snr(signal, noisy_signal)); +} + +// A good reference on the M2M4 estimator: +// D. R. Pauluzzi and N. C. Beaulieu, "A comparison of SNR estimation techniques for the AWGN channel," IEEE Trans. Communications, Vol. 48, No. 10, pp. 1681-1691, 2000. +// A nice python implementation: +// https://github.com/gnuradio/gnuradio/blob/master/gr-digital/examples/snr_estimators.py +template<class ForwardIterator> +auto m2m4_snr_estimator(ForwardIterator first, ForwardIterator last, decltype(*first) estimated_signal_kurtosis=1, decltype(*first) estimated_noise_kurtosis=3) +{ + BOOST_MATH_ASSERT_MSG(estimated_signal_kurtosis > 0, "The estimated signal kurtosis must be positive"); + BOOST_MATH_ASSERT_MSG(estimated_noise_kurtosis > 0, "The estimated noise kurtosis must be positive."); + using Real = typename std::iterator_traits<ForwardIterator>::value_type; + using std::sqrt; + if constexpr (std::is_floating_point<Real>::value || std::numeric_limits<Real>::max_exponent) + { + // If we first eliminate N, we obtain the quadratic equation: + // (ka+kw-6)S^2 + 2M2(3-kw)S + kw*M2^2 - M4 = 0 =: a*S^2 + bs*N + cs = 0 + // If we first eliminate S, we obtain the quadratic equation: + // (ka+kw-6)N^2 + 2M2(3-ka)N + ka*M2^2 - M4 = 0 =: a*N^2 + bn*N + cn = 0 + // I believe these equations are totally independent quadratics; + // if one has a complex solution it is not necessarily the case that the other must also. + // However, I can't prove that, so there is a chance that this does unnecessary work. + // Future improvements: There are algorithms which can solve quadratics much more effectively than the naive implementation found here. + // See: https://stackoverflow.com/questions/48979861/numerically-stable-method-for-solving-quadratic-equations/50065711#50065711 + auto [M1, M2, M3, M4] = boost::math::statistics::first_four_moments(first, last); + if (M4 == 0) + { + // The signal is constant. There is no noise: + return std::numeric_limits<Real>::infinity(); + } + // Change to notation in Pauluzzi, equation 41: + auto kw = estimated_noise_kurtosis; + auto ka = estimated_signal_kurtosis; + // A common case, since it's the default: + Real a = (ka+kw-6); + Real bs = 2*M2*(3-kw); + Real cs = kw*M2*M2 - M4; + Real bn = 2*M2*(3-ka); + Real cn = ka*M2*M2 - M4; + auto [S0, S1] = boost::math::tools::quadratic_roots(a, bs, cs); + if (S1 > 0) + { + auto N = M2 - S1; + if (N > 0) + { + return S1/N; + } + if (S0 > 0) + { + N = M2 - S0; + if (N > 0) + { + return S0/N; + } + } + } + auto [N0, N1] = boost::math::tools::quadratic_roots(a, bn, cn); + if (N1 > 0) + { + auto S = M2 - N1; + if (S > 0) + { + return S/N1; + } + if (N0 > 0) + { + S = M2 - N0; + if (S > 0) + { + return S/N0; + } + } + } + // This happens distressingly often. It's a limitation of the method. + return std::numeric_limits<Real>::quiet_NaN(); + } + else + { + BOOST_MATH_ASSERT_MSG(false, "The M2M4 estimator has not been implemented for this type."); + return std::numeric_limits<Real>::quiet_NaN(); + } +} + +template<class Container> +inline auto m2m4_snr_estimator(Container const & noisy_signal, typename Container::value_type estimated_signal_kurtosis=1, typename Container::value_type estimated_noise_kurtosis=3) +{ + return m2m4_snr_estimator(noisy_signal.cbegin(), noisy_signal.cend(), estimated_signal_kurtosis, estimated_noise_kurtosis); +} + +template<class ForwardIterator> +inline auto m2m4_snr_estimator_db(ForwardIterator first, ForwardIterator last, decltype(*first) estimated_signal_kurtosis=1, decltype(*first) estimated_noise_kurtosis=3) +{ + using std::log10; + return 10*log10(m2m4_snr_estimator(first, last, estimated_signal_kurtosis, estimated_noise_kurtosis)); +} + + +template<class Container> +inline auto m2m4_snr_estimator_db(Container const & noisy_signal, typename Container::value_type estimated_signal_kurtosis=1, typename Container::value_type estimated_noise_kurtosis=3) +{ + using std::log10; + return 10*log10(m2m4_snr_estimator(noisy_signal, estimated_signal_kurtosis, estimated_noise_kurtosis)); +} + +} +#endif diff --git a/src/third_party/boost/boost/math/statistics/t_test.hpp b/src/third_party/boost/boost/math/statistics/t_test.hpp new file mode 100644 index 00000000000..ec06bda8c34 --- /dev/null +++ b/src/third_party/boost/boost/math/statistics/t_test.hpp @@ -0,0 +1,275 @@ +// (C) Copyright Nick Thompson 2019. +// (C) Copyright Matt Borland 2021. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_STATISTICS_T_TEST_HPP +#define BOOST_MATH_STATISTICS_T_TEST_HPP + +#include <cmath> +#include <cstddef> +#include <iterator> +#include <utility> +#include <type_traits> +#include <vector> +#include <stdexcept> +#include <boost/math/distributions/students_t.hpp> +#include <boost/math/statistics/univariate_statistics.hpp> + +namespace boost { namespace math { namespace statistics { namespace detail { + +template<typename ReturnType, typename T> +ReturnType one_sample_t_test_impl(T sample_mean, T sample_variance, T num_samples, T assumed_mean) +{ + using Real = typename std::tuple_element<0, ReturnType>::type; + using std::sqrt; + typedef boost::math::policies::policy< + boost::math::policies::promote_float<false>, + boost::math::policies::promote_double<false> > + no_promote_policy; + + Real test_statistic = (sample_mean - assumed_mean)/sqrt(sample_variance/num_samples); + auto student = boost::math::students_t_distribution<Real, no_promote_policy>(num_samples - 1); + Real pvalue; + if (test_statistic > 0) { + pvalue = 2*boost::math::cdf<Real>(student, -test_statistic);; + } + else { + pvalue = 2*boost::math::cdf<Real>(student, test_statistic); + } + return std::make_pair(test_statistic, pvalue); +} + +template<typename ReturnType, typename ForwardIterator> +ReturnType one_sample_t_test_impl(ForwardIterator begin, ForwardIterator end, typename std::iterator_traits<ForwardIterator>::value_type assumed_mean) +{ + using Real = typename std::tuple_element<0, ReturnType>::type; + std::pair<Real, Real> temp = mean_and_sample_variance(begin, end); + Real mu = std::get<0>(temp); + Real s_sq = std::get<1>(temp); + return one_sample_t_test_impl<ReturnType>(mu, s_sq, Real(std::distance(begin, end)), Real(assumed_mean)); +} + +// https://en.wikipedia.org/wiki/Student%27s_t-test#Equal_or_unequal_sample_sizes,_unequal_variances_(sX1_%3E_2sX2_or_sX2_%3E_2sX1) +template<typename ReturnType, typename T> +ReturnType welchs_t_test_impl(T mean_1, T variance_1, T size_1, T mean_2, T variance_2, T size_2) +{ + using Real = typename std::tuple_element<0, ReturnType>::type; + using no_promote_policy = boost::math::policies::policy<boost::math::policies::promote_float<false>, boost::math::policies::promote_double<false>>; + using std::sqrt; + + Real dof_num = (variance_1/size_1 + variance_2/size_2) * (variance_1/size_1 + variance_2/size_2); + Real dof_denom = ((variance_1/size_1) * (variance_1/size_1))/(size_1 - 1) + + ((variance_2/size_2) * (variance_2/size_2))/(size_2 - 1); + Real dof = dof_num / dof_denom; + + Real s_estimator = sqrt((variance_1/size_1) + (variance_2/size_2)); + + Real test_statistic = (static_cast<Real>(mean_1) - static_cast<Real>(mean_2))/s_estimator; + auto student = boost::math::students_t_distribution<Real, no_promote_policy>(dof); + Real pvalue; + if (test_statistic > 0) + { + pvalue = 2*boost::math::cdf<Real>(student, -test_statistic);; + } + else + { + pvalue = 2*boost::math::cdf<Real>(student, test_statistic); + } + + return std::make_pair(test_statistic, pvalue); +} + +// https://en.wikipedia.org/wiki/Student%27s_t-test#Equal_or_unequal_sample_sizes,_similar_variances_(1/2_%3C_sX1/sX2_%3C_2) +template<typename ReturnType, typename T> +ReturnType two_sample_t_test_impl(T mean_1, T variance_1, T size_1, T mean_2, T variance_2, T size_2) +{ + using Real = typename std::tuple_element<0, ReturnType>::type; + using no_promote_policy = boost::math::policies::policy<boost::math::policies::promote_float<false>, boost::math::policies::promote_double<false>>; + using std::sqrt; + + Real dof = size_1 + size_2 - 2; + Real pooled_std_dev = sqrt(((size_1-1)*variance_1 + (size_2-1)*variance_2) / dof); + Real test_statistic = (mean_1-mean_2) / (pooled_std_dev*sqrt(1.0/static_cast<Real>(size_1) + 1.0/static_cast<Real>(size_2))); + + auto student = boost::math::students_t_distribution<Real, no_promote_policy>(dof); + Real pvalue; + if (test_statistic > 0) + { + pvalue = 2*boost::math::cdf<Real>(student, -test_statistic);; + } + else + { + pvalue = 2*boost::math::cdf<Real>(student, test_statistic); + } + + return std::make_pair(test_statistic, pvalue); +} + +template<typename ReturnType, typename ForwardIterator> +ReturnType two_sample_t_test_impl(ForwardIterator begin_1, ForwardIterator end_1, ForwardIterator begin_2, ForwardIterator end_2) +{ + using Real = typename std::tuple_element<0, ReturnType>::type; + using std::sqrt; + auto n1 = std::distance(begin_1, end_1); + auto n2 = std::distance(begin_2, end_2); + + ReturnType temp_1 = mean_and_sample_variance(begin_1, end_1); + Real mean_1 = std::get<0>(temp_1); + Real variance_1 = std::get<1>(temp_1); + Real std_dev_1 = sqrt(variance_1); + + ReturnType temp_2 = mean_and_sample_variance(begin_2, end_2); + Real mean_2 = std::get<0>(temp_2); + Real variance_2 = std::get<1>(temp_2); + Real std_dev_2 = sqrt(variance_2); + + if(std_dev_1 > 2 * std_dev_2 || std_dev_2 > 2 * std_dev_1) + { + return welchs_t_test_impl<ReturnType>(mean_1, variance_1, Real(n1), mean_2, variance_2, Real(n2)); + } + else + { + return two_sample_t_test_impl<ReturnType>(mean_1, variance_1, Real(n1), mean_2, variance_2, Real(n2)); + } +} + +// https://en.wikipedia.org/wiki/Student%27s_t-test#Dependent_t-test_for_paired_samples +template<typename ReturnType, typename ForwardIterator> +ReturnType paired_samples_t_test_impl(ForwardIterator begin_1, ForwardIterator end_1, ForwardIterator begin_2, ForwardIterator end_2) +{ + using Real = typename std::tuple_element<0, ReturnType>::type; + using no_promote_policy = boost::math::policies::policy<boost::math::policies::promote_float<false>, boost::math::policies::promote_double<false>>; + using std::sqrt; + + std::vector<Real> delta; + ForwardIterator it_1 = begin_1; + ForwardIterator it_2 = begin_2; + std::size_t n = 0; + while(it_1 != end_1 && it_2 != end_2) + { + delta.emplace_back(static_cast<Real>(*it_1++) - static_cast<Real>(*it_2++)); + ++n; + } + + if(it_1 != end_1 || it_2 != end_2) + { + throw std::domain_error("Both sets must have the same number of values."); + } + + std::pair<Real, Real> temp = mean_and_sample_variance(delta.begin(), delta.end()); + Real delta_mean = std::get<0>(temp); + Real delta_std_dev = sqrt(std::get<1>(temp)); + + Real test_statistic = delta_mean/(delta_std_dev/sqrt(n)); + + auto student = boost::math::students_t_distribution<Real, no_promote_policy>(n - 1); + Real pvalue; + if (test_statistic > 0) + { + pvalue = 2*boost::math::cdf<Real>(student, -test_statistic);; + } + else + { + pvalue = 2*boost::math::cdf<Real>(student, test_statistic); + } + + return std::make_pair(test_statistic, pvalue); +} +} // namespace detail + +template<typename Real, typename std::enable_if<std::is_integral<Real>::value, bool>::type = true> +inline auto one_sample_t_test(Real sample_mean, Real sample_variance, Real num_samples, Real assumed_mean) -> std::pair<double, double> +{ + return detail::one_sample_t_test_impl<std::pair<double, double>>(sample_mean, sample_variance, num_samples, assumed_mean); +} + +template<typename Real, typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true> +inline auto one_sample_t_test(Real sample_mean, Real sample_variance, Real num_samples, Real assumed_mean) -> std::pair<Real, Real> +{ + return detail::one_sample_t_test_impl<std::pair<Real, Real>>(sample_mean, sample_variance, num_samples, assumed_mean); +} + +template<typename ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type, + typename std::enable_if<std::is_integral<Real>::value, bool>::type = true> +inline auto one_sample_t_test(ForwardIterator begin, ForwardIterator end, Real assumed_mean) -> std::pair<double, double> +{ + return detail::one_sample_t_test_impl<std::pair<double, double>>(begin, end, assumed_mean); +} + +template<typename ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type, + typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true> +inline auto one_sample_t_test(ForwardIterator begin, ForwardIterator end, Real assumed_mean) -> std::pair<Real, Real> +{ + return detail::one_sample_t_test_impl<std::pair<Real, Real>>(begin, end, assumed_mean); +} + +template<typename Container, typename Real = typename Container::value_type, + typename std::enable_if<std::is_integral<Real>::value, bool>::type = true> +inline auto one_sample_t_test(Container const & v, Real assumed_mean) -> std::pair<double, double> +{ + return detail::one_sample_t_test_impl<std::pair<double, double>>(std::begin(v), std::end(v), assumed_mean); +} + +template<typename Container, typename Real = typename Container::value_type, + typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true> +inline auto one_sample_t_test(Container const & v, Real assumed_mean) -> std::pair<Real, Real> +{ + return detail::one_sample_t_test_impl<std::pair<Real, Real>>(std::begin(v), std::end(v), assumed_mean); +} + +template<typename ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type, + typename std::enable_if<std::is_integral<Real>::value, bool>::type = true> +inline auto two_sample_t_test(ForwardIterator begin_1, ForwardIterator end_1, ForwardIterator begin_2, ForwardIterator end_2) -> std::pair<double, double> +{ + return detail::two_sample_t_test_impl<std::pair<double, double>>(begin_1, end_1, begin_2, end_2); +} + +template<typename ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type, + typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true> +inline auto two_sample_t_test(ForwardIterator begin_1, ForwardIterator end_1, ForwardIterator begin_2, ForwardIterator end_2) -> std::pair<Real, Real> +{ + return detail::two_sample_t_test_impl<std::pair<Real, Real>>(begin_1, end_1, begin_2, end_2); +} + +template<typename Container, typename Real = typename Container::value_type, typename std::enable_if<std::is_integral<Real>::value, bool>::type = true> +inline auto two_sample_t_test(Container const & u, Container const & v) -> std::pair<double, double> +{ + return detail::two_sample_t_test_impl<std::pair<double, double>>(std::begin(u), std::end(u), std::begin(v), std::end(v)); +} + +template<typename Container, typename Real = typename Container::value_type, typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true> +inline auto two_sample_t_test(Container const & u, Container const & v) -> std::pair<Real, Real> +{ + return detail::two_sample_t_test_impl<std::pair<Real, Real>>(std::begin(u), std::end(u), std::begin(v), std::end(v)); +} + +template<typename ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type, + typename std::enable_if<std::is_integral<Real>::value, bool>::type = true> +inline auto paired_samples_t_test(ForwardIterator begin_1, ForwardIterator end_1, ForwardIterator begin_2, ForwardIterator end_2) -> std::pair<double, double> +{ + return detail::paired_samples_t_test_impl<std::pair<double, double>>(begin_1, end_1, begin_2, end_2); +} + +template<typename ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type, + typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true> +inline auto paired_samples_t_test(ForwardIterator begin_1, ForwardIterator end_1, ForwardIterator begin_2, ForwardIterator end_2) -> std::pair<Real, Real> +{ + return detail::paired_samples_t_test_impl<std::pair<Real, Real>>(begin_1, end_1, begin_2, end_2); +} + +template<typename Container, typename Real = typename Container::value_type, typename std::enable_if<std::is_integral<Real>::value, bool>::type = true> +inline auto paired_samples_t_test(Container const & u, Container const & v) -> std::pair<double, double> +{ + return detail::paired_samples_t_test_impl<std::pair<double, double>>(std::begin(u), std::end(u), std::begin(v), std::end(v)); +} + +template<typename Container, typename Real = typename Container::value_type, typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true> +inline auto paired_samples_t_test(Container const & u, Container const & v) -> std::pair<Real, Real> +{ + return detail::paired_samples_t_test_impl<std::pair<Real, Real>>(std::begin(u), std::end(u), std::begin(v), std::end(v)); +} + +}}} // namespace boost::math::statistics +#endif diff --git a/src/third_party/boost/boost/math/statistics/univariate_statistics.hpp b/src/third_party/boost/boost/math/statistics/univariate_statistics.hpp new file mode 100644 index 00000000000..8a366384d76 --- /dev/null +++ b/src/third_party/boost/boost/math/statistics/univariate_statistics.hpp @@ -0,0 +1,1186 @@ +// (C) Copyright Nick Thompson 2018. +// (C) Copyright Matt Borland 2020. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_STATISTICS_UNIVARIATE_STATISTICS_HPP +#define BOOST_MATH_STATISTICS_UNIVARIATE_STATISTICS_HPP + +#include <boost/math/statistics/detail/single_pass.hpp> +#include <boost/math/tools/config.hpp> +#include <boost/math/tools/assert.hpp> +#include <algorithm> +#include <iterator> +#include <tuple> +#include <cmath> +#include <vector> +#include <type_traits> +#include <utility> +#include <numeric> +#include <list> + +// Support compilers with P0024R2 implemented without linking TBB +// https://en.cppreference.com/w/cpp/compiler_support +#if !defined(BOOST_NO_CXX17_HDR_EXECUTION) && defined(BOOST_HAS_THREADS) +#include <execution> + +namespace boost::math::statistics { + +template<class ExecutionPolicy, class ForwardIterator> +inline auto mean(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last) +{ + using Real = typename std::iterator_traits<ForwardIterator>::value_type; + BOOST_MATH_ASSERT_MSG(first != last, "At least one sample is required to compute the mean."); + + if constexpr (std::is_integral_v<Real>) + { + if constexpr (std::is_same_v<std::remove_reference_t<decltype(exec)>, decltype(std::execution::seq)>) + { + return detail::mean_sequential_impl<double>(first, last); + } + else + { + return std::reduce(exec, first, last, 0.0) / std::distance(first, last); + } + } + else + { + if constexpr (std::is_same_v<std::remove_reference_t<decltype(exec)>, decltype(std::execution::seq)>) + { + return detail::mean_sequential_impl<Real>(first, last); + } + else + { + return std::reduce(exec, first, last, Real(0.0)) / Real(std::distance(first, last)); + } + } +} + +template<class ExecutionPolicy, class Container> +inline auto mean(ExecutionPolicy&& exec, Container const & v) +{ + return mean(exec, std::cbegin(v), std::cend(v)); +} + +template<class ForwardIterator> +inline auto mean(ForwardIterator first, ForwardIterator last) +{ + return mean(std::execution::seq, first, last); +} + +template<class Container> +inline auto mean(Container const & v) +{ + return mean(std::execution::seq, std::cbegin(v), std::cend(v)); +} + +template<class ExecutionPolicy, class ForwardIterator> +inline auto variance(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last) +{ + using Real = typename std::iterator_traits<ForwardIterator>::value_type; + + if constexpr (std::is_integral_v<Real>) + { + if constexpr (std::is_same_v<std::remove_reference_t<decltype(exec)>, decltype(std::execution::seq)>) + { + return std::get<2>(detail::variance_sequential_impl<std::tuple<double, double, double, double>>(first, last)); + } + else + { + const auto results = detail::first_four_moments_parallel_impl<std::tuple<double, double, double, double, double>>(first, last); + return std::get<1>(results) / std::get<4>(results); + } + } + else + { + if constexpr (std::is_same_v<std::remove_reference_t<decltype(exec)>, decltype(std::execution::seq)>) + { + return std::get<2>(detail::variance_sequential_impl<std::tuple<Real, Real, Real, Real>>(first, last)); + } + else + { + const auto results = detail::first_four_moments_parallel_impl<std::tuple<Real, Real, Real, Real, Real>>(first, last); + return std::get<1>(results) / std::get<4>(results); + } + } +} + +template<class ExecutionPolicy, class Container> +inline auto variance(ExecutionPolicy&& exec, Container const & v) +{ + return variance(exec, std::cbegin(v), std::cend(v)); +} + +template<class ForwardIterator> +inline auto variance(ForwardIterator first, ForwardIterator last) +{ + return variance(std::execution::seq, first, last); +} + +template<class Container> +inline auto variance(Container const & v) +{ + return variance(std::execution::seq, std::cbegin(v), std::cend(v)); +} + +template<class ExecutionPolicy, class ForwardIterator> +inline auto sample_variance(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last) +{ + const auto n = std::distance(first, last); + BOOST_MATH_ASSERT_MSG(n > 1, "At least two samples are required to compute the sample variance."); + return n*variance(exec, first, last)/(n-1); +} + +template<class ExecutionPolicy, class Container> +inline auto sample_variance(ExecutionPolicy&& exec, Container const & v) +{ + return sample_variance(exec, std::cbegin(v), std::cend(v)); +} + +template<class ForwardIterator> +inline auto sample_variance(ForwardIterator first, ForwardIterator last) +{ + return sample_variance(std::execution::seq, first, last); +} + +template<class Container> +inline auto sample_variance(Container const & v) +{ + return sample_variance(std::execution::seq, std::cbegin(v), std::cend(v)); +} + +template<class ExecutionPolicy, class ForwardIterator> +inline auto mean_and_sample_variance(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last) +{ + using Real = typename std::iterator_traits<ForwardIterator>::value_type; + + if constexpr (std::is_integral_v<Real>) + { + if constexpr (std::is_same_v<std::remove_reference_t<decltype(exec)>, decltype(std::execution::seq)>) + { + const auto results = detail::variance_sequential_impl<std::tuple<double, double, double, double>>(first, last); + return std::make_pair(std::get<0>(results), std::get<2>(results)*std::get<3>(results)/(std::get<3>(results)-1.0)); + } + else + { + const auto results = detail::first_four_moments_parallel_impl<std::tuple<double, double, double, double, double>>(first, last); + return std::make_pair(std::get<0>(results), std::get<1>(results) / (std::get<4>(results)-1.0)); + } + } + else + { + if constexpr (std::is_same_v<std::remove_reference_t<decltype(exec)>, decltype(std::execution::seq)>) + { + const auto results = detail::variance_sequential_impl<std::tuple<Real, Real, Real, Real>>(first, last); + return std::make_pair(std::get<0>(results), std::get<2>(results)*std::get<3>(results)/(std::get<3>(results)-Real(1))); + } + else + { + const auto results = detail::first_four_moments_parallel_impl<std::tuple<Real, Real, Real, Real, Real>>(first, last); + return std::make_pair(std::get<0>(results), std::get<1>(results) / (std::get<4>(results)-Real(1))); + } + } +} + +template<class ExecutionPolicy, class Container> +inline auto mean_and_sample_variance(ExecutionPolicy&& exec, Container const & v) +{ + return mean_and_sample_variance(exec, std::cbegin(v), std::cend(v)); +} + +template<class ForwardIterator> +inline auto mean_and_sample_variance(ForwardIterator first, ForwardIterator last) +{ + return mean_and_sample_variance(std::execution::seq, first, last); +} + +template<class Container> +inline auto mean_and_sample_variance(Container const & v) +{ + return mean_and_sample_variance(std::execution::seq, std::cbegin(v), std::cend(v)); +} + +template<class ExecutionPolicy, class ForwardIterator> +inline auto first_four_moments(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last) +{ + using Real = typename std::iterator_traits<ForwardIterator>::value_type; + + if constexpr (std::is_integral_v<Real>) + { + if constexpr (std::is_same_v<std::remove_reference_t<decltype(exec)>, decltype(std::execution::seq)>) + { + const auto results = detail::first_four_moments_sequential_impl<std::tuple<double, double, double, double, double>>(first, last); + return std::make_tuple(std::get<0>(results), std::get<1>(results) / std::get<4>(results), std::get<2>(results) / std::get<4>(results), + std::get<3>(results) / std::get<4>(results)); + } + else + { + const auto results = detail::first_four_moments_parallel_impl<std::tuple<double, double, double, double, double>>(first, last); + return std::make_tuple(std::get<0>(results), std::get<1>(results) / std::get<4>(results), std::get<2>(results) / std::get<4>(results), + std::get<3>(results) / std::get<4>(results)); + } + } + else + { + if constexpr (std::is_same_v<std::remove_reference_t<decltype(exec)>, decltype(std::execution::seq)>) + { + const auto results = detail::first_four_moments_sequential_impl<std::tuple<Real, Real, Real, Real, Real>>(first, last); + return std::make_tuple(std::get<0>(results), std::get<1>(results) / std::get<4>(results), std::get<2>(results) / std::get<4>(results), + std::get<3>(results) / std::get<4>(results)); + } + else + { + const auto results = detail::first_four_moments_parallel_impl<std::tuple<Real, Real, Real, Real, Real>>(first, last); + return std::make_tuple(std::get<0>(results), std::get<1>(results) / std::get<4>(results), std::get<2>(results) / std::get<4>(results), + std::get<3>(results) / std::get<4>(results)); + } + } +} + +template<class ExecutionPolicy, class Container> +inline auto first_four_moments(ExecutionPolicy&& exec, Container const & v) +{ + return first_four_moments(exec, std::cbegin(v), std::cend(v)); +} + +template<class ForwardIterator> +inline auto first_four_moments(ForwardIterator first, ForwardIterator last) +{ + return first_four_moments(std::execution::seq, first, last); +} + +template<class Container> +inline auto first_four_moments(Container const & v) +{ + return first_four_moments(std::execution::seq, std::cbegin(v), std::cend(v)); +} + +// https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf +template<class ExecutionPolicy, class ForwardIterator> +inline auto skewness(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last) +{ + using Real = typename std::iterator_traits<ForwardIterator>::value_type; + using std::sqrt; + + if constexpr (std::is_same_v<std::remove_reference_t<decltype(exec)>, decltype(std::execution::seq)>) + { + if constexpr (std::is_integral_v<Real>) + { + return detail::skewness_sequential_impl<double>(first, last); + } + else + { + return detail::skewness_sequential_impl<Real>(first, last); + } + } + else + { + const auto [M1, M2, M3, M4] = first_four_moments(exec, first, last); + const auto n = std::distance(first, last); + const auto var = M2/(n-1); + + if (M2 == 0) + { + // The limit is technically undefined, but the interpretation here is clear: + // A constant dataset has no skewness. + if constexpr (std::is_integral_v<Real>) + { + return double(0); + } + else + { + return Real(0); + } + } + else + { + return M3/(M2*sqrt(var)) / Real(2); + } + } +} + +template<class ExecutionPolicy, class Container> +inline auto skewness(ExecutionPolicy&& exec, Container & v) +{ + return skewness(exec, std::cbegin(v), std::cend(v)); +} + +template<class ForwardIterator> +inline auto skewness(ForwardIterator first, ForwardIterator last) +{ + return skewness(std::execution::seq, first, last); +} + +template<class Container> +inline auto skewness(Container const & v) +{ + return skewness(std::execution::seq, std::cbegin(v), std::cend(v)); +} + +// Follows equation 1.6 of: +// https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf +template<class ExecutionPolicy, class ForwardIterator> +inline auto kurtosis(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last) +{ + const auto [M1, M2, M3, M4] = first_four_moments(exec, first, last); + if (M2 == 0) + { + return M2; + } + return M4/(M2*M2); +} + +template<class ExecutionPolicy, class Container> +inline auto kurtosis(ExecutionPolicy&& exec, Container const & v) +{ + return kurtosis(exec, std::cbegin(v), std::cend(v)); +} + +template<class ForwardIterator> +inline auto kurtosis(ForwardIterator first, ForwardIterator last) +{ + return kurtosis(std::execution::seq, first, last); +} + +template<class Container> +inline auto kurtosis(Container const & v) +{ + return kurtosis(std::execution::seq, std::cbegin(v), std::cend(v)); +} + +template<class ExecutionPolicy, class ForwardIterator> +inline auto excess_kurtosis(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last) +{ + return kurtosis(exec, first, last) - 3; +} + +template<class ExecutionPolicy, class Container> +inline auto excess_kurtosis(ExecutionPolicy&& exec, Container const & v) +{ + return excess_kurtosis(exec, std::cbegin(v), std::cend(v)); +} + +template<class ForwardIterator> +inline auto excess_kurtosis(ForwardIterator first, ForwardIterator last) +{ + return excess_kurtosis(std::execution::seq, first, last); +} + +template<class Container> +inline auto excess_kurtosis(Container const & v) +{ + return excess_kurtosis(std::execution::seq, std::cbegin(v), std::cend(v)); +} + + +template<class ExecutionPolicy, class RandomAccessIterator> +auto median(ExecutionPolicy&& exec, RandomAccessIterator first, RandomAccessIterator last) +{ + const auto num_elems = std::distance(first, last); + BOOST_MATH_ASSERT_MSG(num_elems > 0, "The median of a zero length vector is undefined."); + if (num_elems & 1) + { + auto middle = first + (num_elems - 1)/2; + std::nth_element(exec, first, middle, last); + return *middle; + } + else + { + auto middle = first + num_elems/2 - 1; + std::nth_element(exec, first, middle, last); + std::nth_element(exec, middle, middle+1, last); + return (*middle + *(middle+1))/2; + } +} + + +template<class ExecutionPolicy, class RandomAccessContainer> +inline auto median(ExecutionPolicy&& exec, RandomAccessContainer & v) +{ + return median(exec, std::begin(v), std::end(v)); +} + +template<class RandomAccessIterator> +inline auto median(RandomAccessIterator first, RandomAccessIterator last) +{ + return median(std::execution::seq, first, last); +} + +template<class RandomAccessContainer> +inline auto median(RandomAccessContainer & v) +{ + return median(std::execution::seq, std::begin(v), std::end(v)); +} + +#if 0 +// +// Parallel gini calculation is curently broken, see: +// https://github.com/boostorg/math/issues/585 +// We will fix this at a later date, for now just use a serial implementation: +// +template<class ExecutionPolicy, class RandomAccessIterator> +inline auto gini_coefficient(ExecutionPolicy&& exec, RandomAccessIterator first, RandomAccessIterator last) +{ + using Real = typename std::iterator_traits<RandomAccessIterator>::value_type; + + if(!std::is_sorted(exec, first, last)) + { + std::sort(exec, first, last); + } + + if constexpr (std::is_same_v<std::remove_reference_t<decltype(exec)>, decltype(std::execution::seq)>) + { + if constexpr (std::is_integral_v<Real>) + { + return detail::gini_coefficient_sequential_impl<double>(first, last); + } + else + { + return detail::gini_coefficient_sequential_impl<Real>(first, last); + } + } + + else if constexpr (std::is_integral_v<Real>) + { + return detail::gini_coefficient_parallel_impl<double>(exec, first, last); + } + + else + { + return detail::gini_coefficient_parallel_impl<Real>(exec, first, last); + } +} +#else +template<class ExecutionPolicy, class RandomAccessIterator> +inline auto gini_coefficient(ExecutionPolicy&& exec, RandomAccessIterator first, RandomAccessIterator last) +{ + using Real = typename std::iterator_traits<RandomAccessIterator>::value_type; + + if (!std::is_sorted(exec, first, last)) + { + std::sort(exec, first, last); + } + + if constexpr (std::is_integral_v<Real>) + { + return detail::gini_coefficient_sequential_impl<double>(first, last); + } + else + { + return detail::gini_coefficient_sequential_impl<Real>(first, last); + } +} +#endif + +template<class ExecutionPolicy, class RandomAccessContainer> +inline auto gini_coefficient(ExecutionPolicy&& exec, RandomAccessContainer & v) +{ + return gini_coefficient(exec, std::begin(v), std::end(v)); +} + +template<class RandomAccessIterator> +inline auto gini_coefficient(RandomAccessIterator first, RandomAccessIterator last) +{ + return gini_coefficient(std::execution::seq, first, last); +} + +template<class RandomAccessContainer> +inline auto gini_coefficient(RandomAccessContainer & v) +{ + return gini_coefficient(std::execution::seq, std::begin(v), std::end(v)); +} + +template<class ExecutionPolicy, class RandomAccessIterator> +inline auto sample_gini_coefficient(ExecutionPolicy&& exec, RandomAccessIterator first, RandomAccessIterator last) +{ + const auto n = std::distance(first, last); + return n*gini_coefficient(exec, first, last)/(n-1); +} + +template<class ExecutionPolicy, class RandomAccessContainer> +inline auto sample_gini_coefficient(ExecutionPolicy&& exec, RandomAccessContainer & v) +{ + return sample_gini_coefficient(exec, std::begin(v), std::end(v)); +} + +template<class RandomAccessIterator> +inline auto sample_gini_coefficient(RandomAccessIterator first, RandomAccessIterator last) +{ + return sample_gini_coefficient(std::execution::seq, first, last); +} + +template<class RandomAccessContainer> +inline auto sample_gini_coefficient(RandomAccessContainer & v) +{ + return sample_gini_coefficient(std::execution::seq, std::begin(v), std::end(v)); +} + +template<class ExecutionPolicy, class RandomAccessIterator> +auto median_absolute_deviation(ExecutionPolicy&& exec, RandomAccessIterator first, RandomAccessIterator last, + typename std::iterator_traits<RandomAccessIterator>::value_type center=std::numeric_limits<typename std::iterator_traits<RandomAccessIterator>::value_type>::quiet_NaN()) +{ + using std::abs; + using Real = typename std::iterator_traits<RandomAccessIterator>::value_type; + using std::isnan; + if (isnan(center)) + { + center = boost::math::statistics::median(exec, first, last); + } + const auto num_elems = std::distance(first, last); + BOOST_MATH_ASSERT_MSG(num_elems > 0, "The median of a zero-length vector is undefined."); + auto comparator = [¢er](Real a, Real b) { return abs(a-center) < abs(b-center);}; + if (num_elems & 1) + { + auto middle = first + (num_elems - 1)/2; + std::nth_element(exec, first, middle, last, comparator); + return abs(*middle); + } + else + { + auto middle = first + num_elems/2 - 1; + std::nth_element(exec, first, middle, last, comparator); + std::nth_element(exec, middle, middle+1, last, comparator); + return (abs(*middle) + abs(*(middle+1)))/abs(static_cast<Real>(2)); + } +} + +template<class ExecutionPolicy, class RandomAccessContainer> +inline auto median_absolute_deviation(ExecutionPolicy&& exec, RandomAccessContainer & v, + typename RandomAccessContainer::value_type center=std::numeric_limits<typename RandomAccessContainer::value_type>::quiet_NaN()) +{ + return median_absolute_deviation(exec, std::begin(v), std::end(v), center); +} + +template<class RandomAccessIterator> +inline auto median_absolute_deviation(RandomAccessIterator first, RandomAccessIterator last, + typename RandomAccessIterator::value_type center=std::numeric_limits<typename RandomAccessIterator::value_type>::quiet_NaN()) +{ + return median_absolute_deviation(std::execution::seq, first, last, center); +} + +template<class RandomAccessContainer> +inline auto median_absolute_deviation(RandomAccessContainer & v, + typename RandomAccessContainer::value_type center=std::numeric_limits<typename RandomAccessContainer::value_type>::quiet_NaN()) +{ + return median_absolute_deviation(std::execution::seq, std::begin(v), std::end(v), center); +} + +template<class ExecutionPolicy, class ForwardIterator> +auto interquartile_range(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last) +{ + using Real = typename std::iterator_traits<ForwardIterator>::value_type; + static_assert(!std::is_integral_v<Real>, "Integer values have not yet been implemented."); + auto m = std::distance(first,last); + BOOST_MATH_ASSERT_MSG(m >= 3, "At least 3 samples are required to compute the interquartile range."); + auto k = m/4; + auto j = m - (4*k); + // m = 4k+j. + // If j = 0 or j = 1, then there are an even number of samples below the median, and an even number above the median. + // Then we must average adjacent elements to get the quartiles. + // If j = 2 or j = 3, there are an odd number of samples above and below the median, these elements may be directly extracted to get the quartiles. + + if (j==2 || j==3) + { + auto q1 = first + k; + auto q3 = first + 3*k + j - 1; + std::nth_element(exec, first, q1, last); + Real Q1 = *q1; + std::nth_element(exec, q1, q3, last); + Real Q3 = *q3; + return Q3 - Q1; + } else { + // j == 0 or j==1: + auto q1 = first + k - 1; + auto q3 = first + 3*k - 1 + j; + std::nth_element(exec, first, q1, last); + Real a = *q1; + std::nth_element(exec, q1, q1 + 1, last); + Real b = *(q1 + 1); + Real Q1 = (a+b)/2; + std::nth_element(exec, q1, q3, last); + a = *q3; + std::nth_element(exec, q3, q3 + 1, last); + b = *(q3 + 1); + Real Q3 = (a+b)/2; + return Q3 - Q1; + } +} + +template<class ExecutionPolicy, class RandomAccessContainer> +inline auto interquartile_range(ExecutionPolicy&& exec, RandomAccessContainer & v) +{ + return interquartile_range(exec, std::begin(v), std::end(v)); +} + +template<class RandomAccessIterator> +inline auto interquartile_range(RandomAccessIterator first, RandomAccessIterator last) +{ + return interquartile_range(std::execution::seq, first, last); +} + +template<class RandomAccessContainer> +inline auto interquartile_range(RandomAccessContainer & v) +{ + return interquartile_range(std::execution::seq, std::begin(v), std::end(v)); +} + +template<class ExecutionPolicy, class ForwardIterator, class OutputIterator> +inline OutputIterator mode(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last, OutputIterator output) +{ + if(!std::is_sorted(exec, first, last)) + { + if constexpr (std::is_same_v<typename std::iterator_traits<ForwardIterator>::iterator_category(), std::random_access_iterator_tag>) + { + std::sort(exec, first, last); + } + else + { + BOOST_MATH_ASSERT("Data must be sorted for sequential mode calculation"); + } + } + + return detail::mode_impl(first, last, output); +} + +template<class ExecutionPolicy, class Container, class OutputIterator> +inline OutputIterator mode(ExecutionPolicy&& exec, Container & v, OutputIterator output) +{ + return mode(exec, std::begin(v), std::end(v), output); +} + +template<class ForwardIterator, class OutputIterator> +inline OutputIterator mode(ForwardIterator first, ForwardIterator last, OutputIterator output) +{ + return mode(std::execution::seq, first, last, output); +} + +// Requires enable_if_t to not clash with impl that returns std::list +// Very ugly. std::is_execution_policy_v returns false for the std::execution objects and decltype of the objects (e.g. std::execution::seq) +template<class Container, class OutputIterator, std::enable_if_t<!std::is_convertible_v<std::execution::sequenced_policy, Container> && + !std::is_convertible_v<std::execution::parallel_unsequenced_policy, Container> && + !std::is_convertible_v<std::execution::parallel_policy, Container> + #if __cpp_lib_execution > 201900 + && !std::is_convertible_v<std::execution::unsequenced_policy, Container> + #endif + , bool> = true> +inline OutputIterator mode(Container & v, OutputIterator output) +{ + return mode(std::execution::seq, std::begin(v), std::end(v), output); +} + +// std::list is the return type for the proposed STL stats library + +template<class ExecutionPolicy, class ForwardIterator, class Real = typename std::iterator_traits<ForwardIterator>::value_type> +inline auto mode(ExecutionPolicy&& exec, ForwardIterator first, ForwardIterator last) +{ + std::list<Real> modes; + mode(exec, first, last, std::inserter(modes, modes.begin())); + return modes; +} + +template<class ExecutionPolicy, class Container> +inline auto mode(ExecutionPolicy&& exec, Container & v) +{ + return mode(exec, std::begin(v), std::end(v)); +} + +template<class ForwardIterator> +inline auto mode(ForwardIterator first, ForwardIterator last) +{ + return mode(std::execution::seq, first, last); +} + +template<class Container> +inline auto mode(Container & v) +{ + return mode(std::execution::seq, std::begin(v), std::end(v)); +} + +} // Namespace boost::math::statistics + +#else // Backwards compatible bindings for C++11 + +namespace boost { namespace math { namespace statistics { + +template<bool B, class T = void> +using enable_if_t = typename std::enable_if<B, T>::type; + +template<class ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type, + enable_if_t<std::is_integral<Real>::value, bool> = true> +inline double mean(const ForwardIterator first, const ForwardIterator last) +{ + BOOST_MATH_ASSERT_MSG(first != last, "At least one sample is required to compute the mean."); + return detail::mean_sequential_impl<double>(first, last); +} + +template<class Container, typename Real = typename Container::value_type, + enable_if_t<std::is_integral<Real>::value, bool> = true> +inline double mean(const Container& c) +{ + return mean(std::begin(c), std::end(c)); +} + +template<class ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type, + enable_if_t<!std::is_integral<Real>::value, bool> = true> +inline Real mean(const ForwardIterator first, const ForwardIterator last) +{ + BOOST_MATH_ASSERT_MSG(first != last, "At least one sample is required to compute the mean."); + return detail::mean_sequential_impl<Real>(first, last); +} + +template<class Container, typename Real = typename Container::value_type, + enable_if_t<!std::is_integral<Real>::value, bool> = true> +inline Real mean(const Container& c) +{ + return mean(std::begin(c), std::end(c)); +} + +template<class ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type, + enable_if_t<std::is_integral<Real>::value, bool> = true> +inline double variance(const ForwardIterator first, const ForwardIterator last) +{ + return std::get<2>(detail::variance_sequential_impl<std::tuple<double, double, double, double>>(first, last)); +} + +template<class Container, typename Real = typename Container::value_type, + enable_if_t<std::is_integral<Real>::value, bool> = true> +inline double variance(const Container& c) +{ + return variance(std::begin(c), std::end(c)); +} + +template<class ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type, + enable_if_t<!std::is_integral<Real>::value, bool> = true> +inline Real variance(const ForwardIterator first, const ForwardIterator last) +{ + return std::get<2>(detail::variance_sequential_impl<std::tuple<Real, Real, Real, Real>>(first, last)); + +} + +template<class Container, typename Real = typename Container::value_type, + enable_if_t<!std::is_integral<Real>::value, bool> = true> +inline Real variance(const Container& c) +{ + return variance(std::begin(c), std::end(c)); +} + +template<class ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type, + enable_if_t<std::is_integral<Real>::value, bool> = true> +inline double sample_variance(const ForwardIterator first, const ForwardIterator last) +{ + const auto n = std::distance(first, last); + BOOST_MATH_ASSERT_MSG(n > 1, "At least two samples are required to compute the sample variance."); + return n*variance(first, last)/(n-1); +} + +template<class Container, typename Real = typename Container::value_type, + enable_if_t<std::is_integral<Real>::value, bool> = true> +inline double sample_variance(const Container& c) +{ + return sample_variance(std::begin(c), std::end(c)); +} + +template<class ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type, + enable_if_t<!std::is_integral<Real>::value, bool> = true> +inline Real sample_variance(const ForwardIterator first, const ForwardIterator last) +{ + const auto n = std::distance(first, last); + BOOST_MATH_ASSERT_MSG(n > 1, "At least two samples are required to compute the sample variance."); + return n*variance(first, last)/(n-1); +} + +template<class Container, typename Real = typename Container::value_type, + enable_if_t<!std::is_integral<Real>::value, bool> = true> +inline Real sample_variance(const Container& c) +{ + return sample_variance(std::begin(c), std::end(c)); +} + +template<class ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type, + enable_if_t<std::is_integral<Real>::value, bool> = true> +inline std::pair<double, double> mean_and_sample_variance(const ForwardIterator first, const ForwardIterator last) +{ + const auto results = detail::variance_sequential_impl<std::tuple<double, double, double, double>>(first, last); + return std::make_pair(std::get<0>(results), std::get<3>(results)*std::get<2>(results)/(std::get<3>(results)-1.0)); +} + +template<class Container, typename Real = typename Container::value_type, + enable_if_t<std::is_integral<Real>::value, bool> = true> +inline std::pair<double, double> mean_and_sample_variance(const Container& c) +{ + return mean_and_sample_variance(std::begin(c), std::end(c)); +} + +template<class ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type, + enable_if_t<!std::is_integral<Real>::value, bool> = true> +inline std::pair<Real, Real> mean_and_sample_variance(const ForwardIterator first, const ForwardIterator last) +{ + const auto results = detail::variance_sequential_impl<std::tuple<Real, Real, Real, Real>>(first, last); + return std::make_pair(std::get<0>(results), std::get<3>(results)*std::get<2>(results)/(std::get<3>(results)-Real(1))); +} + +template<class Container, typename Real = typename Container::value_type, + enable_if_t<!std::is_integral<Real>::value, bool> = true> +inline std::pair<Real, Real> mean_and_sample_variance(const Container& c) +{ + return mean_and_sample_variance(std::begin(c), std::end(c)); +} + +template<class ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type, + enable_if_t<std::is_integral<Real>::value, bool> = true> +inline std::tuple<double, double, double, double> first_four_moments(const ForwardIterator first, const ForwardIterator last) +{ + const auto results = detail::first_four_moments_sequential_impl<std::tuple<double, double, double, double, double>>(first, last); + return std::make_tuple(std::get<0>(results), std::get<1>(results) / std::get<4>(results), std::get<2>(results) / std::get<4>(results), + std::get<3>(results) / std::get<4>(results)); +} + +template<class Container, typename Real = typename Container::value_type, + enable_if_t<std::is_integral<Real>::value, bool> = true> +inline std::tuple<double, double, double, double> first_four_moments(const Container& c) +{ + return first_four_moments(std::begin(c), std::end(c)); +} + +template<class ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type, + enable_if_t<!std::is_integral<Real>::value, bool> = true> +inline std::tuple<Real, Real, Real, Real> first_four_moments(const ForwardIterator first, const ForwardIterator last) +{ + const auto results = detail::first_four_moments_sequential_impl<std::tuple<Real, Real, Real, Real, Real>>(first, last); + return std::make_tuple(std::get<0>(results), std::get<1>(results) / std::get<4>(results), std::get<2>(results) / std::get<4>(results), + std::get<3>(results) / std::get<4>(results)); +} + +template<class Container, typename Real = typename Container::value_type, + enable_if_t<!std::is_integral<Real>::value, bool> = true> +inline std::tuple<Real, Real, Real, Real> first_four_moments(const Container& c) +{ + return first_four_moments(std::begin(c), std::end(c)); +} + +template<class ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type, + enable_if_t<std::is_integral<Real>::value, bool> = true> +inline double skewness(const ForwardIterator first, const ForwardIterator last) +{ + return detail::skewness_sequential_impl<double>(first, last); +} + +template<class Container, typename Real = typename Container::value_type, + enable_if_t<std::is_integral<Real>::value, bool> = true> +inline double skewness(const Container& c) +{ + return skewness(std::begin(c), std::end(c)); +} + +template<class ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type, + enable_if_t<!std::is_integral<Real>::value, bool> = true> +inline Real skewness(const ForwardIterator first, const ForwardIterator last) +{ + return detail::skewness_sequential_impl<Real>(first, last); +} + +template<class Container, typename Real = typename Container::value_type, + enable_if_t<!std::is_integral<Real>::value, bool> = true> +inline Real skewness(const Container& c) +{ + return skewness(std::begin(c), std::end(c)); +} + +template<class ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type, + enable_if_t<std::is_integral<Real>::value, bool> = true> +inline double kurtosis(const ForwardIterator first, const ForwardIterator last) +{ + std::tuple<double, double, double, double> M = first_four_moments(first, last); + + if(std::get<1>(M) == 0) + { + return std::get<1>(M); + } + else + { + return std::get<3>(M)/(std::get<1>(M)*std::get<1>(M)); + } +} + +template<class Container, typename Real = typename Container::value_type, + enable_if_t<std::is_integral<Real>::value, bool> = true> +inline double kurtosis(const Container& c) +{ + return kurtosis(std::begin(c), std::end(c)); +} + +template<class ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type, + enable_if_t<!std::is_integral<Real>::value, bool> = true> +inline Real kurtosis(const ForwardIterator first, const ForwardIterator last) +{ + std::tuple<Real, Real, Real, Real> M = first_four_moments(first, last); + + if(std::get<1>(M) == 0) + { + return std::get<1>(M); + } + else + { + return std::get<3>(M)/(std::get<1>(M)*std::get<1>(M)); + } +} + +template<class Container, typename Real = typename Container::value_type, + enable_if_t<!std::is_integral<Real>::value, bool> = true> +inline Real kurtosis(const Container& c) +{ + return kurtosis(std::begin(c), std::end(c)); +} + +template<class ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type, + enable_if_t<std::is_integral<Real>::value, bool> = true> +inline double excess_kurtosis(const ForwardIterator first, const ForwardIterator last) +{ + return kurtosis(first, last) - 3; +} + +template<class Container, typename Real = typename Container::value_type, + enable_if_t<std::is_integral<Real>::value, bool> = true> +inline double excess_kurtosis(const Container& c) +{ + return excess_kurtosis(std::begin(c), std::end(c)); +} + +template<class ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type, + enable_if_t<!std::is_integral<Real>::value, bool> = true> +inline Real excess_kurtosis(const ForwardIterator first, const ForwardIterator last) +{ + return kurtosis(first, last) - 3; +} + +template<class Container, typename Real = typename Container::value_type, + enable_if_t<!std::is_integral<Real>::value, bool> = true> +inline Real excess_kurtosis(const Container& c) +{ + return excess_kurtosis(std::begin(c), std::end(c)); +} + +template<class RandomAccessIterator, typename Real = typename std::iterator_traits<RandomAccessIterator>::value_type> +Real median(RandomAccessIterator first, RandomAccessIterator last) +{ + const auto num_elems = std::distance(first, last); + BOOST_MATH_ASSERT_MSG(num_elems > 0, "The median of a zero length vector is undefined."); + if (num_elems & 1) + { + auto middle = first + (num_elems - 1)/2; + std::nth_element(first, middle, last); + return *middle; + } + else + { + auto middle = first + num_elems/2 - 1; + std::nth_element(first, middle, last); + std::nth_element(middle, middle+1, last); + return (*middle + *(middle+1))/2; + } +} + +template<class RandomAccessContainer, typename Real = typename RandomAccessContainer::value_type> +inline Real median(RandomAccessContainer& c) +{ + return median(std::begin(c), std::end(c)); +} + +template<class RandomAccessIterator, typename Real = typename std::iterator_traits<RandomAccessIterator>::value_type, + enable_if_t<std::is_integral<Real>::value, bool> = true> +inline double gini_coefficient(RandomAccessIterator first, RandomAccessIterator last) +{ + if(!std::is_sorted(first, last)) + { + std::sort(first, last); + } + + return detail::gini_coefficient_sequential_impl<double>(first, last); +} + +template<class RandomAccessContainer, typename Real = typename RandomAccessContainer::value_type, + enable_if_t<std::is_integral<Real>::value, bool> = true> +inline double gini_coefficient(RandomAccessContainer& c) +{ + return gini_coefficient(std::begin(c), std::end(c)); +} + +template<class RandomAccessIterator, typename Real = typename std::iterator_traits<RandomAccessIterator>::value_type, + enable_if_t<!std::is_integral<Real>::value, bool> = true> +inline Real gini_coefficient(RandomAccessIterator first, RandomAccessIterator last) +{ + if(!std::is_sorted(first, last)) + { + std::sort(first, last); + } + + return detail::gini_coefficient_sequential_impl<Real>(first, last); +} + +template<class RandomAccessContainer, typename Real = typename RandomAccessContainer::value_type, + enable_if_t<!std::is_integral<Real>::value, bool> = true> +inline Real gini_coefficient(RandomAccessContainer& c) +{ + return gini_coefficient(std::begin(c), std::end(c)); +} + +template<class RandomAccessIterator, typename Real = typename std::iterator_traits<RandomAccessIterator>::value_type, + enable_if_t<std::is_integral<Real>::value, bool> = true> +inline double sample_gini_coefficient(RandomAccessIterator first, RandomAccessIterator last) +{ + const auto n = std::distance(first, last); + return n*gini_coefficient(first, last)/(n-1); +} + +template<class RandomAccessContainer, typename Real = typename RandomAccessContainer::value_type, + enable_if_t<std::is_integral<Real>::value, bool> = true> +inline double sample_gini_coefficient(RandomAccessContainer& c) +{ + return sample_gini_coefficient(std::begin(c), std::end(c)); +} + +template<class RandomAccessIterator, typename Real = typename std::iterator_traits<RandomAccessIterator>::value_type, + enable_if_t<!std::is_integral<Real>::value, bool> = true> +inline Real sample_gini_coefficient(RandomAccessIterator first, RandomAccessIterator last) +{ + const auto n = std::distance(first, last); + return n*gini_coefficient(first, last)/(n-1); +} + +template<class RandomAccessContainer, typename Real = typename RandomAccessContainer::value_type, + enable_if_t<!std::is_integral<Real>::value, bool> = true> +inline Real sample_gini_coefficient(RandomAccessContainer& c) +{ + return sample_gini_coefficient(std::begin(c), std::end(c)); +} + +template<class RandomAccessIterator, typename Real = typename std::iterator_traits<RandomAccessIterator>::value_type> +Real median_absolute_deviation(RandomAccessIterator first, RandomAccessIterator last, + typename std::iterator_traits<RandomAccessIterator>::value_type center=std::numeric_limits<typename std::iterator_traits<RandomAccessIterator>::value_type>::quiet_NaN()) +{ + using std::abs; + using std::isnan; + if (isnan(center)) + { + center = boost::math::statistics::median(first, last); + } + const auto num_elems = std::distance(first, last); + BOOST_MATH_ASSERT_MSG(num_elems > 0, "The median of a zero-length vector is undefined."); + auto comparator = [¢er](Real a, Real b) { return abs(a-center) < abs(b-center);}; + if (num_elems & 1) + { + auto middle = first + (num_elems - 1)/2; + std::nth_element(first, middle, last, comparator); + return abs(*middle); + } + else + { + auto middle = first + num_elems/2 - 1; + std::nth_element(first, middle, last, comparator); + std::nth_element(middle, middle+1, last, comparator); + return (abs(*middle) + abs(*(middle+1)))/abs(static_cast<Real>(2)); + } +} + +template<class RandomAccessContainer, typename Real = typename RandomAccessContainer::value_type> +inline Real median_absolute_deviation(RandomAccessContainer& c, + typename RandomAccessContainer::value_type center=std::numeric_limits<typename RandomAccessContainer::value_type>::quiet_NaN()) +{ + return median_absolute_deviation(std::begin(c), std::end(c), center); +} + +template<class ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type> +Real interquartile_range(ForwardIterator first, ForwardIterator last) +{ + static_assert(!std::is_integral<Real>::value, "Integer values have not yet been implemented."); + auto m = std::distance(first,last); + BOOST_MATH_ASSERT_MSG(m >= 3, "At least 3 samples are required to compute the interquartile range."); + auto k = m/4; + auto j = m - (4*k); + // m = 4k+j. + // If j = 0 or j = 1, then there are an even number of samples below the median, and an even number above the median. + // Then we must average adjacent elements to get the quartiles. + // If j = 2 or j = 3, there are an odd number of samples above and below the median, these elements may be directly extracted to get the quartiles. + + if (j==2 || j==3) + { + auto q1 = first + k; + auto q3 = first + 3*k + j - 1; + std::nth_element(first, q1, last); + Real Q1 = *q1; + std::nth_element(q1, q3, last); + Real Q3 = *q3; + return Q3 - Q1; + } + else + { + // j == 0 or j==1: + auto q1 = first + k - 1; + auto q3 = first + 3*k - 1 + j; + std::nth_element(first, q1, last); + Real a = *q1; + std::nth_element(q1, q1 + 1, last); + Real b = *(q1 + 1); + Real Q1 = (a+b)/2; + std::nth_element(q1, q3, last); + a = *q3; + std::nth_element(q3, q3 + 1, last); + b = *(q3 + 1); + Real Q3 = (a+b)/2; + return Q3 - Q1; + } +} + +template<class Container, typename Real = typename Container::value_type> +Real interquartile_range(Container& c) +{ + return interquartile_range(std::begin(c), std::end(c)); +} + +template<class ForwardIterator, class OutputIterator, + enable_if_t<std::is_same<typename std::iterator_traits<ForwardIterator>::iterator_category(), std::random_access_iterator_tag>::value, bool> = true> +inline OutputIterator mode(ForwardIterator first, ForwardIterator last, OutputIterator output) +{ + if(!std::is_sorted(first, last)) + { + std::sort(first, last); + } + + return detail::mode_impl(first, last, output); +} + +template<class ForwardIterator, class OutputIterator, + enable_if_t<!std::is_same<typename std::iterator_traits<ForwardIterator>::iterator_category(), std::random_access_iterator_tag>::value, bool> = true> +inline OutputIterator mode(ForwardIterator first, ForwardIterator last, OutputIterator output) +{ + if(!std::is_sorted(first, last)) + { + BOOST_MATH_ASSERT("Data must be sorted for mode calculation"); + } + + return detail::mode_impl(first, last, output); +} + +template<class Container, class OutputIterator> +inline OutputIterator mode(Container& c, OutputIterator output) +{ + return mode(std::begin(c), std::end(c), output); +} + +template<class ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type> +inline std::list<Real> mode(ForwardIterator first, ForwardIterator last) +{ + std::list<Real> modes; + mode(first, last, std::inserter(modes, modes.begin())); + return modes; +} + +template<class Container, typename Real = typename Container::value_type> +inline std::list<Real> mode(Container& c) +{ + return mode(std::begin(c), std::end(c)); +} +}}} +#endif +#endif // BOOST_MATH_STATISTICS_UNIVARIATE_STATISTICS_HPP diff --git a/src/third_party/boost/boost/math/statistics/z_test.hpp b/src/third_party/boost/boost/math/statistics/z_test.hpp new file mode 100644 index 00000000000..a8ef838e40b --- /dev/null +++ b/src/third_party/boost/boost/math/statistics/z_test.hpp @@ -0,0 +1,161 @@ +// (C) Copyright Matt Borland 2021. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_STATISTICS_Z_TEST_HPP +#define BOOST_MATH_STATISTICS_Z_TEST_HPP + +#include <boost/math/distributions/normal.hpp> +#include <boost/math/statistics/univariate_statistics.hpp> +#include <iterator> +#include <type_traits> +#include <utility> +#include <cmath> + +namespace boost { namespace math { namespace statistics { namespace detail { + +template<typename ReturnType, typename T> +ReturnType one_sample_z_test_impl(T sample_mean, T sample_variance, T sample_size, T assumed_mean) +{ + using Real = typename std::tuple_element<0, ReturnType>::type; + using std::sqrt; + using no_promote_policy = boost::math::policies::policy<boost::math::policies::promote_float<false>, boost::math::policies::promote_double<false>>; + + Real test_statistic = (sample_mean - assumed_mean) / (sample_variance / sqrt(sample_size)); + auto z = boost::math::normal_distribution<Real, no_promote_policy>(sample_size - 1); + Real pvalue; + if(test_statistic > 0) + { + pvalue = 2*boost::math::cdf<Real>(z, -test_statistic); + } + else + { + pvalue = 2*boost::math::cdf<Real>(z, test_statistic); + } + + return std::make_pair(test_statistic, pvalue); +} + +template<typename ReturnType, typename ForwardIterator> +ReturnType one_sample_z_test_impl(ForwardIterator begin, ForwardIterator end, typename std::iterator_traits<ForwardIterator>::value_type assumed_mean) +{ + using Real = typename std::tuple_element<0, ReturnType>::type; + std::pair<Real, Real> temp = mean_and_sample_variance(begin, end); + Real mu = std::get<0>(temp); + Real s_sq = std::get<1>(temp); + return one_sample_z_test_impl<ReturnType>(mu, s_sq, Real(std::distance(begin, end)), Real(assumed_mean)); +} + +template<typename ReturnType, typename T> +ReturnType two_sample_z_test_impl(T mean_1, T variance_1, T size_1, T mean_2, T variance_2, T size_2) +{ + using Real = typename std::tuple_element<0, ReturnType>::type; + using std::sqrt; + using no_promote_policy = boost::math::policies::policy<boost::math::policies::promote_float<false>, boost::math::policies::promote_double<false>>; + + Real test_statistic = (mean_1 - mean_2) / sqrt(variance_1/size_1 + variance_2/size_2); + auto z = boost::math::normal_distribution<Real, no_promote_policy>(size_1 + size_2 - 1); + Real pvalue; + if(test_statistic > 0) + { + pvalue = 2*boost::math::cdf<Real>(z, -test_statistic); + } + else + { + pvalue = 2*boost::math::cdf<Real>(z, test_statistic); + } + + return std::make_pair(test_statistic, pvalue); +} + +template<typename ReturnType, typename ForwardIterator> +ReturnType two_sample_z_test_impl(ForwardIterator begin_1, ForwardIterator end_1, ForwardIterator begin_2, ForwardIterator end_2) +{ + using Real = typename std::tuple_element<0, ReturnType>::type; + using std::sqrt; + auto n1 = std::distance(begin_1, end_1); + auto n2 = std::distance(begin_2, end_2); + + ReturnType temp_1 = mean_and_sample_variance(begin_1, end_1); + Real mean_1 = std::get<0>(temp_1); + Real variance_1 = std::get<1>(temp_1); + + ReturnType temp_2 = mean_and_sample_variance(begin_2, end_2); + Real mean_2 = std::get<0>(temp_2); + Real variance_2 = std::get<1>(temp_2); + + return two_sample_z_test_impl<ReturnType>(mean_1, variance_1, Real(n1), mean_2, variance_2, Real(n2)); +} + +} // detail + +template<typename Real, typename std::enable_if<std::is_integral<Real>::value, bool>::type = true> +inline auto one_sample_z_test(Real sample_mean, Real sample_variance, Real sample_size, Real assumed_mean) -> std::pair<double, double> +{ + return detail::one_sample_z_test_impl<std::pair<double, double>>(sample_mean, sample_variance, sample_size, assumed_mean); +} + +template<typename Real, typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true> +inline auto one_sample_z_test(Real sample_mean, Real sample_variance, Real sample_size, Real assumed_mean) -> std::pair<Real, Real> +{ + return detail::one_sample_z_test_impl<std::pair<Real, Real>>(sample_mean, sample_variance, sample_size, assumed_mean); +} + +template<typename ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type, + typename std::enable_if<std::is_integral<Real>::value, bool>::type = true> +inline auto one_sample_z_test(ForwardIterator begin, ForwardIterator end, Real assumed_mean) -> std::pair<double, double> +{ + return detail::one_sample_z_test_impl<std::pair<double, double>>(begin, end, assumed_mean); +} + +template<typename ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type, + typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true> +inline auto one_sample_z_test(ForwardIterator begin, ForwardIterator end, Real assumed_mean) -> std::pair<Real, Real> +{ + return detail::one_sample_z_test_impl<std::pair<Real, Real>>(begin, end, assumed_mean); +} + +template<typename Container, typename Real = typename Container::value_type, + typename std::enable_if<std::is_integral<Real>::value, bool>::type = true> +inline auto one_sample_z_test(Container const & v, Real assumed_mean) -> std::pair<double, double> +{ + return detail::one_sample_z_test_impl<std::pair<double, double>>(std::begin(v), std::end(v), assumed_mean); +} + +template<typename Container, typename Real = typename Container::value_type, + typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true> +inline auto one_sample_z_test(Container const & v, Real assumed_mean) -> std::pair<Real, Real> +{ + return detail::one_sample_z_test_impl<std::pair<Real, Real>>(std::begin(v), std::end(v), assumed_mean); +} + +template<typename ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type, + typename std::enable_if<std::is_integral<Real>::value, bool>::type = true> +inline auto two_sample_z_test(ForwardIterator begin_1, ForwardIterator end_1, ForwardIterator begin_2, ForwardIterator end_2) -> std::pair<double, double> +{ + return detail::two_sample_z_test_impl<std::pair<double, double>>(begin_1, end_1, begin_2, end_2); +} + +template<typename ForwardIterator, typename Real = typename std::iterator_traits<ForwardIterator>::value_type, + typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true> +inline auto two_sample_z_test(ForwardIterator begin_1, ForwardIterator end_1, ForwardIterator begin_2, ForwardIterator end_2) -> std::pair<Real, Real> +{ + return detail::two_sample_z_test_impl<std::pair<Real, Real>>(begin_1, end_1, begin_2, end_2); +} + +template<typename Container, typename Real = typename Container::value_type, typename std::enable_if<std::is_integral<Real>::value, bool>::type = true> +inline auto two_sample_z_test(Container const & u, Container const & v) -> std::pair<double, double> +{ + return detail::two_sample_z_test_impl<std::pair<double, double>>(std::begin(u), std::end(u), std::begin(v), std::end(v)); +} + +template<typename Container, typename Real = typename Container::value_type, typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true> +inline auto two_sample_z_test(Container const & u, Container const & v) -> std::pair<Real, Real> +{ + return detail::two_sample_z_test_impl<std::pair<Real, Real>>(std::begin(u), std::end(u), std::begin(v), std::end(v)); +} + +}}} // boost::math::statistics + +#endif // BOOST_MATH_STATISTICS_Z_TEST_HPP diff --git a/src/third_party/scripts/boost_get_sources.sh b/src/third_party/scripts/boost_get_sources.sh index fa2d63ce375..24b7048a9d8 100755 --- a/src/third_party/scripts/boost_get_sources.sh +++ b/src/third_party/scripts/boost_get_sources.sh @@ -43,7 +43,7 @@ cd $SRC ./b2 tools/bcp test -d $DEST_DIR || mkdir $DEST_DIR -$SRC/dist/bin/bcp --boost=$SRC/ algorithm align array asio bind iostreams config container date_time filesystem function integer intrusive log multi_index noncopyable optional program_options property_tree random smart_ptr static_assert unordered utility vmd $DEST_DIR +$SRC/dist/bin/bcp --boost=$SRC/ algorithm align array asio bind iostreams config container date_time filesystem function integer intrusive log math/statistics multi_index noncopyable optional program_options property_tree random smart_ptr static_assert unordered utility vmd $DEST_DIR # Trim files cd $DEST_DIR |