diff options
Diffstat (limited to 'src/mongo/util/descriptive_stats-inl.h')
-rw-r--r-- | src/mongo/util/descriptive_stats-inl.h | 288 |
1 files changed, 132 insertions, 156 deletions
diff --git a/src/mongo/util/descriptive_stats-inl.h b/src/mongo/util/descriptive_stats-inl.h index 7cb7567f5ec..1b7c91595d1 100644 --- a/src/mongo/util/descriptive_stats-inl.h +++ b/src/mongo/util/descriptive_stats-inl.h @@ -39,186 +39,162 @@ namespace mongo { - template <class Sample> - BasicEstimators<Sample>::BasicEstimators() : - _count(0), - _sum(0), - _diff(0), - _min(std::numeric_limits<Sample>::max()), - _max(std::numeric_limits<Sample>::min()) { - - } - - template <class Sample> - BasicEstimators<Sample>& BasicEstimators<Sample>::operator <<(const Sample sample) { - const double oldMean = (_count > 0) ? _sum / _count : 0; - const double delta = oldMean - static_cast<double>(sample); - const double weight = static_cast<double>(_count) / (_count + 1); - _diff += delta * delta * weight; - _sum += static_cast<double>(sample); - _count++; - _min = std::min(sample, _min); - _max = std::max(sample, _max); - return *this; +template <class Sample> +BasicEstimators<Sample>::BasicEstimators() + : _count(0), + _sum(0), + _diff(0), + _min(std::numeric_limits<Sample>::max()), + _max(std::numeric_limits<Sample>::min()) {} + +template <class Sample> +BasicEstimators<Sample>& BasicEstimators<Sample>::operator<<(const Sample sample) { + const double oldMean = (_count > 0) ? _sum / _count : 0; + const double delta = oldMean - static_cast<double>(sample); + const double weight = static_cast<double>(_count) / (_count + 1); + _diff += delta * delta * weight; + _sum += static_cast<double>(sample); + _count++; + _min = std::min(sample, _min); + _max = std::max(sample, _max); + return *this; +} + +template <class Sample> +void BasicEstimators<Sample>::appendBasicToBSONObjBuilder(BSONObjBuilder& b) const { + b << "count" << static_cast<long long>(count()) << "mean" << mean() << "stddev" << stddev() + << "min" << min() << "max" << max(); +} + +template <std::size_t NumQuantiles> +DistributionEstimators<NumQuantiles>::DistributionEstimators() + : _count(0) { + for (std::size_t i = 0; i < NumMarkers; i++) { + _actual_positions[i] = i + 1; } - template <class Sample> - void BasicEstimators<Sample>::appendBasicToBSONObjBuilder(BSONObjBuilder& b) const { - b << "count" << static_cast<long long>(count()) - << "mean" << mean() - << "stddev" << stddev() - << "min" << min() - << "max" << max(); + for (std::size_t i = 0; i < NumMarkers; i++) { + _desired_positions[i] = 1.0 + (2.0 * (NumQuantiles + 1.0) * _positions_increments(i)); } - - template <std::size_t NumQuantiles> - DistributionEstimators<NumQuantiles>::DistributionEstimators() : - _count(0) { - - for(std::size_t i = 0; i < NumMarkers; i++) { - _actual_positions[i] = i + 1; +} + +/* + * The quantile estimation follows the extended_p_square implementation in boost.accumulators. + * It differs by removing the ability to request arbitrary quantiles and computing exactly + * 'NumQuantiles' equidistant quantiles (plus minimum and maximum) instead. + * See http://www.boost.org/doc/libs/1_51_0/doc/html/boost/accumulators/impl/extended_p_square_impl.html , + * R. Jain and I. Chlamtac, The P^2 algorithmus for dynamic calculation of quantiles and histograms without storing observations, Communications of the ACM, Volume 28 (October), Number 10, 1985, p. 1076-1085. and + * K. E. E. Raatikainen, Simultaneous estimation of several quantiles, Simulation, Volume 49, Number 4 (October), 1986, p. 159-164. + */ +template <std::size_t NumQuantiles> +DistributionEstimators<NumQuantiles>& DistributionEstimators<NumQuantiles>::operator<<( + const double sample) { + // first accumulate num_markers samples + if (_count++ < NumMarkers) { + _heights[_count - 1] = sample; + + if (_count == NumMarkers) { + std::sort(_heights, _heights + NumMarkers); } - - for(std::size_t i = 0; i < NumMarkers; i++) { - _desired_positions[i] = 1.0 + (2.0 * (NumQuantiles + 1.0) * _positions_increments(i)); + } else { + std::size_t sample_cell = 1; + + // find cell k = sample_cell such that heights[k-1] <= sample < heights[k] + if (sample < _heights[0]) { + _heights[0] = sample; + sample_cell = 1; + } else if (sample >= _heights[NumMarkers - 1]) { + _heights[NumMarkers - 1] = sample; + sample_cell = NumMarkers - 1; + } else { + double* it = std::upper_bound(_heights, _heights + NumMarkers, sample); + + sample_cell = std::distance(_heights, it); } - } - /* - * The quantile estimation follows the extended_p_square implementation in boost.accumulators. - * It differs by removing the ability to request arbitrary quantiles and computing exactly - * 'NumQuantiles' equidistant quantiles (plus minimum and maximum) instead. - * See http://www.boost.org/doc/libs/1_51_0/doc/html/boost/accumulators/impl/extended_p_square_impl.html , - * R. Jain and I. Chlamtac, The P^2 algorithmus for dynamic calculation of quantiles and histograms without storing observations, Communications of the ACM, Volume 28 (October), Number 10, 1985, p. 1076-1085. and - * K. E. E. Raatikainen, Simultaneous estimation of several quantiles, Simulation, Volume 49, Number 4 (October), 1986, p. 159-164. - */ - template <std::size_t NumQuantiles> - DistributionEstimators<NumQuantiles>& - DistributionEstimators<NumQuantiles>::operator <<(const double sample) { - - // first accumulate num_markers samples - if (_count++ < NumMarkers) { - _heights[_count - 1] = sample; - - if (_count == NumMarkers) - { - std::sort(_heights, _heights + NumMarkers); - } + // update actual positions of all markers above sample_cell index + for (std::size_t i = sample_cell; i < NumMarkers; i++) { + _actual_positions[i]++; } - else { - std::size_t sample_cell = 1; - - // find cell k = sample_cell such that heights[k-1] <= sample < heights[k] - if(sample < _heights[0]) - { - _heights[0] = sample; - sample_cell = 1; - } - else if (sample >= _heights[NumMarkers - 1]) - { - _heights[NumMarkers - 1] = sample; - sample_cell = NumMarkers - 1; - } - else { - double* it = std::upper_bound(_heights, - _heights + NumMarkers, - sample); - sample_cell = std::distance(_heights, it); - } - - // update actual positions of all markers above sample_cell index - for(std::size_t i = sample_cell; i < NumMarkers; i++) { - _actual_positions[i]++; - } - - // update desired positions of all markers - for(std::size_t i = 0; i < NumMarkers; i++) { - _desired_positions[i] += _positions_increments(i); - } + // update desired positions of all markers + for (std::size_t i = 0; i < NumMarkers; i++) { + _desired_positions[i] += _positions_increments(i); + } - // adjust heights and actual positions of markers 1 to num_markers-2 if necessary - for(std::size_t i = 1; i <= NumMarkers - 2; i++) { - // offset to desired position - double d = _desired_positions[i] - _actual_positions[i]; + // adjust heights and actual positions of markers 1 to num_markers-2 if necessary + for (std::size_t i = 1; i <= NumMarkers - 2; i++) { + // offset to desired position + double d = _desired_positions[i] - _actual_positions[i]; - // offset to next position - double dp = _actual_positions[i + 1] - _actual_positions[i]; + // offset to next position + double dp = _actual_positions[i + 1] - _actual_positions[i]; - // offset to previous position - double dm = _actual_positions[i - 1] - _actual_positions[i]; + // offset to previous position + double dm = _actual_positions[i - 1] - _actual_positions[i]; - // height ds - double hp = (_heights[i + 1] - _heights[i]) / dp; - double hm = (_heights[i - 1] - _heights[i]) / dm; + // height ds + double hp = (_heights[i + 1] - _heights[i]) / dp; + double hm = (_heights[i - 1] - _heights[i]) / dm; - if((d >= 1 && dp > 1) || (d <= -1 && dm < -1)) - { - short sign_d = static_cast<short>(d / std::abs(d)); + if ((d >= 1 && dp > 1) || (d <= -1 && dm < -1)) { + short sign_d = static_cast<short>(d / std::abs(d)); - double h = _heights[i] + sign_d / (dp - dm) * ((sign_d - dm)*hp - + (dp - sign_d) * hm); + double h = + _heights[i] + sign_d / (dp - dm) * ((sign_d - dm) * hp + (dp - sign_d) * hm); - // try adjusting heights[i] using p-squared formula - if(_heights[i - 1] < h && h < _heights[i + 1]) - { - _heights[i] = h; + // try adjusting heights[i] using p-squared formula + if (_heights[i - 1] < h && h < _heights[i + 1]) { + _heights[i] = h; + } else { + // use linear formula + if (d > 0) { + _heights[i] += hp; } - else - { - // use linear formula - if(d > 0) - { - _heights[i] += hp; - } - if(d < 0) - { - _heights[i] -= hm; - } + if (d < 0) { + _heights[i] -= hm; } - _actual_positions[i] += sign_d; } + _actual_positions[i] += sign_d; } } - - return *this; } - template <std::size_t NumQuantiles> - void DistributionEstimators<NumQuantiles>::appendQuantilesToBSONArrayBuilder( - BSONArrayBuilder& arr) const { + return *this; +} - verify(quantilesReady()); - for (std::size_t i = 0; i <= NumQuantiles + 1; i++) { - arr << quantile(i); - } - } - - template <std::size_t NumQuantiles> - inline double DistributionEstimators<NumQuantiles>::_positions_increments(std::size_t i) const { - return static_cast<double>(i) / (2 * (NumQuantiles + 1)); +template <std::size_t NumQuantiles> +void DistributionEstimators<NumQuantiles>::appendQuantilesToBSONArrayBuilder( + BSONArrayBuilder& arr) const { + verify(quantilesReady()); + for (std::size_t i = 0; i <= NumQuantiles + 1; i++) { + arr << quantile(i); } - - template <class Sample, std::size_t NumQuantiles> - BSONObj SummaryEstimators<Sample, NumQuantiles>::statisticSummaryToBSONObj() const { - BSONObjBuilder b; - this->BasicEstimators<Sample>::appendBasicToBSONObjBuilder(b); - if (this->DistributionEstimators<NumQuantiles>::quantilesReady()) { - // Not using appendQuantiles to be explicit about which probability each quantile - // refers to. This way the user does not need to count the quantiles or know in - // advance how many quantiles were computed to figure out their meaning. - BSONObjBuilder quantilesBuilder(b.subobjStart("quantiles")); - for (size_t i = 1; i <= NumQuantiles; i++) { - const double probability = - this->DistributionEstimators<NumQuantiles>::probability(i); - const double quantile = - this->DistributionEstimators<NumQuantiles>::quantile(i); - quantilesBuilder.append(std::string(mongoutils::str::stream() << probability), - quantile); - } - quantilesBuilder.doneFast(); +} + +template <std::size_t NumQuantiles> +inline double DistributionEstimators<NumQuantiles>::_positions_increments(std::size_t i) const { + return static_cast<double>(i) / (2 * (NumQuantiles + 1)); +} + +template <class Sample, std::size_t NumQuantiles> +BSONObj SummaryEstimators<Sample, NumQuantiles>::statisticSummaryToBSONObj() const { + BSONObjBuilder b; + this->BasicEstimators<Sample>::appendBasicToBSONObjBuilder(b); + if (this->DistributionEstimators<NumQuantiles>::quantilesReady()) { + // Not using appendQuantiles to be explicit about which probability each quantile + // refers to. This way the user does not need to count the quantiles or know in + // advance how many quantiles were computed to figure out their meaning. + BSONObjBuilder quantilesBuilder(b.subobjStart("quantiles")); + for (size_t i = 1; i <= NumQuantiles; i++) { + const double probability = this->DistributionEstimators<NumQuantiles>::probability(i); + const double quantile = this->DistributionEstimators<NumQuantiles>::quantile(i); + quantilesBuilder.append(std::string(mongoutils::str::stream() << probability), + quantile); } - return b.obj(); + quantilesBuilder.doneFast(); } + return b.obj(); +} -} // namespace mongo +} // namespace mongo |