+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+#pragma once
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <iterator>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "mongo/bson/bsonobjbuilder.h"
+#include "mongo/db/commands.h"
+#include "mongo/platform/atomic_word.h"
+#include "mongo/util/assert_util.h"
+namespace mongo {
+/**
+ * Generic histogram that supports data collection into intervals based on user-specified partitions
+ * over any continuous type. A binary predicate that establishes a strict weak ordering over the
+ * template parameter type `T` may be specified, otherwise `std::less<T>` is used (read more here:
+ * https://en.cppreference.com/w/cpp/named_req/Compare).
+ *
+ * For some provided lowermost partition x and uppermost partition y, a value z will be counted
+ * in the (-inf, x) interval if z < x, and in the [y, inf) interval if z >= y. If no partitions are
+ * provided, z will be counted in the sole (-inf, inf) interval.
+ */
+template <typename T, typename Cmp = std::less<T>>
+class Histogram {
+    // Tag type that selects the past-the-end iterator constructor.
+    struct AtEnd {};
+
+public:
+    /**
+     * Creates a histogram whose bucket boundaries are `partitions`, ordered by `comparator`.
+     * N partitions produce N + 1 buckets. Fails through `iasserted` (code 6101800) unless every
+     * partition compares strictly less than its successor.
+     */
+    explicit Histogram(std::vector<T> partitions, Cmp comparator = {})
+        : _partitions{std::move(partitions)},
+          // `_partitions` is declared before `_counts`, so its size is already final here.
+          _counts(_partitions.size() + 1),
+          _comparator{std::move(comparator)} {
+        // Search for the first adjacent pair that is not strictly increasing; finding none means
+        // the partitions are acceptable.
+        auto ordered =
+            std::adjacent_find(_partitions.begin(), _partitions.end(), [&](const T& a, const T& b) {
+                return !_comparator(a, b);
+            }) == _partitions.end();
+        if (!ordered) {
+            iasserted(6101800, "Partitions must be strictly monotonically increasing");
+        }
+    }
+
+    /**
+     * Adds one to the count of the bucket that `data` falls into.
+     */
+    void increment(const T& data) {
+        // The number of partitions that are <= `data` is exactly the bucket index, so a value
+        // equal to a partition lands in the bucket that starts at that partition.
+        auto i = std::upper_bound(_partitions.begin(), _partitions.end(), data, _comparator) -
+            _partitions.begin();
+        _counts[i].addAndFetch(1);
+    }
+
+    /**
+     * Returns the partitions this histogram was constructed with.
+     */
+    const std::vector<T>& getPartitions() const {
+        return _partitions;
+    }
+
+    /**
+     * Returns a copy of all bucket counts, lowest bucket first. Each counter is loaded
+     * separately.
+     */
+    std::vector<int64_t> getCounts() const {
+        std::vector<int64_t> r(_counts.size());
+        std::transform(
+            _counts.begin(), _counts.end(), r.begin(), [](auto&& x) { return x.load(); });
+        return r;
+    }
+
+    /**
+     * An input iterator over the Histogram class that provides access to histogram buckets, each
+     * containing the count, lower and upper bound values. The `lower` data member set to nullptr
+     * signifies the lowermost extremity of the distribution. nullptr similarly represents the
+     * uppermost extremity when assigned to the `upper` data member.
+     */
+    class iterator {
+    public:
+        struct Bucket {
+            int64_t count;
+            const T* lower;
+            const T* upper;
+        };
+
+        // Input iterators need a signed integer difference type; `void` is only acceptable for
+        // output iterators.
+        using difference_type = std::ptrdiff_t;
+        using value_type = Bucket;
+        using pointer = const Bucket*;
+        using reference = const Bucket&;
+        using iterator_category = std::input_iterator_tag;
+
+        // Positions the iterator on the first (lowest) bucket of `hist`.
+        explicit iterator(const Histogram* hist) : _h{hist}, _pos{0} {}
+
+        // Positions the iterator one past the last bucket of `hist`.
+        iterator(const Histogram* hist, AtEnd) : _h{hist}, _pos{_h->_counts.size()} {}
+
+        // Refreshes the cached bucket from the histogram and returns a reference to it. The
+        // reference is overwritten by the next dereference of this iterator.
+        reference operator*() const {
+            _b.count = _h->_counts[_pos].load();
+            _b.lower = (_pos == 0) ? nullptr : &_h->_partitions[_pos - 1];
+            _b.upper = (_pos == _h->_counts.size() - 1) ? nullptr : &_h->_partitions[_pos];
+            return _b;
+        }
+
+        pointer operator->() const {
+            return &**this;
+        }
+
+        iterator& operator++() {
+            ++_pos;
+            return *this;
+        }
+
+        iterator operator++(int) {
+            iterator orig = *this;
+            ++*this;
+            return orig;
+        }
+
+        // Iterators are equal only when they refer to the same histogram and the same bucket.
+        friend bool operator==(const iterator& a, const iterator& b) {
+            return a._h == b._h && a._pos == b._pos;
+        }
+
+        friend bool operator!=(const iterator& a, const iterator& b) {
+            return !(a == b);
+        }
+
+    private:
+        const Histogram* _h;
+        size_t _pos;  // position into _h->_counts
+        mutable Bucket _b;
+    };
+
+    iterator begin() const {
+        return iterator(this);
+    }
+
+    iterator end() const {
+        return iterator(this, AtEnd{});
+    }
+
+private:
+    std::vector<T> _partitions;                // bucket boundaries, strictly increasing
+    std::vector<AtomicWord<int64_t>> _counts;  // one counter per bucket: _partitions.size() + 1
+    Cmp _comparator;
+};
+/**
+ * Appends data (i.e. count and lower/upper bounds of all buckets) of a histogram to the provided
+ * BSON object builder. `histKey` is used as the field name for the appended BSON object containing
+ * the data.
+ *
+ * Every bucket is emitted as a sub-object holding a "count" field. The sub-object's field name
+ * is "[lower, upper)", with "(-inf" standing in for a missing lower bound and "inf" for a missing
+ * upper bound. A final "totalCount" field holds the sum of the counts that were emitted.
+ *
+ * Accepts histograms with any comparator type, not only the default `std::less<T>`.
+ */
+template <typename T, typename Cmp>
+void appendHistogram(BSONObjBuilder& bob,
+                     const Histogram<T, Cmp>& hist,
+                     const StringData histKey) {
+    BSONObjBuilder histBob(bob.subobjStart(histKey));
+    long long totalCount = 0;
+
+    using namespace fmt::literals;
+    for (auto&& [count, lower, upper] : hist) {
+        // A null bound marks an open end of the distribution.
+        std::string bucketKey = "{}{}, {})"_format(lower ? "[" : "(",
+                                                   lower ? "{}"_format(*lower) : "-inf",
+                                                   upper ? "{}"_format(*upper) : "inf");
+
+        // The temporary builder is destroyed at the end of the statement.
+        BSONObjBuilder(histBob.subobjStart(bucketKey))
+            .append("count", static_cast<long long>(count));
+        totalCount += count;
+    }
+    histBob.append("totalCount", totalCount);
+}
+} // namespace mongo
+#include "mongo/util/histogram.h"
+#include <boost/optional.hpp>
+#include "mongo/unittest/assert_that.h"
+#include "mongo/unittest/unittest.h"
+namespace mongo {
+namespace {
+using namespace unittest::match;
+using namespace std::literals;
+/**
+ * Fixture providing helpers that turn a Histogram into a vector of comparable, printable bucket
+ * descriptions.
+ */
+class HistogramTest : public unittest::Test {
+public:
+    /**
+     * Value-type description of one bucket: its count plus optional lower/upper bounds. An empty
+     * optional stands for an unbounded end.
+     */
+    template <typename T>
+    struct BucketSpec {
+        int64_t count;
+        boost::optional<T> lower, upper;
+
+        friend bool operator==(const BucketSpec& a, const BucketSpec& b) {
+            auto lens = [](auto&& x) { return std::tie(x.count, x.lower, x.upper); };
+            return lens(a) == lens(b);
+        }
+
+        // Prints only the bounds that are present.
+        friend std::ostream& operator<<(std::ostream& os, const BucketSpec& b) {
+            os << "count: " << b.count;
+            if (b.lower)
+                os << ", lower: " << *b.lower;
+            if (b.upper)
+                os << ", upper: " << *b.upper;
+            return os;
+        }
+    };
+
+    // Copies the pointee into an optional; a null pointer becomes boost::none.
+    template <typename T>
+    boost::optional<T> ptrToOpt(const T* p) {
+        return p ? boost::optional<T>(*p) : boost::none;
+    }
+
+    // Walks the histogram's buckets from lowest to highest and copies each into a BucketSpec.
+    template <typename T>
+    auto snapshot(const Histogram<T>& h) {
+        std::vector<BucketSpec<T>> r;
+        std::transform(h.begin(), h.end(), std::back_inserter(r), [&](auto&& b) {
+            return BucketSpec<T>{b.count, ptrToOpt(b.lower), ptrToOpt(b.upper)};
+        });
+        return r;
+    }
+};
+// Values 0..14 spread over partitions {0, 5, 8, 12}: nothing falls below 0, and each value is
+// counted in the bucket whose half-open range [lower, upper) contains it.
+TEST_F(HistogramTest, CountsIncrementedAndStored) {
+    Histogram<int64_t> hist({0, 5, 8, 12});
+    for (int64_t i = 0; i < 15; ++i)
+        hist.increment(i);
+    std::vector<BucketSpec<int64_t>> expected = {
+        {0, {}, 0}, {5, 0, 5}, {3, 5, 8}, {4, 8, 12}, {3, 12, {}}};
+    ASSERT_THAT(snapshot(hist), Eq(expected));
+}
+// Values 0..4 are all below the lowest partition, so they are all counted in the (-inf, 5) bucket.
+TEST_F(HistogramTest, CountsIncrementedInSmallestBucket) {
+    Histogram<int64_t> hist({5, 8, 12});
+    for (int64_t i = 0; i < 5; ++i)
+        hist.increment(i);
+    std::vector<BucketSpec<int64_t>> expected = {{5, {}, 5}, {0, 5, 8}, {0, 8, 12}, {0, 12, {}}};
+    ASSERT_THAT(snapshot(hist), Eq(expected));
+}
+// A value equal to a partition is counted in the bucket whose lower bound is that partition.
+TEST_F(HistogramTest, CountsIncrementedAtPartition) {
+    std::vector<int64_t> origPartitions = {5, 8, 12};
+    Histogram<int64_t> hist(origPartitions);
+    for (auto& p : origPartitions)
+        hist.increment(p);
+    std::vector<BucketSpec<int64_t>> expected = {{0, {}, 5}, {1, 5, 8}, {1, 8, 12}, {1, 12, {}}};
+    ASSERT_THAT(snapshot(hist), Eq(expected));
+}
+// Negative partitions and negative values are bucketed the same way as positive ones:
+// values -15..9 over partitions {-12, -8, 5}.
+TEST_F(HistogramTest, NegativeValuesIncrementBuckets) {
+    Histogram<int64_t> hist({-12, -8, 5});
+    for (int64_t i = -15; i < 10; ++i)
+        hist.increment(i);
+    std::vector<BucketSpec<int64_t>> expected = {
+        {3, {}, -12}, {4, -12, -8}, {13, -8, 5}, {5, 5, {}}};
+    ASSERT_THAT(snapshot(hist), Eq(expected));
+}
+// Same distribution as the int64_t case, but with Milliseconds as the histogram's value type.
+TEST_F(HistogramTest, DurationCountsIncrementedAndStored) {
+    Histogram<Milliseconds> hist(
+        {Milliseconds{0}, Milliseconds{5}, Milliseconds{8}, Milliseconds{12}});
+    for (int64_t i = 0; i < 15; ++i)
+        hist.increment(Milliseconds{i});
+    std::vector<BucketSpec<Milliseconds>> expected = {{0, {}, Milliseconds{0}},
+                                                      {5, Milliseconds{0}, Milliseconds{5}},
+                                                      {3, Milliseconds{5}, Milliseconds{8}},
+                                                      {4, Milliseconds{8}, Milliseconds{12}},
+                                                      {3, Milliseconds{12}, {}}};
+    ASSERT_THAT(snapshot(hist), Eq(expected));
+}
+// Strings made of 0..11 copies of 'a' order by length under std::less<std::string>, so the
+// partitions split them by how many characters they hold.
+TEST_F(HistogramTest, StringCountsIncrementedAndStoredByLength) {
+    Histogram<std::string> hist({"", "aa", "aaaaa", "aaaaaaaaa"});
+    for (int64_t i = 0; i < 12; ++i)
+        hist.increment(std::string(i, 'a'));
+    std::vector<BucketSpec<std::string>> expected = {{0, {}, ""s},
+                                                     {2, ""s, "aa"s},
+                                                     {3, "aa"s, "aaaaa"s},
+                                                     {4, "aaaaa"s, "aaaaaaaaa"s},
+                                                     {3, "aaaaaaaaa"s, {}}};
+    ASSERT_THAT(snapshot(hist), Eq(expected));
+}
+// Single-character strings "a".."y" are bucketed by their character; "z" is never inserted, so
+// the topmost bucket stays empty.
+TEST_F(HistogramTest, StringCountsIncrementedAndStoredByChar) {
+    Histogram<std::string> hist({"a", "h", "r", "z"});
+    for (char c = 'a'; c < 'a' + 25; ++c) {
+        hist.increment(std::string{c});
+    }
+    std::vector<BucketSpec<std::string>> expected = {
+        {0, {}, "a"s}, {7, "a"s, "h"s}, {10, "h"s, "r"s}, {8, "r"s, "z"s}, {0, "z"s, {}}};
+    ASSERT_THAT(snapshot(hist), Eq(expected));
+}
+// Same distribution as the int64_t case, but with an unsigned value type.
+TEST_F(HistogramTest, SizeTCountsIncrementedAndStored) {
+    Histogram<size_t> hist({0, 5, 8, 12});
+    for (size_t i = 0; i < 15; ++i)
+        hist.increment(i);
+    std::vector<BucketSpec<size_t>> expected = {
+        {0, {}, 0}, {5, 0, 5}, {3, 5, 8}, {4, 8, 12}, {3, 12, {}}};
+    ASSERT_THAT(snapshot(hist), Eq(expected));
+}
+} // namespace
+} // namespace mongo
-#pragma once
-#include "mongo/bson/bsonobjbuilder.h"
-#include "mongo/db/commands.h"
-#include <vector>
-namespace mongo {
- * Generalized version of OperationLatencyHistogram that can track latencies for any operation type
- * with custom lower bounds. For some provided lower bounds {x,y} a number z will be counted in the
- * x-y bucket if z ∈ [x, y). in the y-inf bucket if z ∈ [y, inf). and in the (-inf, x) bucket if z <
- * x.
- */
-template <std::size_t numLowerBounds>
-class IntegerHistogram {
- const std::string kKey;
- IntegerHistogram(std::string key, std::array<int64_t, numLowerBounds> lowerBounds)
- : kKey(std::move(key)) {
- invariant(!lowerBounds.empty(), "Lower bounds must not be empty");
- _lowerBoundBuckets[0].lowerBound = std::numeric_limits<int64_t>::min();
- int64_t prevVal = std::numeric_limits<int64_t>::min();
- for (size_t i = 1; i < _lowerBoundBuckets.size(); ++i) {
- auto lowerBoundVal = - 1);
- invariant(lowerBoundVal > prevVal,
- "Lower bounds must be strictly monotonically increasing");
- _lowerBoundBuckets[i].lowerBound = lowerBoundVal;
- prevVal = lowerBoundVal;
- }
- }
- void append(BSONObjBuilder& builder, bool shouldAppendAdditionalInfo) const {
- BSONObjBuilder histogramBuilder(builder.subobjStart(kKey));
- auto offsetToString = [this](size_t offset) {
- if (offset == 0)
- return std::string("-inf");
- if (offset < _lowerBoundBuckets.size())
- return std::to_string(_lowerBoundBuckets[offset].lowerBound);
- return std::string("inf");
- };
- for (size_t i = 0; i < _lowerBoundBuckets.size(); i++) {
- auto count = _lowerBoundBuckets[i].count.load();
- if (count == 0)
- continue;
- auto key = fmt::format("{} - {}", offsetToString(i), offsetToString(i + 1));
- BSONObjBuilder entryBuilder(histogramBuilder.subobjStart(key));
- entryBuilder.append("count", (long long)(count));
- entryBuilder.doneFast();
- }
- auto totalCount = _entryCount.load();
- histogramBuilder.append("ops", (long long)totalCount);
- if (shouldAppendAdditionalInfo && totalCount != 0) {
- auto sum = _sum.load();
- histogramBuilder.append("sum", (long long)sum);
- histogramBuilder.append("mean", static_cast<double>(sum) / totalCount);
- }
- histogramBuilder.doneFast();
- }
- void increment(int64_t data) {
- auto insertionIndex = std::upper_bound(_lowerBoundBuckets.begin(),
- _lowerBoundBuckets.end(),
- data,
- [](const int64_t a, const LowerBoundBucket& b) {
- return a < b.lowerBound;
- }) -
- 1;
- insertionIndex->count.addAndFetch(1);
- _entryCount.addAndFetch(1);
- _sum.addAndFetch(data);
- }
- struct LowerBoundBucket {
- int64_t lowerBound;
- AtomicWord<int64_t> count;
- };
- std::array<LowerBoundBucket, numLowerBounds + 1> _lowerBoundBuckets;
- AtomicWord<int64_t> _entryCount;
- AtomicWord<int64_t> _sum;
-} // namespace mongo
-#include "mongo/platform/basic.h"
-#include "mongo/unittest/death_test.h"
-#include "mongo/unittest/unittest.h"
-#include "mongo/util/integer_histogram.h"
-namespace mongo {
-namespace {
-TEST(IntegerHistogram, EnsureCountsIncrementedAndStored) {
- std::array<int64_t, 4> lowerBounds{0, 5, 8, 12};
- IntegerHistogram<4> hist("testKey", lowerBounds);
- int64_t sum = 0;
- int64_t numInserts = 15;
- for (int64_t i = 0; i < numInserts; i++) {
- hist.increment(i);
- sum += i;
- }
- auto out = [&] {
- BSONObjBuilder builder;
- hist.append(builder, true);
- return builder.obj();
- }();
- auto buckets = out["testKey"];
- ASSERT_EQUALS(buckets["0 - 5"]["count"].Long(), 5);
- ASSERT_EQUALS(buckets["5 - 8"]["count"].Long(), 3);
- ASSERT_EQUALS(buckets["8 - 12"]["count"].Long(), 4);
- ASSERT_EQUALS(buckets["12 - inf"]["count"].Long(), 3);
- ASSERT_EQUALS(buckets["ops"].Long(), numInserts);
- ASSERT_EQUALS(buckets["sum"].Long(), sum);
- ASSERT_EQUALS(buckets["mean"].Double(), static_cast<double>(sum) / numInserts);
-TEST(IntegerHistogram, EnsureCountsIncrementedInSmallestBucket) {
- std::array<int64_t, 3> lowerBounds{5, 8, 12};
- IntegerHistogram<3> hist("testKey2", lowerBounds);
- int64_t sum = 0;
- int64_t numInserts = 5;
- for (int64_t i = 0; i < numInserts; i++) {
- hist.increment(i);
- sum += i;
- }
- auto out = [&] {
- BSONObjBuilder builder;
- hist.append(builder, true);
- return builder.obj();
- }();
- auto buckets = out["testKey2"];
- ASSERT_EQUALS(buckets["-inf - 5"]["count"].Long(), 5);
- ASSERT_EQUALS(buckets["ops"].Long(), numInserts);
- ASSERT_EQUALS(buckets["sum"].Long(), sum);
- ASSERT_EQUALS(buckets["mean"].Double(), static_cast<double>(sum) / numInserts);
-TEST(IntegerHistogram, EnsureCountsCorrectlyIncrementedAtBoundary) {
- std::array<int64_t, 3> lowerBounds{5, 8, 12};
- IntegerHistogram<3> hist("testKey3", lowerBounds);
- int64_t sum = 0;
- int64_t numInserts = 3;
- for (auto& boundary : lowerBounds) {
- hist.increment(boundary);
- sum += boundary;
- }
- auto out = [&] {
- BSONObjBuilder builder;
- hist.append(builder, true);
- return builder.obj();
- }();
- auto buckets = out["testKey3"];
- ASSERT_EQUALS(buckets["5 - 8"]["count"].Long(), 1);
- ASSERT_EQUALS(buckets["8 - 12"]["count"].Long(), 1);
- ASSERT_EQUALS(buckets["12 - inf"]["count"].Long(), 1);
- ASSERT_EQUALS(buckets["ops"].Long(), numInserts);
- ASSERT_EQUALS(buckets["sum"].Long(), sum);
- ASSERT_EQUALS(buckets["mean"].Double(), static_cast<double>(sum) / numInserts);
-TEST(IntegerHistogram, EnsureNegativeCountsIncrementBucketsCorrectly) {
- std::array<int64_t, 3> lowerBounds{-12, -8, 5};
- IntegerHistogram<3> hist("testKey4", lowerBounds);
- int64_t sum = 0;
- int64_t numInserts = 25;
- for (int64_t i = -15; i < 10; i++) {
- hist.increment(i);
- sum += i;
- }
- auto out = [&] {
- BSONObjBuilder builder;
- hist.append(builder, true);
- return builder.obj();
- }();
- auto buckets = out["testKey4"];
- ASSERT_EQUALS(buckets["-inf - -12"]["count"].Long(), 3);
- ASSERT_EQUALS(buckets["-12 - -8"]["count"].Long(), 4);
- ASSERT_EQUALS(buckets["-8 - 5"]["count"].Long(), 13);
- ASSERT_EQUALS(buckets["5 - inf"]["count"].Long(), 5);
- ASSERT_EQUALS(buckets["ops"].Long(), numInserts);
- ASSERT_EQUALS(buckets["sum"].Long(), sum);
- ASSERT_EQUALS(buckets["mean"].Double(), static_cast<double>(sum) / numInserts);
-TEST(IntegerHistogram, SkipsEmptyBuckets) {
- std::array<int64_t, 2> lowerBounds{0, 5};
- IntegerHistogram<2> hist("testKey6", lowerBounds);
- hist.increment(6);
- auto out = [&] {
- BSONObjBuilder builder;
- hist.append(builder, true);
- return builder.obj();
- }();
- auto buckets = out["testKey6"];
- ASSERT_THROWS(buckets["0 - 5"]["count"].Long(), DBException);
- ASSERT_EQ(buckets["5 - inf"]["count"].Long(), 1);
- FailIfFirstLowerBoundIsMin,
- "Lower bounds must be strictly monotonically increasing") {
- std::array<int64_t, 2> lowerBounds{std::numeric_limits<int64_t>::min(), 5};
- IntegerHistogram<2> hist("testKey5", lowerBounds);
- FailsWhenLowerBoundNotMonotonic,
- "Lower bounds must be strictly monotonically increasing") {
- std::array<int64_t, 2> lowerBounds{5, 0};
- IntegerHistogram<2>("testKey7", lowerBounds);
-} // namespace
-} // namespace mongo