summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGil Alon <gil.alon@mongodb.com>2023-04-12 20:40:47 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2023-04-12 21:41:15 +0000
commitca85829478462c865010a390793ed8481b5d293a (patch)
tree6ea901bd1505e6899671cc5f29c7630433418dd1
parenta7b4b55a14efd1b59f585148b32b388a526ac99b (diff)
downloadmongo-ca85829478462c865010a390793ed8481b5d293a.tar.gz
SERVER-75191 Improve $percentile bounded window function performance and add benchmarks
-rw-r--r--src/mongo/db/pipeline/SConscript9
-rw-r--r--src/mongo/db/pipeline/window_function/window_function_percentile.h18
-rw-r--r--src/mongo/db/pipeline/window_function/window_function_percentile_bm_fixture.cpp123
-rw-r--r--src/mongo/db/pipeline/window_function/window_function_percentile_bm_fixture.h72
4 files changed, 216 insertions, 6 deletions
diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript
index 9c2a38ea7da..d20c1f0f99c 100644
--- a/src/mongo/db/pipeline/SConscript
+++ b/src/mongo/db/pipeline/SConscript
@@ -769,6 +769,15 @@ env.Benchmark(
],
)
+env.Benchmark(
+ target='window_function_percentile_bm',
+ source=['window_function/window_function_percentile_bm_fixture.cpp'],
+ LIBDEPS=[
+ '$BUILD_DIR/mongo/db/query/query_test_service_context',
+ 'accumulator',
+ ],
+)
+
bmEnv = env.Clone()
bmEnv.InjectThirdParty(libraries=["benchmark"])
diff --git a/src/mongo/db/pipeline/window_function/window_function_percentile.h b/src/mongo/db/pipeline/window_function/window_function_percentile.h
index 8cef109b985..c038d086e6f 100644
--- a/src/mongo/db/pipeline/window_function/window_function_percentile.h
+++ b/src/mongo/db/pipeline/window_function/window_function_percentile.h
@@ -30,7 +30,7 @@
#pragma once
-#include "mongo/db/pipeline/accumulator_percentile.h"
+#include "mongo/db/pipeline/percentile_algo.h"
#include "mongo/db/pipeline/window_function/window_function.h"
namespace mongo {
@@ -70,22 +70,28 @@ public:
protected:
explicit WindowFunctionPercentileCommon(ExpressionContext* const expCtx)
- : WindowFunctionState(expCtx), _values(std::multiset<double>()) {}
+ : WindowFunctionState(expCtx), _values(boost::container::flat_multiset<double>()) {}
Value computePercentile(double p) const {
// Calculate the rank.
const double n = _values.size();
- const double rank = std::max<double>(0, std::ceil(p * n) - 1);
+ const double rank = PercentileAlgorithm::computeTrueRank(n, p);
- // std::multiset stores the values in ascending order, so we don't need to sort them before
- // finding the value at index 'rank'.
+ // boost::container::flat_multiset stores the values in ascending order, so we don't need to
+ // sort them before finding the value at index 'rank'.
+ // boost::container::flat_multiset has random-access iterators, so std::advance has an
+ // expected runtime of O(1).
auto it = _values.begin();
std::advance(it, rank);
return Value(*it);
}
// Holds all the values in the window in ascending order.
- std::multiset<double> _values;
+ // A boost::container::flat_multiset stores elements in a contiguous array, so iterating through
+ // the set is faster than iterating through a std::multiset which stores its elements typically
+ // as a binary search tree. Thus, using a boost::container::flat_multiset significantly improved
+ // performance.
+ boost::container::flat_multiset<double> _values;
};
class WindowFunctionPercentile : public WindowFunctionPercentileCommon {
diff --git a/src/mongo/db/pipeline/window_function/window_function_percentile_bm_fixture.cpp b/src/mongo/db/pipeline/window_function/window_function_percentile_bm_fixture.cpp
new file mode 100644
index 00000000000..f116ea91a50
--- /dev/null
+++ b/src/mongo/db/pipeline/window_function/window_function_percentile_bm_fixture.cpp
@@ -0,0 +1,123 @@
+/**
+ * Copyright (C) 2023-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include <boost/random/normal_distribution.hpp>
+
+#include "mongo/db/pipeline/expression_context_for_test.h"
+#include "mongo/db/pipeline/window_function/window_function_percentile_bm_fixture.h"
+
+namespace mongo {
+using std::vector;
+vector<double> generateNormalData(size_t n) {
+ std::mt19937 generator(curTimeMillis64());
+ boost::random::normal_distribution<double> dist(0.0 /* mean */, 1.0 /* sigma */);
+
+ vector<double> inputs;
+ inputs.reserve(n);
+ for (size_t i = 0; i < n; i++) {
+ inputs.push_back(dist(generator));
+ }
+
+ return inputs;
+}
+
+// This benchmark is mimicking the behavior of computing $percentile for a [0, unbounded]
+// window. In a [0, unbounded] window the first window will add all of the inputs in the window
+// function. Then for each following window, the element before the current element will be removed
+// and the percentile will be recalculated.
+void WindowFunctionPercentileBenchmarkFixture::removable_unbounded_percentile(
+ benchmark::State& state, std::vector<double> ps) {
+ // Generate the data.
+ const vector<double> inputs = generateNormalData(dataSizeLarge);
+ auto expCtx = new ExpressionContextForTest();
+
+ // Run the test.
+ for (auto keepRunning : state) {
+ auto w = WindowFunctionPercentile::create(expCtx, ps);
+
+ // Calculate the percentile for a [0, unbounded] window for each input.
+ for (size_t i = 0; i < dataSizeLarge; i++) {
+ // All of the values are in the first window.
+ if (i == 0) {
+ for (double input : inputs) {
+ w->add(Value(input));
+ }
+ benchmark::DoNotOptimize(w->getValue());
+ } else {
+ // Remove the previous value for the next window.
+ double valToRemove = inputs[i - 1];
+ w->remove(Value(valToRemove));
+ benchmark::DoNotOptimize(w->getValue());
+ }
+ }
+ benchmark::ClobberMemory();
+ }
+}
+
+// This benchmark is mimicking the behavior of computing $percentile for a ["current", 100]
+// window. In a ["current", 100] window, the first window will add itself and the next 100 elements
+// in 'inputs' to the window function. Then for each following window, the previous current element
+// will be removed, and a new element (100 indexes away from the new current element) will be added.
+// Then the percentile will be recalculated. We will not add any elements if the index is out of
+// bounds, resulting in smaller windows towards the end of 'inputs'.
+void WindowFunctionPercentileBenchmarkFixture::removable_bounded_percentile(
+ benchmark::State& state, std::vector<double> ps) {
+ // Generate the data.
+ const vector<double> inputs = generateNormalData(dataSizeLarge);
+ auto expCtx = new ExpressionContextForTest();
+
+ // Run the test.
+ for (auto keepRunning : state) {
+ auto w = WindowFunctionPercentile::create(expCtx, ps);
+
+ // Calculate the percentile for a ["current", 100] window for each input.
+ for (size_t i = 0; i < dataSizeLarge; i++) {
+ // Add the first value and the next 100 to the window.
+ if (i == 0) {
+ for (size_t j = 0; j < 101; j++) {
+ w->add(Value(inputs[j]));
+ }
+ benchmark::DoNotOptimize(w->getValue());
+ } else {
+ // Remove the previous current value.
+ double valToRemove = inputs[i - 1];
+ w->remove(Value(valToRemove));
+ // If possible, add the new value.
+ if (i + 100 < dataSizeLarge - 1) {
+ w->add(Value(inputs[i + 100]));
+ }
+ benchmark::DoNotOptimize(w->getValue());
+ }
+ }
+ benchmark::ClobberMemory();
+ }
+}
+
+BENCHMARK_WINDOW_PERCENTILE(WindowFunctionPercentileBenchmarkFixture);
+} // namespace mongo
diff --git a/src/mongo/db/pipeline/window_function/window_function_percentile_bm_fixture.h b/src/mongo/db/pipeline/window_function/window_function_percentile_bm_fixture.h
new file mode 100644
index 00000000000..b05c6bded9c
--- /dev/null
+++ b/src/mongo/db/pipeline/window_function/window_function_percentile_bm_fixture.h
@@ -0,0 +1,72 @@
+/**
+ * Copyright (C) 2023-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+#pragma once
+#include <benchmark/benchmark.h>
+
+#include "mongo/db/pipeline/window_function/window_function_percentile.h"
+#include "mongo/platform/basic.h"
+
+namespace mongo {
+
+class WindowFunctionPercentileBenchmarkFixture : public benchmark::Fixture {
+public:
+ void removable_unbounded_percentile(benchmark::State& state, std::vector<double> ps);
+ void removable_bounded_percentile(benchmark::State& state, std::vector<double> ps);
+
+ static constexpr int dataSizeLarge = 100'000;
+};
+
+#define BENCHMARK_WINDOW_PERCENTILE(Fixture) \
+ \
+ BENCHMARK_F(Fixture, percentile_unbounded_low_p)(benchmark::State & state) { \
+ removable_unbounded_percentile(state, {0.001}); \
+ } \
+ BENCHMARK_F(Fixture, percentile_unbounded_high_p)(benchmark::State & state) { \
+ removable_unbounded_percentile(state, {.999}); \
+ } \
+ BENCHMARK_F(Fixture, percentile_unbounded_mid_p)(benchmark::State & state) { \
+ removable_unbounded_percentile(state, {.55}); \
+ } \
+ BENCHMARK_F(Fixture, percentile_unbounded_multi_p)(benchmark::State & state) { \
+ removable_unbounded_percentile(state, {.1, .47, .88, .05, .33, .999, .2, .59, .9, .7}); \
+ } \
+ BENCHMARK_F(Fixture, percentile_bounded_low_p)(benchmark::State & state) { \
+ removable_bounded_percentile(state, {.001}); \
+ } \
+ BENCHMARK_F(Fixture, percentile_bounded_high_p)(benchmark::State & state) { \
+ removable_bounded_percentile(state, {.999}); \
+ } \
+ BENCHMARK_F(Fixture, percentile_bounded_mid_p)(benchmark::State & state) { \
+ removable_bounded_percentile(state, {.55}); \
+ } \
+ BENCHMARK_F(Fixture, percentile_bounded_multi_p)(benchmark::State & state) { \
+ removable_bounded_percentile(state, {.1, .47, .88, .05, .33, .999, .2, .59, .9, .7}); \
+ }
+
+} // namespace mongo