diff options
author | Gil Alon <gil.alon@mongodb.com> | 2023-04-12 20:40:47 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-04-12 21:41:15 +0000 |
commit | ca85829478462c865010a390793ed8481b5d293a (patch) | |
tree | 6ea901bd1505e6899671cc5f29c7630433418dd1 | |
parent | a7b4b55a14efd1b59f585148b32b388a526ac99b (diff) | |
download | mongo-ca85829478462c865010a390793ed8481b5d293a.tar.gz |
SERVER-75191 Improve $percentile bounded window function performance and add benchmarks
4 files changed, 216 insertions, 6 deletions
diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript index 9c2a38ea7da..d20c1f0f99c 100644 --- a/src/mongo/db/pipeline/SConscript +++ b/src/mongo/db/pipeline/SConscript @@ -769,6 +769,15 @@ env.Benchmark( ], ) +env.Benchmark( + target='window_function_percentile_bm', + source=['window_function/window_function_percentile_bm_fixture.cpp'], + LIBDEPS=[ + '$BUILD_DIR/mongo/db/query/query_test_service_context', + 'accumulator', + ], +) + bmEnv = env.Clone() bmEnv.InjectThirdParty(libraries=["benchmark"]) diff --git a/src/mongo/db/pipeline/window_function/window_function_percentile.h b/src/mongo/db/pipeline/window_function/window_function_percentile.h index 8cef109b985..c038d086e6f 100644 --- a/src/mongo/db/pipeline/window_function/window_function_percentile.h +++ b/src/mongo/db/pipeline/window_function/window_function_percentile.h @@ -30,7 +30,7 @@ #pragma once -#include "mongo/db/pipeline/accumulator_percentile.h" +#include "mongo/db/pipeline/percentile_algo.h" #include "mongo/db/pipeline/window_function/window_function.h" namespace mongo { @@ -70,22 +70,28 @@ public: protected: explicit WindowFunctionPercentileCommon(ExpressionContext* const expCtx) - : WindowFunctionState(expCtx), _values(std::multiset<double>()) {} + : WindowFunctionState(expCtx), _values(boost::container::flat_multiset<double>()) {} Value computePercentile(double p) const { // Calculate the rank. const double n = _values.size(); - const double rank = std::max<double>(0, std::ceil(p * n) - 1); + const double rank = PercentileAlgorithm::computeTrueRank(n, p); - // std::multiset stores the values in ascending order, so we don't need to sort them before - // finding the value at index 'rank'. + // boost::container::flat_multiset stores the values in ascending order, so we don't need to + // sort them before finding the value at index 'rank'. + // boost::container::flat_multiset has random-access iterators, so std::advance has an + // expected runtime of O(1). auto it = _values.begin(); std::advance(it, rank); return Value(*it); } // Holds all the values in the window in ascending order. - std::multiset<double> _values; + // A boost::container::flat_multiset stores elements in a contiguous array, so iterating through + // the set is faster than iterating through a std::multiset which stores its elements typically + // as a binary search tree. Thus, using a boost::container::flat_multiset significantly improved + // performance. + boost::container::flat_multiset<double> _values; }; class WindowFunctionPercentile : public WindowFunctionPercentileCommon { diff --git a/src/mongo/db/pipeline/window_function/window_function_percentile_bm_fixture.cpp b/src/mongo/db/pipeline/window_function/window_function_percentile_bm_fixture.cpp new file mode 100644 index 00000000000..f116ea91a50 --- /dev/null +++ b/src/mongo/db/pipeline/window_function/window_function_percentile_bm_fixture.cpp @@ -0,0 +1,123 @@ +/** + * Copyright (C) 2023-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include <boost/random/normal_distribution.hpp> + +#include "mongo/db/pipeline/expression_context_for_test.h" +#include "mongo/db/pipeline/window_function/window_function_percentile_bm_fixture.h" + +namespace mongo { +using std::vector; +vector<double> generateNormalData(size_t n) { + std::mt19937 generator(curTimeMillis64()); + boost::random::normal_distribution<double> dist(0.0 /* mean */, 1.0 /* sigma */); + + vector<double> inputs; + inputs.reserve(n); + for (size_t i = 0; i < n; i++) { + inputs.push_back(dist(generator)); + } + + return inputs; +} + +// This benchmark is mimicking the behavior of computing $percentile for a [0, unbounded] +// window. In a [0, unbounded] window the first window will add all of the inputs in the window +// function. Then for each following window, the element before the current element will be removed +// and the percentile will be recalculated. +void WindowFunctionPercentileBenchmarkFixture::removable_unbounded_percentile( + benchmark::State& state, std::vector<double> ps) { + // Generate the data. + const vector<double> inputs = generateNormalData(dataSizeLarge); + auto expCtx = new ExpressionContextForTest(); + + // Run the test. + for (auto keepRunning : state) { + auto w = WindowFunctionPercentile::create(expCtx, ps); + + // Calculate the percentile for a [0, unbounded] window for each input. + for (size_t i = 0; i < dataSizeLarge; i++) { + // All of the values are in the first window. + if (i == 0) { + for (double input : inputs) { + w->add(Value(input)); + } + benchmark::DoNotOptimize(w->getValue()); + } else { + // Remove the previous value for the next window. + double valToRemove = inputs[i - 1]; + w->remove(Value(valToRemove)); + benchmark::DoNotOptimize(w->getValue()); + } + } + benchmark::ClobberMemory(); + } +} + +// This benchmark is mimicking the behavior of computing $percentile for a ["current", 100] +// window. In a ["current", 100] window, the first window will add itself and the next 100 elements +// in 'inputs' to the window function. Then for each following window, the previous current element +// will be removed, and a new element (100 indexes away from the new current element) will be added. +// Then the percentile will be recalculated. We will not add any elements if the index is out of +// bounds, resulting in smaller windows towards the end of 'inputs'. +void WindowFunctionPercentileBenchmarkFixture::removable_bounded_percentile( + benchmark::State& state, std::vector<double> ps) { + // Generate the data. + const vector<double> inputs = generateNormalData(dataSizeLarge); + auto expCtx = new ExpressionContextForTest(); + + // Run the test. + for (auto keepRunning : state) { + auto w = WindowFunctionPercentile::create(expCtx, ps); + + // Calculate the percentile for a ["current", 100] window for each input. + for (size_t i = 0; i < dataSizeLarge; i++) { + // Add the first value and the next 100 to the window. + if (i == 0) { + for (size_t j = 0; j < 101; j++) { + w->add(Value(inputs[j])); + } + benchmark::DoNotOptimize(w->getValue()); + } else { + // Remove the previous current value. + double valToRemove = inputs[i - 1]; + w->remove(Value(valToRemove)); + // If possible, add the new value. + if (i + 100 < dataSizeLarge - 1) { + w->add(Value(inputs[i + 100])); + } + benchmark::DoNotOptimize(w->getValue()); + } + } + benchmark::ClobberMemory(); + } +} + +BENCHMARK_WINDOW_PERCENTILE(WindowFunctionPercentileBenchmarkFixture); +} // namespace mongo diff --git a/src/mongo/db/pipeline/window_function/window_function_percentile_bm_fixture.h b/src/mongo/db/pipeline/window_function/window_function_percentile_bm_fixture.h new file mode 100644 index 00000000000..b05c6bded9c --- /dev/null +++ b/src/mongo/db/pipeline/window_function/window_function_percentile_bm_fixture.h @@ -0,0 +1,72 @@ +/** + * Copyright (C) 2023-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ +#pragma once +#include <benchmark/benchmark.h> + +#include "mongo/db/pipeline/window_function/window_function_percentile.h" +#include "mongo/platform/basic.h" + +namespace mongo { + +class WindowFunctionPercentileBenchmarkFixture : public benchmark::Fixture { +public: + void removable_unbounded_percentile(benchmark::State& state, std::vector<double> ps); + void removable_bounded_percentile(benchmark::State& state, std::vector<double> ps); + + static constexpr int dataSizeLarge = 100'000; +}; + +#define BENCHMARK_WINDOW_PERCENTILE(Fixture) \ + \ + BENCHMARK_F(Fixture, percentile_unbounded_low_p)(benchmark::State & state) { \ + removable_unbounded_percentile(state, {0.001}); \ + } \ + BENCHMARK_F(Fixture, percentile_unbounded_high_p)(benchmark::State & state) { \ + removable_unbounded_percentile(state, {.999}); \ + } \ + BENCHMARK_F(Fixture, percentile_unbounded_mid_p)(benchmark::State & state) { \ + removable_unbounded_percentile(state, {.55}); \ + } \ + BENCHMARK_F(Fixture, percentile_unbounded_multi_p)(benchmark::State & state) { \ + removable_unbounded_percentile(state, {.1, .47, .88, .05, .33, .999, .2, .59, .9, .7}); \ + } \ + BENCHMARK_F(Fixture, percentile_bounded_low_p)(benchmark::State & state) { \ + removable_bounded_percentile(state, {.001}); \ + } \ + BENCHMARK_F(Fixture, percentile_bounded_high_p)(benchmark::State & state) { \ + removable_bounded_percentile(state, {.999}); \ + } \ + BENCHMARK_F(Fixture, percentile_bounded_mid_p)(benchmark::State & state) { \ + removable_bounded_percentile(state, {.55}); \ + } \ + BENCHMARK_F(Fixture, percentile_bounded_multi_p)(benchmark::State & state) { \ + removable_bounded_percentile(state, {.1, .47, .88, .05, .33, .999, .2, .59, .9, .7}); \ + } + +} // namespace mongo |