From 9849b9f4a485c91b812ec1d068e610824cb1105e Mon Sep 17 00:00:00 2001 From: David Percy Date: Tue, 9 Feb 2021 19:11:10 +0000 Subject: SERVER-53714 Add removable $min/$max window functions --- .../db/exec/document_value/value_comparator.h | 9 ++ src/mongo/db/pipeline/SConscript | 1 + src/mongo/db/pipeline/accumulator.h | 10 +- src/mongo/db/pipeline/accumulator_min_max.cpp | 2 +- .../db/pipeline/window_function/window_function.h | 99 +++++++++++++++ .../window_function_min_max_test.cpp | 136 +++++++++++++++++++++ 6 files changed, 252 insertions(+), 5 deletions(-) create mode 100644 src/mongo/db/pipeline/window_function/window_function.h create mode 100644 src/mongo/db/pipeline/window_function/window_function_min_max_test.cpp diff --git a/src/mongo/db/exec/document_value/value_comparator.h b/src/mongo/db/exec/document_value/value_comparator.h index 0ab008df31d..85858802f16 100644 --- a/src/mongo/db/exec/document_value/value_comparator.h +++ b/src/mongo/db/exec/document_value/value_comparator.h @@ -157,6 +157,14 @@ public: return std::set(LessThan(this)); } + /** + * Construct an empty ordered multiset of Values whose ordering and equivalence classes are + * given by this comparator. This comparator must outlive the returned set. + */ + std::multiset makeOrderedValueMultiset() const { + return std::multiset(LessThan(this)); + } + /** * Construct an empty unordered set of Values whose equivalence classes are given by this * comparator. This comparator must outlive the returned set. 
@@ -192,6 +200,7 @@ private: // using ValueSet = std::set; +using ValueMultiset = std::multiset; using ValueUnorderedSet = stdx::unordered_set; diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript index acbd901c1b9..86e4a2201e5 100644 --- a/src/mongo/db/pipeline/SConscript +++ b/src/mongo/db/pipeline/SConscript @@ -422,6 +422,7 @@ env.CppUnitTest( 'tee_buffer_test.cpp', 'window_function/partition_iterator_test.cpp', 'window_function/window_function_exec_test.cpp', + 'window_function/window_function_min_max_test.cpp', ], LIBDEPS=[ '$BUILD_DIR/mongo/base', diff --git a/src/mongo/db/pipeline/accumulator.h b/src/mongo/db/pipeline/accumulator.h index c162c786874..bf5f7e88b2c 100644 --- a/src/mongo/db/pipeline/accumulator.h +++ b/src/mongo/db/pipeline/accumulator.h @@ -252,8 +252,8 @@ private: class AccumulatorMinMax : public AccumulatorState { public: enum Sense : int { - MIN = 1, - MAX = -1, // Used to "scale" comparison. + kMin = 1, + kMax = -1, // Used to "scale" comparison. 
}; AccumulatorMinMax(ExpressionContext* const expCtx, Sense sense); @@ -278,13 +278,15 @@ private: class AccumulatorMax final : public AccumulatorMinMax { public: - explicit AccumulatorMax(ExpressionContext* const expCtx) : AccumulatorMinMax(expCtx, MAX) {} + explicit AccumulatorMax(ExpressionContext* const expCtx) + : AccumulatorMinMax(expCtx, Sense::kMax) {} static boost::intrusive_ptr create(ExpressionContext* const expCtx); }; class AccumulatorMin final : public AccumulatorMinMax { public: - explicit AccumulatorMin(ExpressionContext* const expCtx) : AccumulatorMinMax(expCtx, MIN) {} + explicit AccumulatorMin(ExpressionContext* const expCtx) + : AccumulatorMinMax(expCtx, Sense::kMin) {} static boost::intrusive_ptr create(ExpressionContext* const expCtx); }; diff --git a/src/mongo/db/pipeline/accumulator_min_max.cpp b/src/mongo/db/pipeline/accumulator_min_max.cpp index 25759c2a422..3326b9b8450 100644 --- a/src/mongo/db/pipeline/accumulator_min_max.cpp +++ b/src/mongo/db/pipeline/accumulator_min_max.cpp @@ -48,7 +48,7 @@ REGISTER_WINDOW_FUNCTION(max, window_function::ExpressionFromAccumulator::parse) REGISTER_WINDOW_FUNCTION(min, window_function::ExpressionFromAccumulator::parse); const char* AccumulatorMinMax::getOpName() const { - if (_sense == 1) + if (_sense == Sense::kMin) return "$min"; return "$max"; } diff --git a/src/mongo/db/pipeline/window_function/window_function.h b/src/mongo/db/pipeline/window_function/window_function.h new file mode 100644 index 00000000000..fedfbce1f4a --- /dev/null +++ b/src/mongo/db/pipeline/window_function/window_function.h @@ -0,0 +1,99 @@ +/** + * Copyright (C) 2021-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. 
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <utility>
+
+#include "mongo/db/exec/document_value/value_comparator.h"
+#include "mongo/db/pipeline/accumulator.h"
+#include "mongo/db/pipeline/document_source.h"
+#include "mongo/db/pipeline/expression.h"
+
+namespace mongo {
+
+/**
+ * A WindowFunctionState is a mutable, removable accumulator.
+ *
+ * Implementations must ensure that 'remove()' undoes 'add()' when called in FIFO order.
+ * For example:
+ *     'add(x); add(y); remove(x)' == 'add(y)'
+ *     'add(a); add(b); add(z); remove(a); remove(b)' == 'add(z)'
+ */
+class WindowFunctionState {
+public:
+    // Virtual so that an implementation can be destroyed safely through a base-class pointer.
+    virtual ~WindowFunctionState() = default;
+
+    /** Adds a value to the window. */
+    virtual void add(Value) = 0;
+
+    /** Removes a previously added value from the window; see the FIFO contract above. */
+    virtual void remove(Value) = 0;
+
+    /** Returns the function's result for the values currently in the window. */
+    virtual Value getValue() const = 0;
+};
+
+/**
+ * Removable $min / $max: keeps every value of the window in an ordered multiset and reports
+ * the first (kMin) or last (kMax) element, as selected by the 'sense' template parameter.
+ */
+template <AccumulatorMinMax::Sense sense>
+class WindowFunctionMinMax : public WindowFunctionState {
+public:
+    /**
+     * The result reported for an empty window: BSON null.
+     */
+    static Value getDefault() {
+        return Value{BSONNULL};
+    }
+
+    /**
+     * The comparator must outlive the constructed WindowFunctionMinMax.
+     */
+    explicit WindowFunctionMinMax(const ValueComparator& cmp)
+        : _values(cmp.makeOrderedValueMultiset()) {}
+
+    /**
+     * Adds 'value' to the window.
+     */
+    void add(Value value) final {
+        _values.insert(std::move(value));
+    }
+
+    /**
+     * Removes the oldest element that compares equivalent to 'value'. Trips tassert 5371400 if
+     * the window holds no such element.
+     */
+    void remove(Value value) final {
+        // Since C++11, std::multiset::insert places a new element after any equivalent elements
+        // already in the container, so the first element of an equivalent run is the oldest.
+        // find() may return any element of that run, whereas lower_bound() always returns the
+        // first, which is the one to erase to satisfy "remove() undoes add() in FIFO order".
+        auto oldest = _values.lower_bound(value);
+        // lower_bound() yields the first element not ordered before 'value'; that element is
+        // equivalent to 'value' only if 'value' is not ordered before it either.
+        const bool found = oldest != _values.end() && !_values.key_comp()(value, *oldest);
+        tassert(5371400, "Can't remove a value that is not in the WindowFunctionMinMax", found);
+        _values.erase(oldest);
+    }
+
+    /**
+     * Returns getDefault() for an empty window; otherwise the smallest (kMin) or largest (kMax)
+     * value in the window according to the comparator's ordering.
+     */
+    Value getValue() const final {
+        if (_values.empty())
+            return getDefault();
+        switch (sense) {
+            case AccumulatorMinMax::Sense::kMin:
+                return *_values.begin();
+            case AccumulatorMinMax::Sense::kMax:
+                return *_values.rbegin();
+        }
+        MONGO_UNREACHABLE_TASSERT(5371401);
+    }
+
+protected:
+    // Holds all the values in the window, in order, with constant-time access to both ends.
+    ValueMultiset _values;
+};
+using WindowFunctionMin = WindowFunctionMinMax<AccumulatorMinMax::Sense::kMin>;
+using WindowFunctionMax = WindowFunctionMinMax<AccumulatorMinMax::Sense::kMax>;
+
+}  // namespace mongo
diff --git a/src/mongo/db/pipeline/window_function/window_function_min_max_test.cpp b/src/mongo/db/pipeline/window_function/window_function_min_max_test.cpp
new file mode 100644
index 00000000000..4dd249c08dd
--- /dev/null
+++ b/src/mongo/db/pipeline/window_function/window_function_min_max_test.cpp
@@ -0,0 +1,136 @@
+/**
+ * Copyright (C) 2021-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/exec/document_value/document_value_test_util.h"
+#include "mongo/db/pipeline/window_function/window_function.h"
+#include "mongo/db/query/collation/collator_interface_mock.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo {
+namespace {
+
+/**
+ * Fixture providing one $min window and one $max window that share a comparator built on a
+ * kToLowerString mock collator.
+ */
+class WindowFunctionMinMaxTest : public unittest::Test {
+public:
+    // Members are initialized in declaration order: the comparator refers to the collator, and
+    // both windows refer to the comparator.
+    CollatorInterfaceMock collator{CollatorInterfaceMock::MockType::kToLowerString};
+    ValueComparator cmp{&collator};
+    WindowFunctionMin min{cmp};
+    WindowFunctionMax max{cmp};
+};
+
+TEST_F(WindowFunctionMinMaxTest, EmptyWindow) {
+    // With nothing added, both functions report BSON null.
+    const Value expected{BSONNULL};
+    ASSERT_VALUE_EQ(min.getValue(), expected);
+
+    ASSERT_VALUE_EQ(max.getValue(), expected);
+}
+
+TEST_F(WindowFunctionMinMaxTest, SingletonWindow) {
+    // A window holding a single value reports that value.
+    const Value five{5};
+    min.add(five);
+    ASSERT_VALUE_EQ(min.getValue(), five);
+
+    const Value seven{7};
+    max.add(seven);
+    ASSERT_VALUE_EQ(max.getValue(), seven);
+}
+
+TEST_F(WindowFunctionMinMaxTest, SmallWindow) {
+    // The extreme value is found regardless of where it was inserted.
+    min.add(Value{5});
+    min.add(Value{2});
+    min.add(Value{10});
+    min.add(Value{3});
+    const Value smallest{2};
+    ASSERT_VALUE_EQ(min.getValue(), smallest);
+
+    max.add(Value{7});
+    max.add(Value{1});
+    max.add(Value{8});
+    max.add(Value{4});
+    const Value largest{8};
+    ASSERT_VALUE_EQ(max.getValue(), largest);
+}
+
+TEST_F(WindowFunctionMinMaxTest, Removal) {
+    const Value five{5};
+    const Value two{2};
+    min.add(five);
+    min.add(two);
+    min.add(Value{10});
+    min.add(Value{3});
+    ASSERT_VALUE_EQ(min.getValue(), two);
+
+    // Dropping a value that is not the minimum leaves the result unchanged.
+    min.remove(five);
+    ASSERT_VALUE_EQ(min.getValue(), two);
+
+    // Dropping the minimum exposes the next-smallest value.
+    min.remove(two);
+    ASSERT_VALUE_EQ(min.getValue(), Value{3});
+}
+
+TEST_F(WindowFunctionMinMaxTest, Duplicates) {
+    const Value two{2};
+    min.add(two);
+    min.add(two);
+    min.add(Value{99});
+    min.add(Value{77});
+    ASSERT_VALUE_EQ(min.getValue(), two);
+
+    // One copy of the minimum is still in the window after a single removal.
+    min.remove(two);
+    ASSERT_VALUE_EQ(min.getValue(), two);
+
+    // Only after the last copy is removed does the minimum move on.
+    min.remove(two);
+    ASSERT_VALUE_EQ(min.getValue(), Value{77});
+}
+
+TEST_F(WindowFunctionMinMaxTest, Ties) {
+    // For elements that tie (compare equal), remove() must not pick an arbitrary one:
+    // doing so would break the invariant that 'add(x); add(y); remove(x)' is equivalent to
+    // 'add(y)'.
+
+    const Value x{"foo"_sd};
+    const Value y{"FOO"_sd};
+    // The two values can be told apart,
+    ASSERT_VALUE_NE(x, y);
+    // yet the fixture's comparator treats them as equal.
+    ASSERT(cmp.evaluate(x == y));
+
+    min.add(x);
+    min.add(y);
+    min.remove(x);
+    ASSERT_VALUE_EQ(min.getValue(), y);
+
+    max.add(x);
+    max.add(y);
+    max.remove(x);
+    ASSERT_VALUE_EQ(max.getValue(), y);
+}
+
+}  // namespace
+}  // namespace mongo
--
cgit v1.2.1