From 2b56c5ce4527403329bc60ee406a0f1a7de3f10a Mon Sep 17 00:00:00 2001 From: Benjamin Murphy Date: Fri, 12 Feb 2016 16:20:51 -0500 Subject: SERVER-20506 Conditionally order match and unwind. --- src/mongo/db/matcher/expression_algo.cpp | 156 +++++++++++++++++++ src/mongo/db/matcher/expression_algo.h | 30 ++++ src/mongo/db/matcher/expression_algo_test.cpp | 186 +++++++++++++++++++++++ src/mongo/db/matcher/expression_tree.h | 6 + src/mongo/db/pipeline/SConscript | 1 + src/mongo/db/pipeline/document_source.h | 21 +++ src/mongo/db/pipeline/document_source_match.cpp | 52 ++++++- src/mongo/db/pipeline/document_source_test.cpp | 3 + src/mongo/db/pipeline/document_source_unwind.cpp | 35 +++++ src/mongo/db/pipeline/pipeline_test.cpp | 90 +++++++++++ 10 files changed, 577 insertions(+), 3 deletions(-) diff --git a/src/mongo/db/matcher/expression_algo.cpp b/src/mongo/db/matcher/expression_algo.cpp index a8fb260b7d0..79059c204d1 100644 --- a/src/mongo/db/matcher/expression_algo.cpp +++ b/src/mongo/db/matcher/expression_algo.cpp @@ -30,11 +30,16 @@ #include "mongo/platform/basic.h" +#include "mongo/base/checked_cast.h" #include "mongo/db/matcher/expression.h" +#include "mongo/db/matcher/expression_algo.h" #include "mongo/db/matcher/expression_leaf.h" #include "mongo/db/matcher/expression_tree.h" namespace mongo { + +using std::unique_ptr; + namespace { bool isComparisonMatchExpression(const MatchExpression* expr) { @@ -219,6 +224,80 @@ bool _isSubsetOf(const MatchExpression* lhs, const ExistsMatchExpression* rhs) { } } +/** + * Returns whether the leaf is a $elemMatch expression. + */ +bool isElemMatch(const MatchExpression& expr) { + return expr.matchType() == MatchExpression::ELEM_MATCH_OBJECT || + expr.matchType() == MatchExpression::ELEM_MATCH_VALUE; +} + +/** + * Returns whether 'first' is a prefix of 'second'. + */ +bool isPathPrefixOf(const StringData& first, const StringData& second) { + if (first == second) { + return true; + } + + if (first.size() >= second.size()) { + return false; + } + + return second.startsWith(first) && second[first.size()] == '.'; +} + +/** + * Returns whether the leaf at 'path' is independent of 'fields'. + */ +bool isLeafIndependentOf(const StringData& path, const std::set& fields) { + // For each field in 'fields', we need to check if that field is a prefix of 'path' or if 'path' + // is a prefix of that field. For example, the expression {a.b: {c: 1}} is not independent of + // 'a.b.c', and and the expression {a.b.c.d: 1} is not independent of 'a.b.c'. + for (StringData field : fields) { + if (isPathPrefixOf(path, field) || isPathPrefixOf(field, path)) { + return false; + } + } + return true; +} + +/** + * Creates a MatchExpression that is equivalent to {$and: [children[0], children[1]...]}. + */ +unique_ptr createAndOfNodes(std::vector>* children) { + if (children->empty()) { + return nullptr; + } + + if (children->size() == 1) { + return std::move(children->at(0)); + } + + unique_ptr splitAnd = stdx::make_unique(); + for (auto&& expr : *children) { + splitAnd->add(expr.release()); + } + + return std::move(splitAnd); +} + +/** + * Creates a MatchExpression that is equivalent to {$nor: [children[0], children[1]...]}. + */ +unique_ptr createNorOfNodes(std::vector>* children) { + if (children->empty()) { + return nullptr; + } + + unique_ptr splitNor = stdx::make_unique(); + for (auto&& expr : *children) { + splitNor->add(expr.release()); + } + + return std::move(splitNor); +} + } // namespace namespace expression { @@ -272,5 +351,82 @@ bool isSubsetOf(const MatchExpression* lhs, const MatchExpression* rhs) { return false; } +bool isIndependentOf(const MatchExpression& expr, const std::set& pathSet) { + if (expr.isLogical()) { + // Any logical expression is independent of 'pathSet' if all its children are independent of + // 'pathSet'. + for (size_t i = 0; i < expr.numChildren(); i++) { + if (!isIndependentOf(*expr.getChild(i), pathSet)) { + return false; + } + } + return true; + } + + // At this point, we know 'expr' is a leaf. If it is an elemMatch, we do not attempt to + // determine if it is independent or not, and instead just return false. + return !isElemMatch(expr) && isLeafIndependentOf(expr.path(), pathSet); +} + +std::pair, unique_ptr> splitMatchExpressionBy( + unique_ptr expr, const std::set& fields) { + if (isIndependentOf(*expr, fields)) { + // 'expr' does not depend upon 'fields', so it can be completely moved. + return {std::move(expr), nullptr}; + } + if (!expr->isLogical()) { + // 'expr' is a leaf, and was not independent of 'fields'. + return {nullptr, std::move(expr)}; + } + + std::vector> reliant; + std::vector> separate; + + switch (expr->matchType()) { + case MatchExpression::AND: { + auto andExpr = checked_cast(expr.get()); + for (size_t i = 0; i < andExpr->numChildren(); i++) { + auto children = splitMatchExpressionBy(andExpr->releaseChild(i), fields); + + invariant(children.first || children.second); + + if (children.first) { + separate.push_back(std::move(children.first)); + } + if (children.second) { + reliant.push_back(std::move(children.second)); + } + } + return {createAndOfNodes(&separate), createAndOfNodes(&reliant)}; + } + case MatchExpression::NOR: { + // We can split a $nor because !(x | y) is logically equivalent to !x & !y. + + // However, we cannot split each child individually; instead, we must look for a wholly + // independent child to split off by itself. As an example of why, with 'b' in + // 'fields': $nor: [{$and: [{a: 1}, {b: 1}]}]} will match if a is not 1, or if b is not + // 1. However, if we split this into: {$nor: [{$and: [{a: 1}]}]}, and + // {$nor: [{$and: [{b: 1}]}]}, a document will only pass both stages if neither a nor b + // is equal to 1. + auto norExpr = checked_cast(expr.get()); + for (size_t i = 0; i < norExpr->numChildren(); i++) { + auto child = norExpr->releaseChild(i); + if (isIndependentOf(*child, fields)) { + separate.push_back(std::move(child)); + } else { + reliant.push_back(std::move(child)); + } + } + return {createNorOfNodes(&separate), createNorOfNodes(&reliant)}; + } + case MatchExpression::OR: + case MatchExpression::NOT: { + // If we aren't independent, we can't safely split. + return {nullptr, std::move(expr)}; + } + default: { MONGO_UNREACHABLE; } + } +} + } // namespace expression } // namespace mongo diff --git a/src/mongo/db/matcher/expression_algo.h b/src/mongo/db/matcher/expression_algo.h index baccf557104..0b377830274 100644 --- a/src/mongo/db/matcher/expression_algo.h +++ b/src/mongo/db/matcher/expression_algo.h @@ -59,5 +59,35 @@ namespace expression { */ bool isSubsetOf(const MatchExpression* lhs, const MatchExpression* rhs); +/** + * Determine if it is possible to split 'expr' into two MatchExpressions, where one is not + * dependent on any path from 'pathSet', such that applying the two in sequence is equivalent + * to applying 'expr'. + * + * For example, {a: "foo", b: "bar"} is splittable by "b", while + * {$or: [{a: {$eq: "foo"}}, {b: {$eq: "bar"}}]} is not splittable by "b", due to the $or. + */ +bool isSplittableBy(const MatchExpression& expr, const std::set& pathSet); + +/** + * Determine if 'expr' is reliant upon any path from 'pathSet'. + */ +bool isIndependentOf(const MatchExpression& expr, const std::set& pathSet); + +/** + * Attempt to split 'expr' into two MatchExpressions, where the first is not reliant upon any + * path from 'fields', such that applying the matches in sequence is equivalent to applying + * 'expr'. Takes ownership of 'expr'. + * + * If 'expr' cannot be split, returns {nullptr, expr}. If 'expr' is entirely independent of + * 'fields', returns {expr, nullptr}. If 'expr' is partially dependent on 'fields', and partially + * independent, returns {exprLeft, exprRight}, where each new MatchExpression contains a portion of + * 'expr'. + * + * Never returns {nullptr, nullptr}. + */ +std::pair, std::unique_ptr> +splitMatchExpressionBy(std::unique_ptr expr, const std::set& fields); + } // namespace expression } // namespace mongo diff --git a/src/mongo/db/matcher/expression_algo_test.cpp b/src/mongo/db/matcher/expression_algo_test.cpp index a9765403df6..8a38ab3be6e 100644 --- a/src/mongo/db/matcher/expression_algo_test.cpp +++ b/src/mongo/db/matcher/expression_algo_test.cpp @@ -38,10 +38,13 @@ #include "mongo/db/matcher/expression_algo.h" #include "mongo/db/matcher/expression_parser.h" #include "mongo/db/matcher/extensions_callback_disallow_extensions.h" +#include "mongo/db/matcher/extensions_callback_noop.h" #include "mongo/platform/decimal128.h" namespace mongo { +using std::unique_ptr; + /** * A MatchExpression does not hold the memory for BSONElements, so use ParsedMatchExpression to * ensure that the BSONObj outlives the MatchExpression. @@ -648,4 +651,187 @@ TEST(ExpressionAlgoIsSubsetOf, Compare_Exists_NE) { ASSERT_TRUE(expression::isSubsetOf(aNotEqualNull.get(), aExists.get())); } +TEST(IsIndependent, AndIsIndependentOnlyIfChildrenAre) { + BSONObj matchPredicate = fromjson("{$and: [{a: 1}, {b: 1}]}"); + StatusWithMatchExpression status = + MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop()); + ASSERT_OK(status.getStatus()); + + unique_ptr expr = std::move(status.getValue()); + ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"b"})); + ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"c"})); +} + +TEST(IsIndependent, ElemMatchIsNotIndependent) { + BSONObj matchPredicate = fromjson("{x: {$elemMatch: {y: 1}}}"); + StatusWithMatchExpression status = + MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop()); + ASSERT_OK(status.getStatus()); + + unique_ptr expr = std::move(status.getValue()); + ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"x"})); + ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"x.y"})); + ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"y"})); +} + +TEST(IsIndependent, NorIsIndependentOnlyIfChildrenAre) { + BSONObj matchPredicate = fromjson("{$nor: [{a: 1}, {b: 1}]}"); + StatusWithMatchExpression status = + MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop()); + ASSERT_OK(status.getStatus()); + + unique_ptr expr = std::move(status.getValue()); + ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"b"})); + ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"c"})); +} + +TEST(IsIndependent, NotIsIndependentOnlyIfChildrenAre) { + BSONObj matchPredicate = fromjson("{a: {$not: {$eq: 1}}}"); + StatusWithMatchExpression status = + MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop()); + ASSERT_OK(status.getStatus()); + + unique_ptr expr = std::move(status.getValue()); + ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"b"})); + ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"a"})); +} + +TEST(IsIndependent, OrIsIndependentOnlyIfChildrenAre) { + BSONObj matchPredicate = fromjson("{$or: [{a: 1}, {b: 1}]}"); + StatusWithMatchExpression status = + MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop()); + ASSERT_OK(status.getStatus()); + + unique_ptr expr = std::move(status.getValue()); + ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"a"})); + ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"c"})); +} + +TEST(IsIndependent, AndWithDottedFieldPathsIsNotIndependent) { + BSONObj matchPredicate = fromjson("{$and: [{'a': 1}, {'a.b': 1}]}"); + StatusWithMatchExpression status = + MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop()); + ASSERT_OK(status.getStatus()); + + unique_ptr expr = std::move(status.getValue()); + ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"a.b.c"})); + ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"a.b"})); +} + +TEST(IsIndependent, BallIsIndependentOfBalloon) { + BSONObj matchPredicate = fromjson("{'a.ball': 4}"); + StatusWithMatchExpression status = + MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop()); + ASSERT_OK(status.getStatus()); + + unique_ptr expr = std::move(status.getValue()); + ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"a.balloon"})); + ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"a.b"})); + ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"a.ball.c"})); +} + +TEST(SplitMatchExpression, AndWithSplittableChildrenIsSplittable) { + BSONObj matchPredicate = fromjson("{$and: [{a: 1}, {b: 1}]}"); + StatusWithMatchExpression status = + MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop()); + ASSERT_OK(status.getStatus()); + + std::pair, unique_ptr> splitExpr = + expression::splitMatchExpressionBy(std::move(status.getValue()), {"b"}); + + ASSERT_TRUE(splitExpr.first.get()); + BSONObjBuilder firstBob; + splitExpr.first->serialize(&firstBob); + + ASSERT_TRUE(splitExpr.second.get()); + BSONObjBuilder secondBob; + splitExpr.second->serialize(&secondBob); + + ASSERT_EQUALS(firstBob.obj(), fromjson("{a: {$eq: 1}}")); + ASSERT_EQUALS(secondBob.obj(), fromjson("{b: {$eq: 1}}")); +} + +TEST(SplitMatchExpression, NorWithIndependentChildrenIsSplittable) { + BSONObj matchPredicate = fromjson("{$nor: [{a: 1}, {b: 1}]}"); + StatusWithMatchExpression status = + MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop()); + ASSERT_OK(status.getStatus()); + + std::pair, unique_ptr> splitExpr = + expression::splitMatchExpressionBy(std::move(status.getValue()), {"b"}); + + ASSERT_TRUE(splitExpr.first.get()); + BSONObjBuilder firstBob; + splitExpr.first->serialize(&firstBob); + + ASSERT_TRUE(splitExpr.second.get()); + BSONObjBuilder secondBob; + splitExpr.second->serialize(&secondBob); + + ASSERT_EQUALS(firstBob.obj(), fromjson("{$nor: [{a: {$eq: 1}}]}")); + ASSERT_EQUALS(secondBob.obj(), fromjson("{$nor: [{b: {$eq: 1}}]}")); +} + +TEST(SplitMatchExpression, NotWithIndependentChildIsSplittable) { + BSONObj matchPredicate = fromjson("{x: {$not: {$gt: 4}}}"); + StatusWithMatchExpression status = + MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop()); + ASSERT_OK(status.getStatus()); + + std::pair, unique_ptr> splitExpr = + expression::splitMatchExpressionBy(std::move(status.getValue()), {"y"}); + + ASSERT_TRUE(splitExpr.first.get()); + BSONObjBuilder firstBob; + splitExpr.first->serialize(&firstBob); + + ASSERT_EQUALS(firstBob.obj(), fromjson("{$nor: [{$and: [{x: {$gt: 4}}]}]}")); + ASSERT_FALSE(splitExpr.second); +} + +TEST(SplitMatchExpression, OrWithOnlyIndependentChildrenIsNotSplittable) { + BSONObj matchPredicate = fromjson("{$or: [{a: 1}, {b: 1}]}"); + StatusWithMatchExpression status = + MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop()); + ASSERT_OK(status.getStatus()); + + std::pair, unique_ptr> splitExpr = + expression::splitMatchExpressionBy(std::move(status.getValue()), {"b"}); + + ASSERT_TRUE(splitExpr.second.get()); + BSONObjBuilder bob; + splitExpr.second->serialize(&bob); + + ASSERT_FALSE(splitExpr.first); + ASSERT_EQUALS(bob.obj(), fromjson("{$or: [{a: {$eq: 1}}, {b: {$eq: 1}}]}")); +} + +TEST(SplitMatchExpression, ComplexMatchExpressionSplitsCorrectly) { + BSONObj matchPredicate = fromjson( + "{$and: [{x: {$not: {$size: 2}}}," + "{$or: [{'a.b' : 3}, {'a.b.c': 4}]}," + "{$nor: [{x: {$gt: 4}}, {$and: [{x: {$not: {$eq: 1}}}, {y: 3}]}]}]}"); + StatusWithMatchExpression status = + MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop()); + ASSERT_OK(status.getStatus()); + + std::pair, unique_ptr> splitExpr = + expression::splitMatchExpressionBy(std::move(status.getValue()), {"x"}); + + ASSERT_TRUE(splitExpr.first.get()); + BSONObjBuilder firstBob; + splitExpr.first->serialize(&firstBob); + + ASSERT_TRUE(splitExpr.second.get()); + BSONObjBuilder secondBob; + splitExpr.second->serialize(&secondBob); + + ASSERT_EQUALS(firstBob.obj(), fromjson("{$or: [{'a.b': {$eq: 3}}, {'a.b.c': {$eq: 4}}]}")); + ASSERT_EQUALS(secondBob.obj(), + fromjson( + "{$and: [{$nor: [{$and: [{x: {$size: 2}}]}]}, {$nor: [{x: {$gt: 4}}, {$and: " + "[{$nor: [{$and: [{x: " + "{$eq: 1}}]}]}, {y: {$eq: 3}}]}]}]}")); +} + } // namespace mongo diff --git a/src/mongo/db/matcher/expression_tree.h b/src/mongo/db/matcher/expression_tree.h index 2b65bc9d2f6..50786402a85 100644 --- a/src/mongo/db/matcher/expression_tree.h +++ b/src/mongo/db/matcher/expression_tree.h @@ -65,6 +65,12 @@ public: return _expressions[i]; } + virtual std::unique_ptr releaseChild(size_t i) { + auto child = std::unique_ptr(_expressions[i]); + _expressions[i] = nullptr; + return std::move(child); + } + virtual std::vector* getChildVector() { return &_expressions; } diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript index ba97c44e3f8..393079d7378 100644 --- a/src/mongo/db/pipeline/SConscript +++ b/src/mongo/db/pipeline/SConscript @@ -122,6 +122,7 @@ docSourceEnv.Library( 'expression', '$BUILD_DIR/mongo/client/clientdriver', '$BUILD_DIR/mongo/db/matcher/expressions', + '$BUILD_DIR/mongo/db/matcher/expression_algo', '$BUILD_DIR/mongo/db/service_context', '$BUILD_DIR/mongo/db/storage/storage_options', '$BUILD_DIR/mongo/db/storage/wiredtiger/storage_wiredtiger_customization_hooks', diff --git a/src/mongo/db/pipeline/document_source.h b/src/mongo/db/pipeline/document_source.h index a59dfe6999d..6db0f07db8d 100644 --- a/src/mongo/db/pipeline/document_source.h +++ b/src/mongo/db/pipeline/document_source.h @@ -665,6 +665,20 @@ public: return _isTextQuery; } + /** + * Attempt to split this $match into two stages, where the first is not dependent upon any path + * from 'fields', and where applying them in sequence is equivalent to applying this stage once. + * + * Will return two intrusive_ptrs to new $match stages, where the first pointer is independent + * of 'fields', and the second is dependent. Either pointer may be null, so be sure to check the + * return value. + * + * For example, {$match: {a: "foo", "b.c": 4}} split by "b" will return pointers to two stages: + * {$match: {a: "foo"}}, and {$match: {"b.c": 4}}. + */ + std::pair, boost::intrusive_ptr> + splitSourceBy(const std::set& fields); + private: DocumentSourceMatch(const BSONObj& query, const boost::intrusive_ptr& pExpCtx); @@ -1284,6 +1298,13 @@ public: GetDepsReturn getDependencies(DepsTracker* deps) const final; + /** + * If the next stage is a $match, the part of the match that is not dependent on the unwound + * field can be moved into a new, preceding, $match stage. + */ + Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr, + Pipeline::SourceContainer* container) final; + /** * Creates a new $unwind DocumentSource from a BSON specification. */ diff --git a/src/mongo/db/pipeline/document_source_match.cpp b/src/mongo/db/pipeline/document_source_match.cpp index 5f5ebebbb1c..27fa4351f61 100644 --- a/src/mongo/db/pipeline/document_source_match.cpp +++ b/src/mongo/db/pipeline/document_source_match.cpp @@ -31,16 +31,19 @@ #include #include "mongo/db/jsobj.h" +#include "mongo/db/matcher/expression_algo.h" #include "mongo/db/matcher/extensions_callback_noop.h" -#include "mongo/db/matcher/matcher.h" #include "mongo/db/pipeline/document.h" #include "mongo/db/pipeline/document_source.h" #include "mongo/db/pipeline/expression.h" #include "mongo/util/stringutils.h" +#include "mongo/stdx/memory.h" namespace mongo { using boost::intrusive_ptr; +using std::pair; +using std::unique_ptr; using std::string; using std::vector; @@ -150,6 +153,13 @@ Document redactSafePortionDollarOps(BSONObj expr) { if (field.fieldName()[0] != '$') continue; + if (field.fieldNameStringData() == "$eq") { + if (isTypeRedactSafeInComparison(field.type())) { + output[field.fieldNameStringData()] = Value(field); + } + continue; + } + switch (BSONObj::MatchType(field.getGtLtOp(BSONObj::Equality))) { // These are always ok case BSONObj::opTYPE: @@ -304,7 +314,6 @@ BSONObj DocumentSourceMatch::redactSafePortion() const { void DocumentSourceMatch::setSource(DocumentSource* source) { uassert(17313, "$match with $text is only allowed as the first pipeline stage", !_isTextQuery); - DocumentSource::setSource(source); } @@ -320,6 +329,43 @@ bool DocumentSourceMatch::isTextQuery(const BSONObj& query) { return false; } +pair, intrusive_ptr> +DocumentSourceMatch::splitSourceBy(const std::set& fields) { + pair, unique_ptr> newExpr( + expression::splitMatchExpressionBy(std::move(_expression), fields)); + + invariant(newExpr.first || newExpr.second); + + if (!newExpr.first) { + // The entire $match dependends on 'fields'. + _expression = std::move(newExpr.second); + return {nullptr, this}; + } else if (!newExpr.second) { + // This $match is entirely independent of 'fields'. + _expression = std::move(newExpr.first); + return {this, nullptr}; + } + + // A MatchExpression requires that it is outlived by the BSONObj it is parsed from. Since the + // original BSONObj this $match was created from is no longer equivalent to either of the + // MatchExpressions we return, we instead take each of these expressions, serialize them, and + // then re-parse them, constructing new BSON that is owned by the DocumentSourceMatch. + + // Build an expression for a new $match stage. + BSONObjBuilder firstBob; + newExpr.first->serialize(&firstBob); + + intrusive_ptr firstMatch(new DocumentSourceMatch(firstBob.obj(), pExpCtx)); + + // This $match stage is still needed, so update the MatchExpression as needed. + BSONObjBuilder secondBob; + newExpr.second->serialize(&secondBob); + + intrusive_ptr secondMatch(new DocumentSourceMatch(secondBob.obj(), pExpCtx)); + + return {firstMatch, secondMatch}; +} + static void uassertNoDisallowedClauses(BSONObj query) { BSONForEach(e, query) { // can't use the MatchExpression API because this would segfault the constructor @@ -391,4 +437,4 @@ DocumentSourceMatch::DocumentSourceMatch(const BSONObj& query, _expression = std::move(status.getValue()); } -} +} // namespace mongo diff --git a/src/mongo/db/pipeline/document_source_test.cpp b/src/mongo/db/pipeline/document_source_test.cpp index f950eb8328f..e33916f909b 100644 --- a/src/mongo/db/pipeline/document_source_test.cpp +++ b/src/mongo/db/pipeline/document_source_test.cpp @@ -29,6 +29,7 @@ #include "mongo/platform/basic.h" #include "mongo/base/init.h" +#include "mongo/db/matcher/extensions_callback_noop.h" #include "mongo/db/operation_context_noop.h" #include "mongo/db/pipeline/dependencies.h" #include "mongo/db/pipeline/document_source.h" @@ -2969,6 +2970,8 @@ public: namespace DocumentSourceMatch { using mongo::DocumentSourceMatch; +using std::unique_ptr; + // Helpers to make a DocumentSourceMatch from a query object or json string intrusive_ptr makeMatch(const BSONObj& query) { intrusive_ptr uncasted = diff --git a/src/mongo/db/pipeline/document_source_unwind.cpp b/src/mongo/db/pipeline/document_source_unwind.cpp index a03d3fe7a77..506627cc4dd 100644 --- a/src/mongo/db/pipeline/document_source_unwind.cpp +++ b/src/mongo/db/pipeline/document_source_unwind.cpp @@ -225,6 +225,41 @@ BSONObjSet DocumentSourceUnwind::getOutputSorts() { return out; } +Pipeline::SourceContainer::iterator DocumentSourceUnwind::optimizeAt( + Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { + invariant(*itr == this); + + if (auto nextMatch = dynamic_cast((*std::next(itr)).get())) { + const bool includeDollarPrefix = false; + std::set field = {_unwindPath.getPath(includeDollarPrefix)}; + auto splitMatch = nextMatch->splitSourceBy(field); + + invariant(splitMatch.first || splitMatch.second); + + if (!splitMatch.first && splitMatch.second) { + // No optimization was possible. + return std::next(itr); + } + + container->erase(std::next(itr)); + + // If splitMatch.second is not null, then there is a new $match stage to insert after + // ourselves. + if (splitMatch.second) { + container->insert(std::next(itr), std::move(splitMatch.second)); + } + + if (splitMatch.first) { + container->insert(itr, std::move(splitMatch.first)); + if (std::prev(itr) == container->begin()) { + return std::prev(itr); + } + return std::prev(std::prev(itr)); + } + } + return std::next(itr); +} + Value DocumentSourceUnwind::serialize(bool explain) const { return Value(DOC(getSourceName() << DOC( "path" << _unwindPath.getPath(true) << "preserveNullAndEmptyArrays" diff --git a/src/mongo/db/pipeline/pipeline_test.cpp b/src/mongo/db/pipeline/pipeline_test.cpp index e0162c9eaeb..e5afb99fb86 100644 --- a/src/mongo/db/pipeline/pipeline_test.cpp +++ b/src/mongo/db/pipeline/pipeline_test.cpp @@ -268,6 +268,89 @@ class MatchShouldDuplicateItselfBeforeRedact : public Base { } }; +class MatchShouldSwapWithUnwind : public Base { + string inputPipeJson() { + return "[{$unwind: '$a.b.c'}, " + "{$match: {'b': 1}}]"; + } + string outputPipeJson() { + return "[{$match: {'b': 1}}, " + "{$unwind: {path: '$a.b.c'}}]"; + } +}; + +class MatchOnPrefixShouldNotSwapOnUnwind : public Base { + string inputPipeJson() { + return "[{$unwind: {path: '$a.b.c'}}, " + "{$match: {'a.b': 1}}]"; + } + string outputPipeJson() { + return "[{$unwind: {path: '$a.b.c'}}, " + "{$match: {'a.b': 1}}]"; + } +}; + +class MatchShouldSplitOnUnwind : public Base { + string inputPipeJson() { + return "[{$unwind: '$a.b'}, " + "{$match: {$and: [{f: {$eq: 5}}, " + " {$nor: [{'a.d': 1, c: 5}, {'a.b': 3, c: 5}]}]}}]"; + } + string outputPipeJson() { + return "[{$match: {$and: [{f: {$eq: 5}}," + " {$nor: [{$and: [{'a.d': {$eq: 1}}, {c: {$eq: 5}}]}]}]}}," + "{$unwind: {path: '$a.b'}}, " + "{$match: {$nor: [{$and: [{'a.b': {$eq: 3}}, {c: {$eq: 5}}]}]}}]"; + } +}; + +class MatchShouldNotOptimizeWithElemMatch : public Base { + string inputPipeJson() { + return "[{$unwind: {path: '$a.b'}}, " + "{$match: {a: {$elemMatch: {b: {d: 1}}}}}]"; + } + string outputPipeJson() { + return "[{$unwind: {path: '$a.b'}}, " + "{$match: {a: {$elemMatch: {b: {d: 1}}}}}]"; + } +}; + +class MatchWithNorOnlySplitsIndependentChildren : public Base { + string inputPipeJson() { + return "[{$unwind: {path: '$a'}}, " + "{$match: {$nor: [{$and: [{a: {$eq: 1}}, {b: {$eq: 1}}]}, {b: {$eq: 2}} ]}}]"; + } + string outputPipeJson() { + return "[{$match: {$nor: [{b: {$eq: 2}}]}}, " + "{$unwind: {path: '$a'}}, " + "{$match: {$nor: [{$and: [{a: {$eq: 1}}, {b: {$eq: 1}}]}]}}]"; + } +}; + +class MatchWithOrDoesNotSplit : public Base { + string inputPipeJson() { + return "[{$unwind: {path: '$a'}}, " + "{$match: {$or: [{a: {$eq: 'dependent'}}, {b: {$eq: 'independent'}}]}}]"; + } + string outputPipeJson() { + return "[{$unwind: {path: '$a'}}, " + "{$match: {$or: [{a: {$eq: 'dependent'}}, {b: {$eq: 'independent'}}]}}]"; + } +}; + +class UnwindBeforeDoubleMatchShouldRepeatedlyOptimize : public Base { + string inputPipeJson() { + return "[{$unwind: '$a'}, " + "{$match: {b: {$gt: 0}}}, " + "{$match: {a: 1, c: 1}}]"; + } + string outputPipeJson() { + return "[{$match: {$and: [{b: {$gt: 0}}, {c: {$eq: 1}}]}}," + "{$unwind: {path: '$a'}}, " + "{$match: {a: {$eq: 1}}}]"; + } +}; + } // namespace Local namespace Sharded { @@ -674,6 +757,13 @@ public: add(); add(); add(); + add(); + add(); + add(); + add(); + add(); + add(); + add(); add(); add(); add