diff options
author | David Storch <david.storch@10gen.com> | 2017-04-12 14:53:25 -0400 |
---|---|---|
committer | David Storch <david.storch@10gen.com> | 2017-04-18 18:11:15 -0400 |
commit | b9f9195390142f4e1363dc83b986ead5dc8993b8 (patch) | |
tree | d57f4c4643a64d3e64720a90560cc511dd8b5467 | |
parent | 071d0398b7e527fc5b8c1ffdd3474c91f34616f0 (diff) | |
download | mongo-b9f9195390142f4e1363dc83b986ead5dc8993b8.tar.gz |
SERVER-27115 extend $match swapping optimization to handle renamed fields
If a field renamed by $project or $addFields is used in a
subsequent $match, we can now swap the $match and update it
to use the original (or, "renamed from") field name. This
allows $match planning to result in better index usage in
some cases.
19 files changed, 523 insertions, 108 deletions
diff --git a/jstests/aggregation/match_swapping_renamed_fields.js b/jstests/aggregation/match_swapping_renamed_fields.js new file mode 100644 index 00000000000..f14ba7b6d16 --- /dev/null +++ b/jstests/aggregation/match_swapping_renamed_fields.js @@ -0,0 +1,47 @@ +/** + * Tests for the $match swapping optimization's ability to handle fields renamed in $project or + * $addFields. + * @tags: [do_not_wrap_aggregations_in_facets] + */ +(function() { + "use strict"; + + load("jstests/libs/analyze_plan.js"); + + let coll = db.match_swapping_renamed_fields; + coll.drop(); + + assert.writeOK(coll.insert([{a: 1, b: 1, c: 1}, {a: 2, b: 2, c: 2}, {a: 3, b: 3, c: 3}])); + assert.commandWorked(coll.createIndex({a: 1})); + + // Test that a $match can result in index usage after moving past a field renamed by $project. + let pipeline = [{$project: {_id: 0, z: "$a", c: 1}}, {$match: {z: {$gt: 1}}}]; + assert.eq(2, coll.aggregate(pipeline).itcount()); + let explain = coll.explain().aggregate(pipeline); + assert.neq(null, getAggPlanStage(explain, "IXSCAN")); + + // Test that a $match can result in index usage after moving past a field renamed by $addFields. + pipeline = [{$addFields: {z: "$a"}}, {$match: {z: {$gt: 1}}}]; + assert.eq(2, coll.aggregate(pipeline).itcount()); + explain = coll.explain().aggregate(pipeline); + assert.neq(null, getAggPlanStage(explain, "IXSCAN")); + + // Test that a partially dependent match can split, with a rename applied, resulting in index + // usage. + pipeline = + [{$project: {z: "$a", zz: {$sum: ["$a", "$b"]}}}, {$match: {z: {$gt: 1}, zz: {$lt: 5}}}]; + assert.eq(1, coll.aggregate(pipeline).itcount()); + explain = coll.explain().aggregate(pipeline); + assert.neq(null, getAggPlanStage(explain, "IXSCAN")); + + // Test that a match can swap past several renames, resulting in index usage. + pipeline = [ + {$project: {d: "$a"}}, + {$addFields: {e: "$$CURRENT.d"}}, + {$project: {f: "$$ROOT.e"}}, + {$match: {f: {$gt: 1}}} + ]; + assert.eq(2, coll.aggregate(pipeline).itcount()); + explain = coll.explain().aggregate(pipeline); + assert.neq(null, getAggPlanStage(explain, "IXSCAN")); +}()); diff --git a/src/mongo/db/matcher/expression_algo.cpp b/src/mongo/db/matcher/expression_algo.cpp index d918ead80f9..693ef9f55e9 100644 --- a/src/mongo/db/matcher/expression_algo.cpp +++ b/src/mongo/db/matcher/expression_algo.cpp @@ -223,15 +223,6 @@ bool _isSubsetOf(const MatchExpression* lhs, const ExistsMatchExpression* rhs) { } /** - * Returns whether the leaf is a $elemMatch expression. - */ -bool isElemMatch(const MatchExpression& expr) { - return expr.matchType() == MatchExpression::ELEM_MATCH_OBJECT || - expr.matchType() == MatchExpression::ELEM_MATCH_VALUE; -} - - -/** * Returns whether the leaf at 'path' is independent of 'fields'. */ bool isLeafIndependentOf(const StringData& path, const std::set<std::string>& fields) { @@ -283,6 +274,86 @@ unique_ptr<MatchExpression> createNorOfNodes(std::vector<unique_ptr<MatchExpress return std::move(splitNor); } +void applyRenamesToExpression(MatchExpression* expr, const StringMap<std::string>& renames) { + if (expr->isArray() || expr->matchType() == MatchExpression::TYPE_OPERATOR) { + return; + } + + if (expr->isLeaf()) { + auto it = renames.find(expr->path()); + if (it != renames.end()) { + LeafMatchExpression* leafExpr = checked_cast<LeafMatchExpression*>(expr); + leafExpr->setPath(it->second); + } + } + + for (size_t i = 0; i < expr->numChildren(); ++i) { + applyRenamesToExpression(expr->getChild(i), renames); + } +} + +std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> +splitMatchExpressionByWithoutRenames(unique_ptr<MatchExpression> expr, + const std::set<std::string>& fields) { + if (expression::isIndependentOf(*expr, fields)) { + // 'expr' does not depend upon 'fields', so it can be completely moved. + return {std::move(expr), nullptr}; + } + if (!expr->isLogical()) { + // 'expr' is a leaf, and was not independent of 'fields'. + return {nullptr, std::move(expr)}; + } + + std::vector<unique_ptr<MatchExpression>> reliant; + std::vector<unique_ptr<MatchExpression>> separate; + + switch (expr->matchType()) { + case MatchExpression::AND: { + auto andExpr = checked_cast<AndMatchExpression*>(expr.get()); + for (size_t i = 0; i < andExpr->numChildren(); i++) { + auto children = + splitMatchExpressionByWithoutRenames(andExpr->releaseChild(i), fields); + + invariant(children.first || children.second); + + if (children.first) { + separate.push_back(std::move(children.first)); + } + if (children.second) { + reliant.push_back(std::move(children.second)); + } + } + return {createAndOfNodes(&separate), createAndOfNodes(&reliant)}; + } + case MatchExpression::NOR: { + // We can split a $nor because !(x | y) is logically equivalent to !x & !y. + + // However, we cannot split each child individually; instead, we must look for a wholly + // independent child to split off by itself. As an example of why, with 'b' in + // 'fields': $nor: [{$and: [{a: 1}, {b: 1}]}]} will match if a is not 1, or if b is not + // 1. However, if we split this into: {$nor: [{$and: [{a: 1}]}]}, and + // {$nor: [{$and: [{b: 1}]}]}, a document will only pass both stages if neither a nor b + // is equal to 1. + auto norExpr = checked_cast<NorMatchExpression*>(expr.get()); + for (size_t i = 0; i < norExpr->numChildren(); i++) { + auto child = norExpr->releaseChild(i); + if (expression::isIndependentOf(*child, fields)) { + separate.push_back(std::move(child)); + } else { + reliant.push_back(std::move(child)); + } + } + return {createNorOfNodes(&separate), createNorOfNodes(&reliant)}; + } + case MatchExpression::OR: + case MatchExpression::NOT: { + // If we aren't independent, we can't safely split. + return {nullptr, std::move(expr)}; + } + default: { MONGO_UNREACHABLE; } + } +} + } // namespace namespace expression { @@ -348,69 +419,29 @@ bool isIndependentOf(const MatchExpression& expr, const std::set<std::string>& p return true; } - // At this point, we know 'expr' is a leaf. If it is an elemMatch, we do not attempt to - // determine if it is independent or not, and instead just return false. - return !isElemMatch(expr) && isLeafIndependentOf(expr.path(), pathSet); + // Certain kinds of match expressions are never considered independent. + if (expr.isArray() || expr.matchType() == MatchExpression::TYPE_OPERATOR) { + return false; + } + + return isLeafIndependentOf(expr.path(), pathSet); } std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitMatchExpressionBy( - unique_ptr<MatchExpression> expr, const std::set<std::string>& fields) { - if (isIndependentOf(*expr, fields)) { - // 'expr' does not depend upon 'fields', so it can be completely moved. - return {std::move(expr), nullptr}; - } - if (!expr->isLogical()) { - // 'expr' is a leaf, and was not independent of 'fields'. - return {nullptr, std::move(expr)}; + unique_ptr<MatchExpression> expr, + const std::set<std::string>& fields, + const StringMap<std::string>& renames) { + // TODO SERVER-27115: Currently renames from dotted fields are not supported, but this + // restriction can be relaxed. + for (auto&& rename : renames) { + invariant(rename.second.find('.') == std::string::npos); } - std::vector<unique_ptr<MatchExpression>> reliant; - std::vector<unique_ptr<MatchExpression>> separate; - - switch (expr->matchType()) { - case MatchExpression::AND: { - auto andExpr = checked_cast<AndMatchExpression*>(expr.get()); - for (size_t i = 0; i < andExpr->numChildren(); i++) { - auto children = splitMatchExpressionBy(andExpr->releaseChild(i), fields); - - invariant(children.first || children.second); - - if (children.first) { - separate.push_back(std::move(children.first)); - } - if (children.second) { - reliant.push_back(std::move(children.second)); - } - } - return {createAndOfNodes(&separate), createAndOfNodes(&reliant)}; - } - case MatchExpression::NOR: { - // We can split a $nor because !(x | y) is logically equivalent to !x & !y. - - // However, we cannot split each child individually; instead, we must look for a wholly - // independent child to split off by itself. As an example of why, with 'b' in - // 'fields': $nor: [{$and: [{a: 1}, {b: 1}]}]} will match if a is not 1, or if b is not - // 1. However, if we split this into: {$nor: [{$and: [{a: 1}]}]}, and - // {$nor: [{$and: [{b: 1}]}]}, a document will only pass both stages if neither a nor b - // is equal to 1. - auto norExpr = checked_cast<NorMatchExpression*>(expr.get()); - for (size_t i = 0; i < norExpr->numChildren(); i++) { - auto child = norExpr->releaseChild(i); - if (isIndependentOf(*child, fields)) { - separate.push_back(std::move(child)); - } else { - reliant.push_back(std::move(child)); - } - } - return {createNorOfNodes(&separate), createNorOfNodes(&reliant)}; - } - case MatchExpression::OR: - case MatchExpression::NOT: { - // If we aren't independent, we can't safely split. - return {nullptr, std::move(expr)}; - } - default: { MONGO_UNREACHABLE; } + auto splitExpr = splitMatchExpressionByWithoutRenames(std::move(expr), fields); + if (splitExpr.first) { + applyRenamesToExpression(splitExpr.first.get(), renames); } + return splitExpr; } void mapOver(MatchExpression* expr, NodeTraversalFunc func, std::string path) { diff --git a/src/mongo/db/matcher/expression_algo.h b/src/mongo/db/matcher/expression_algo.h index 1144715c8cf..70092eb644c 100644 --- a/src/mongo/db/matcher/expression_algo.h +++ b/src/mongo/db/matcher/expression_algo.h @@ -35,6 +35,7 @@ #include "mongo/base/string_data.h" #include "mongo/stdx/functional.h" +#include "mongo/util/string_map.h" namespace mongo { @@ -114,10 +115,18 @@ void mapOver(MatchExpression* expr, NodeTraversalFunc func, std::string path = " * independent, returns {exprLeft, exprRight}, where each new MatchExpression contains a portion of * 'expr'. * + * Any paths which should be renamed are encoded in 'renames', which maps from path names in 'expr' + * to the new values of those paths. If the return value is {exprLeft, exprRight} or {exprLeft, + * nullptr}, exprLeft will reflect the path renames. For example, suppose the original match + * expression is {old: {$gt: 3}} and 'renames' contains the mapping "old" => "new". The returned + * exprLeft value will be {new: {$gt: 3}}, provided that "old" is not in 'fields'. + * * Never returns {nullptr, nullptr}. */ std::pair<std::unique_ptr<MatchExpression>, std::unique_ptr<MatchExpression>> -splitMatchExpressionBy(std::unique_ptr<MatchExpression> expr, const std::set<std::string>& fields); +splitMatchExpressionBy(std::unique_ptr<MatchExpression> expr, + const std::set<std::string>& fields, + const StringMap<std::string>& renames); } // namespace expression } // namespace mongo diff --git a/src/mongo/db/matcher/expression_algo_test.cpp b/src/mongo/db/matcher/expression_algo_test.cpp index c97d766679a..14678945b4f 100644 --- a/src/mongo/db/matcher/expression_algo_test.cpp +++ b/src/mongo/db/matcher/expression_algo_test.cpp @@ -805,7 +805,7 @@ TEST(SplitMatchExpression, AndWithSplittableChildrenIsSplittable) { ASSERT_OK(status.getStatus()); std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr = - expression::splitMatchExpressionBy(std::move(status.getValue()), {"b"}); + expression::splitMatchExpressionBy(std::move(status.getValue()), {"b"}, {}); ASSERT_TRUE(splitExpr.first.get()); BSONObjBuilder firstBob; @@ -827,7 +827,7 @@ TEST(SplitMatchExpression, NorWithIndependentChildrenIsSplittable) { ASSERT_OK(status.getStatus()); std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr = - expression::splitMatchExpressionBy(std::move(status.getValue()), {"b"}); + expression::splitMatchExpressionBy(std::move(status.getValue()), {"b"}, {}); ASSERT_TRUE(splitExpr.first.get()); BSONObjBuilder firstBob; @@ -849,7 +849,7 @@ TEST(SplitMatchExpression, NotWithIndependentChildIsSplittable) { ASSERT_OK(status.getStatus()); std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr = - expression::splitMatchExpressionBy(std::move(status.getValue()), {"y"}); + expression::splitMatchExpressionBy(std::move(status.getValue()), {"y"}, {}); ASSERT_TRUE(splitExpr.first.get()); BSONObjBuilder firstBob; @@ -867,7 +867,7 @@ TEST(SplitMatchExpression, OrWithOnlyIndependentChildrenIsNotSplittable) { ASSERT_OK(status.getStatus()); std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr = - expression::splitMatchExpressionBy(std::move(status.getValue()), {"b"}); + expression::splitMatchExpressionBy(std::move(status.getValue()), {"b"}, {}); ASSERT_TRUE(splitExpr.second.get()); BSONObjBuilder bob; @@ -888,7 +888,7 @@ TEST(SplitMatchExpression, ComplexMatchExpressionSplitsCorrectly) { ASSERT_OK(status.getStatus()); std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr = - expression::splitMatchExpressionBy(std::move(status.getValue()), {"x"}); + expression::splitMatchExpressionBy(std::move(status.getValue()), {"x"}, {}); ASSERT_TRUE(splitExpr.first.get()); BSONObjBuilder firstBob; @@ -914,7 +914,7 @@ TEST(SplitMatchExpression, ShouldNotExtractPrefixOfDottedPathAsIndependent) { ASSERT_OK(status.getStatus()); std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr = - expression::splitMatchExpressionBy(std::move(status.getValue()), {"a.b"}); + expression::splitMatchExpressionBy(std::move(status.getValue()), {"a.b"}, {}); ASSERT_TRUE(splitExpr.first.get()); BSONObjBuilder firstBob; @@ -928,6 +928,178 @@ TEST(SplitMatchExpression, ShouldNotExtractPrefixOfDottedPathAsIndependent) { ASSERT_BSONOBJ_EQ(secondBob.obj(), fromjson("{$and: [{a: {$eq: 1}}, {'a.b': {$eq: 1}}]}")); } +TEST(SplitMatchExpression, ShouldMoveIndependentLeafPredicateAcrossRename) { + BSONObj matchPredicate = fromjson("{a: 1}"); + const CollatorInterface* collator = nullptr; + auto matcher = MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop(), collator); + ASSERT_OK(matcher.getStatus()); + + StringMap<std::string> renames{{"a", "b"}}; + std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr = + expression::splitMatchExpressionBy(std::move(matcher.getValue()), {}, renames); + + ASSERT_TRUE(splitExpr.first.get()); + BSONObjBuilder firstBob; + splitExpr.first->serialize(&firstBob); + ASSERT_BSONOBJ_EQ(firstBob.obj(), fromjson("{b: {$eq: 1}}")); + + ASSERT_FALSE(splitExpr.second.get()); +} + +TEST(SplitMatchExpression, ShouldMoveIndependentAndPredicateAcrossRename) { + BSONObj matchPredicate = fromjson("{$and: [{a: 1}, {b: 2}]}"); + const CollatorInterface* collator = nullptr; + auto matcher = MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop(), collator); + ASSERT_OK(matcher.getStatus()); + + StringMap<std::string> renames{{"a", "c"}}; + std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr = + expression::splitMatchExpressionBy(std::move(matcher.getValue()), {}, renames); + + ASSERT_TRUE(splitExpr.first.get()); + BSONObjBuilder firstBob; + splitExpr.first->serialize(&firstBob); + ASSERT_BSONOBJ_EQ(firstBob.obj(), fromjson("{$and: [{c: {$eq: 1}}, {b: {$eq: 2}}]}")); + + ASSERT_FALSE(splitExpr.second.get()); +} + +TEST(SplitMatchExpression, ShouldSplitPartiallyDependentAndPredicateAcrossRename) { + BSONObj matchPredicate = fromjson("{$and: [{a: 1}, {b: 2}]}"); + const CollatorInterface* collator = nullptr; + auto matcher = MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop(), collator); + ASSERT_OK(matcher.getStatus()); + + StringMap<std::string> renames{{"a", "c"}}; + std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr = + expression::splitMatchExpressionBy(std::move(matcher.getValue()), {"b"}, renames); + + ASSERT_TRUE(splitExpr.first.get()); + BSONObjBuilder firstBob; + splitExpr.first->serialize(&firstBob); + ASSERT_BSONOBJ_EQ(firstBob.obj(), fromjson("{c: {$eq: 1}}")); + + ASSERT_TRUE(splitExpr.second.get()); + BSONObjBuilder secondBob; + splitExpr.second->serialize(&secondBob); + ASSERT_BSONOBJ_EQ(secondBob.obj(), fromjson("{b: {$eq: 2}}")); +} + +TEST(SplitMatchExpression, ShouldSplitPartiallyDependentComplexPredicateMultipleRenames) { + BSONObj matchPredicate = fromjson("{$and: [{a: 1}, {$or: [{b: 2}, {c: 3}]}]}"); + const CollatorInterface* collator = nullptr; + auto matcher = MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop(), collator); + ASSERT_OK(matcher.getStatus()); + + StringMap<std::string> renames{{"b", "d"}, {"c", "e"}}; + std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr = + expression::splitMatchExpressionBy(std::move(matcher.getValue()), {"a"}, renames); + + ASSERT_TRUE(splitExpr.first.get()); + BSONObjBuilder firstBob; + splitExpr.first->serialize(&firstBob); + ASSERT_BSONOBJ_EQ(firstBob.obj(), fromjson("{$or: [{d: {$eq: 2}}, {e: {$eq: 3}}]}")); + + ASSERT_TRUE(splitExpr.second.get()); + BSONObjBuilder secondBob; + splitExpr.second->serialize(&secondBob); + ASSERT_BSONOBJ_EQ(secondBob.obj(), fromjson("{a: {$eq: 1}}")); +} + +TEST(SplitMatchExpression, + ShouldSplitPartiallyDependentComplexPredicateMultipleRenamesDottedPaths) { + BSONObj matchPredicate = fromjson("{$and: [{a: 1}, {$or: [{'d.e.f': 2}, {'e.f.g': 3}]}]}"); + const CollatorInterface* collator = nullptr; + auto matcher = MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop(), collator); + ASSERT_OK(matcher.getStatus()); + + StringMap<std::string> renames{{"d.e.f", "x"}, {"e.f.g", "y"}}; + std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr = + expression::splitMatchExpressionBy(std::move(matcher.getValue()), {"a"}, renames); + + ASSERT_TRUE(splitExpr.first.get()); + BSONObjBuilder firstBob; + splitExpr.first->serialize(&firstBob); + ASSERT_BSONOBJ_EQ(firstBob.obj(), fromjson("{$or: [{x: {$eq: 2}}, {y: {$eq: 3}}]}")); + + ASSERT_TRUE(splitExpr.second.get()); + BSONObjBuilder secondBob; + splitExpr.second->serialize(&secondBob); + ASSERT_BSONOBJ_EQ(secondBob.obj(), fromjson("{a: {$eq: 1}}")); +} + +TEST(SplitMatchExpression, ShouldNotMoveElemMatchObjectAcrossRename) { + BSONObj matchPredicate = fromjson("{a: {$elemMatch: {b: 3}}}"); + const CollatorInterface* collator = nullptr; + auto matcher = MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop(), collator); + ASSERT_OK(matcher.getStatus()); + + StringMap<std::string> renames{{"a", "c"}}; + std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr = + expression::splitMatchExpressionBy(std::move(matcher.getValue()), {}, renames); + + ASSERT_FALSE(splitExpr.first.get()); + + ASSERT_TRUE(splitExpr.second.get()); + BSONObjBuilder secondBob; + splitExpr.second->serialize(&secondBob); + ASSERT_BSONOBJ_EQ(secondBob.obj(), fromjson("{a: {$elemMatch: {b: {$eq: 3}}}}")); +} + +TEST(SplitMatchExpression, ShouldNotMoveElemMatchValueAcrossRename) { + BSONObj matchPredicate = fromjson("{a: {$elemMatch: {$eq: 3}}}"); + const CollatorInterface* collator = nullptr; + auto matcher = MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop(), collator); + ASSERT_OK(matcher.getStatus()); + + StringMap<std::string> renames{{"a", "c"}}; + std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr = + expression::splitMatchExpressionBy(std::move(matcher.getValue()), {}, renames); + + ASSERT_FALSE(splitExpr.first.get()); + + ASSERT_TRUE(splitExpr.second.get()); + BSONObjBuilder secondBob; + splitExpr.second->serialize(&secondBob); + ASSERT_BSONOBJ_EQ(secondBob.obj(), fromjson("{a: {$elemMatch: {$eq: 3}}}")); +} + +TEST(SplitMatchExpression, ShouldNotMoveTypeAcrossRename) { + BSONObj matchPredicate = fromjson("{a: {$type: 16}}"); + const CollatorInterface* collator = nullptr; + auto matcher = MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop(), collator); + ASSERT_OK(matcher.getStatus()); + + StringMap<std::string> renames{{"a", "c"}}; + std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr = + expression::splitMatchExpressionBy(std::move(matcher.getValue()), {}, renames); + + ASSERT_FALSE(splitExpr.first.get()); + + ASSERT_TRUE(splitExpr.second.get()); + BSONObjBuilder secondBob; + splitExpr.second->serialize(&secondBob); + ASSERT_BSONOBJ_EQ(secondBob.obj(), fromjson("{a: {$type: 16}}")); +} + +TEST(SplitMatchExpression, ShouldNotMoveSizeAcrossRename) { + BSONObj matchPredicate = fromjson("{a: {$size: 3}}"); + const CollatorInterface* collator = nullptr; + auto matcher = MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop(), collator); + ASSERT_OK(matcher.getStatus()); + + StringMap<std::string> renames{{"a", "c"}}; + std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr = + expression::splitMatchExpressionBy(std::move(matcher.getValue()), {}, renames); + + ASSERT_FALSE(splitExpr.first.get()); + + ASSERT_TRUE(splitExpr.second.get()); + BSONObjBuilder secondBob; + splitExpr.second->serialize(&secondBob); + ASSERT_BSONOBJ_EQ(secondBob.obj(), fromjson("{a: {$size: 3}}")); +} + TEST(MapOverMatchExpression, DoesMapOverLogicalNodes) { BSONObj matchPredicate = fromjson("{a: {$not: {$eq: 1}}}"); const CollatorInterface* collator = nullptr; diff --git a/src/mongo/db/pipeline/document_source.cpp b/src/mongo/db/pipeline/document_source.cpp index 23e98350197..9651c7cb8e8 100644 --- a/src/mongo/db/pipeline/document_source.cpp +++ b/src/mongo/db/pipeline/document_source.cpp @@ -147,10 +147,15 @@ splitMatchByModifiedFields(const boost::intrusive_ptr<DocumentSourceMatch>& matc case DocumentSource::GetModPathsReturn::Type::kAllExcept: { DepsTracker depsTracker; match->getDependencies(&depsTracker); - modifiedPaths = extractModifiedDependencies(depsTracker.fields, modifiedPathsRet.paths); + + auto preservedPaths = modifiedPathsRet.paths; + for (auto&& rename : modifiedPathsRet.renames) { + preservedPaths.insert(rename.first); + } + modifiedPaths = extractModifiedDependencies(depsTracker.fields, preservedPaths); } } - return match->splitSourceBy(modifiedPaths); + return match->splitSourceBy(modifiedPaths, modifiedPathsRet.renames); } } // namespace diff --git a/src/mongo/db/pipeline/document_source.h b/src/mongo/db/pipeline/document_source.h index 2b014c37a61..7b724b555cd 100644 --- a/src/mongo/db/pipeline/document_source.h +++ b/src/mongo/db/pipeline/document_source.h @@ -366,11 +366,25 @@ public: kAllExcept, }; - GetModPathsReturn(Type type, std::set<std::string>&& paths) - : type(type), paths(std::move(paths)) {} + GetModPathsReturn(Type type, + std::set<std::string>&& paths, + StringMap<std::string>&& renames) + : type(type), paths(std::move(paths)), renames(std::move(renames)) {} Type type; std::set<std::string> paths; + + // Stages may fill out 'renames' to contain information about path renames. Each entry in + // 'renames' maps from the new name of the path (valid in documents flowing *out* of this + // stage) to the old name of the path (valid in documents flowing *into* this stage). + // + // For example, consider the stage + // + // {$project: {_id: 0, a: 1, b: "$c"}} + // + // This stage should return kAllExcept, since it modifies all paths other than "a". It can + // also fill out 'renames' with the mapping "b" => "c". + StringMap<std::string> renames; }; /** @@ -381,7 +395,7 @@ public: * See GetModPathsReturn above for the possible return values and what they mean. */ virtual GetModPathsReturn getModifiedPaths() const { - return {GetModPathsReturn::Type::kNotSupported, std::set<std::string>{}}; + return {GetModPathsReturn::Type::kNotSupported, std::set<std::string>{}, {}}; } /** diff --git a/src/mongo/db/pipeline/document_source_graph_lookup.cpp b/src/mongo/db/pipeline/document_source_graph_lookup.cpp index 717b82d59aa..7cc78ba8e95 100644 --- a/src/mongo/db/pipeline/document_source_graph_lookup.cpp +++ b/src/mongo/db/pipeline/document_source_graph_lookup.cpp @@ -351,7 +351,7 @@ DocumentSource::GetModPathsReturn DocumentSourceGraphLookUp::getModifiedPaths() modifiedPaths.insert(pathsModifiedByUnwind.paths.begin(), pathsModifiedByUnwind.paths.end()); } - return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedPaths)}; + return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedPaths), {}}; } Pipeline::SourceContainer::iterator DocumentSourceGraphLookUp::doOptimizeAt( diff --git a/src/mongo/db/pipeline/document_source_lookup.cpp b/src/mongo/db/pipeline/document_source_lookup.cpp index b6cf88172c3..4c68db3f1da 100644 --- a/src/mongo/db/pipeline/document_source_lookup.cpp +++ b/src/mongo/db/pipeline/document_source_lookup.cpp @@ -174,7 +174,7 @@ DocumentSource::GetModPathsReturn DocumentSourceLookUp::getModifiedPaths() const modifiedPaths.insert(pathsModifiedByUnwind.paths.begin(), pathsModifiedByUnwind.paths.end()); } - return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedPaths)}; + return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedPaths), {}}; } Pipeline::SourceContainer::iterator DocumentSourceLookUp::doOptimizeAt( diff --git a/src/mongo/db/pipeline/document_source_match.cpp b/src/mongo/db/pipeline/document_source_match.cpp index 0f611b04423..1b7d19f2d2f 100644 --- a/src/mongo/db/pipeline/document_source_match.cpp +++ b/src/mongo/db/pipeline/document_source_match.cpp @@ -371,37 +371,49 @@ void DocumentSourceMatch::joinMatchWith(intrusive_ptr<DocumentSourceMatch> other } pair<intrusive_ptr<DocumentSourceMatch>, intrusive_ptr<DocumentSourceMatch>> -DocumentSourceMatch::splitSourceBy(const std::set<std::string>& fields) { +DocumentSourceMatch::splitSourceBy(const std::set<std::string>& fields, + const StringMap<std::string>& renames) { pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> newExpr( - expression::splitMatchExpressionBy(std::move(_expression), fields)); + expression::splitMatchExpressionBy(std::move(_expression), fields, renames)); invariant(newExpr.first || newExpr.second); if (!newExpr.first) { - // The entire $match depends on 'fields'. + // The entire $match depends on 'fields'. It cannot be split or moved, so we return this + // stage without modification as the second stage in the pair. _expression = std::move(newExpr.second); return {nullptr, this}; - } else if (!newExpr.second) { - // This $match is entirely independent of 'fields'. + } + + if (!newExpr.second && renames.empty()) { + // This $match is entirely independent of 'fields' and there were no renames to apply. In + // this case, the current stage can swap with its predecessor without modification. We + // simply return this as the first stage in the pair. _expression = std::move(newExpr.first); return {this, nullptr}; } - // A MatchExpression requires that it is outlived by the BSONObj it is parsed from. Since the - // original BSONObj this $match was created from is no longer equivalent to either of the - // MatchExpressions we return, we instead take each of these expressions, serialize them, and - // then re-parse them, constructing new BSON that is owned by the DocumentSourceMatch. - - // Build an expression for a new $match stage. + // If we're here, then either: + // - this stage has split into two, or + // - this stage can swap with its predecessor, but potentially had renames applied. + // + // In any of these cases, we have created new expression(s). A MatchExpression requires that it + // is outlived by the BSONObj it is parsed from. But since the MatchExpressions were modified, + // the corresponding BSONObj may not exist. Therefore, we take each of these expressions, + // serialize them, and then re-parse them, constructing new BSON that is owned by the + // DocumentSourceMatch. BSONObjBuilder firstBob; newExpr.first->serialize(&firstBob); + auto firstMatch = DocumentSourceMatch::create(firstBob.obj(), pExpCtx); - // This $match stage is still needed, so update the MatchExpression as needed. - BSONObjBuilder secondBob; - newExpr.second->serialize(&secondBob); + intrusive_ptr<DocumentSourceMatch> secondMatch; + if (newExpr.second) { + BSONObjBuilder secondBob; + newExpr.second->serialize(&secondBob); + secondMatch = DocumentSourceMatch::create(secondBob.obj(), pExpCtx); + } - return {DocumentSourceMatch::create(firstBob.obj(), pExpCtx), - DocumentSourceMatch::create(secondBob.obj(), pExpCtx)}; + return {std::move(firstMatch), std::move(secondMatch)}; } boost::intrusive_ptr<DocumentSourceMatch> DocumentSourceMatch::descendMatchOnPath( diff --git a/src/mongo/db/pipeline/document_source_match.h b/src/mongo/db/pipeline/document_source_match.h index 4abd4862992..b6c41cd5161 100644 --- a/src/mongo/db/pipeline/document_source_match.h +++ b/src/mongo/db/pipeline/document_source_match.h @@ -115,9 +115,14 @@ public: * * For example, {$match: {a: "foo", "b.c": 4}} split by "b" will return pointers to two stages: * {$match: {a: "foo"}}, and {$match: {"b.c": 4}}. + * + * The 'renames' structure maps from a field to an alias that should be used in the independent + * portion of the match. For example, suppose that we split by fields "a" with the rename "b" => + * "c". The match {$match: {a: "foo", b: "bar", z: "baz"}} will split into {$match: {c: "bar", + * z: "baz"}} and {$match: {a: "foo"}}. */ std::pair<boost::intrusive_ptr<DocumentSourceMatch>, boost::intrusive_ptr<DocumentSourceMatch>> - splitSourceBy(const std::set<std::string>& fields); + splitSourceBy(const std::set<std::string>& fields, const StringMap<std::string>& renames); /** * Given a document 'input', extract 'fields' and produce a BSONObj with those values. diff --git a/src/mongo/db/pipeline/document_source_replace_root.cpp b/src/mongo/db/pipeline/document_source_replace_root.cpp index 521cf748784..8f6782dc47e 100644 --- a/src/mongo/db/pipeline/document_source_replace_root.cpp +++ b/src/mongo/db/pipeline/document_source_replace_root.cpp @@ -89,7 +89,7 @@ public: DocumentSource::GetModPathsReturn getModifiedPaths() const final { // Replaces the entire root, so all paths are modified. - return {DocumentSource::GetModPathsReturn::Type::kAllPaths, std::set<std::string>{}}; + return {DocumentSource::GetModPathsReturn::Type::kAllPaths, std::set<std::string>{}, {}}; } // Create the replaceRoot transformer. Uasserts on invalid input. diff --git a/src/mongo/db/pipeline/document_source_sort.h b/src/mongo/db/pipeline/document_source_sort.h index 83af9a4bcb6..c3b1609b641 100644 --- a/src/mongo/db/pipeline/document_source_sort.h +++ b/src/mongo/db/pipeline/document_source_sort.h @@ -49,7 +49,7 @@ public: GetModPathsReturn getModifiedPaths() const final { // A $sort does not modify any paths. - return {GetModPathsReturn::Type::kFiniteSet, std::set<std::string>{}}; + return {GetModPathsReturn::Type::kFiniteSet, std::set<std::string>{}, {}}; } bool canSwapWithMatch() const final { diff --git a/src/mongo/db/pipeline/document_source_unwind.cpp b/src/mongo/db/pipeline/document_source_unwind.cpp index 079439bd842..1c172cb17be 100644 --- a/src/mongo/db/pipeline/document_source_unwind.cpp +++ b/src/mongo/db/pipeline/document_source_unwind.cpp @@ -236,7 +236,7 @@ DocumentSource::GetModPathsReturn DocumentSourceUnwind::getModifiedPaths() const if (_indexPath) { modifiedFields.insert(_indexPath->fullPath()); } - return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedFields)}; + return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedFields), {}}; } Value DocumentSourceUnwind::serialize(boost::optional<ExplainOptions::Verbosity> explain) const { diff --git a/src/mongo/db/pipeline/expression.h b/src/mongo/db/pipeline/expression.h index a9eb4835a62..62bf6904fc5 100644 --- a/src/mongo/db/pipeline/expression.h +++ b/src/mongo/db/pipeline/expression.h @@ -769,6 +769,10 @@ public: return _fieldPath; } + Variables::Id getVariableId() const { + return _variable; + } + private: ExpressionFieldPath(const boost::intrusive_ptr<ExpressionContext>& expCtx, const std::string& fieldPath, diff --git a/src/mongo/db/pipeline/parsed_add_fields.h b/src/mongo/db/pipeline/parsed_add_fields.h index 8cc3d24f913..f4c1b535f63 100644 --- a/src/mongo/db/pipeline/parsed_add_fields.h +++ b/src/mongo/db/pipeline/parsed_add_fields.h @@ -95,8 +95,11 @@ public: DocumentSource::GetModPathsReturn getModifiedPaths() const final { std::set<std::string> computedPaths; - _root->addComputedPaths(&computedPaths); - return {DocumentSource::GetModPathsReturn::Type::kFiniteSet, std::move(computedPaths)}; + StringMap<std::string> renamedPaths; + _root->addComputedPaths(&computedPaths, &renamedPaths); + return {DocumentSource::GetModPathsReturn::Type::kFiniteSet, + std::move(computedPaths), + std::move(renamedPaths)}; } /** diff --git a/src/mongo/db/pipeline/parsed_exclusion_projection.h b/src/mongo/db/pipeline/parsed_exclusion_projection.h index 74314d97439..8f64ac5258e 100644 --- a/src/mongo/db/pipeline/parsed_exclusion_projection.h +++ b/src/mongo/db/pipeline/parsed_exclusion_projection.h @@ -124,7 +124,7 @@ public: DocumentSource::GetModPathsReturn getModifiedPaths() const final { std::set<std::string> modifiedPaths; _root->addModifiedPaths(&modifiedPaths); - return {DocumentSource::GetModPathsReturn::Type::kFiniteSet, std::move(modifiedPaths)}; + return {DocumentSource::GetModPathsReturn::Type::kFiniteSet, std::move(modifiedPaths), {}}; } private: diff --git a/src/mongo/db/pipeline/parsed_inclusion_projection.cpp b/src/mongo/db/pipeline/parsed_inclusion_projection.cpp index da6b9e72ce9..a2c80238570 100644 --- a/src/mongo/db/pipeline/parsed_inclusion_projection.cpp +++ b/src/mongo/db/pipeline/parsed_inclusion_projection.cpp @@ -239,12 +239,33 @@ void InclusionNode::addPreservedPaths(std::set<std::string>* preservedPaths) con } } -void InclusionNode::addComputedPaths(std::set<std::string>* computedPaths) const { +void InclusionNode::addComputedPaths(std::set<std::string>* computedPaths, + StringMap<std::string>* renamedPaths) const { for (auto&& computedPair : _expressions) { + auto exprFieldPath = dynamic_cast<ExpressionFieldPath*>(computedPair.second.get()); + if (exprFieldPath) { + const auto& fieldPath = exprFieldPath->getFieldPath(); + // Make sure that the first path component is CURRENT/ROOT. If this is not explicitly + // provided by the user, we expect the system to have filled it in as the first path + // component. + // + // TODO SERVER-27115: Support field paths that have multiple components. + if (exprFieldPath->getVariableId() == Variables::kRootId && + fieldPath.getPathLength() == 2u) { + // Found a renamed path. Record it and continue, since we don't want to also put + // renamed paths inside the 'computedPaths' set. + std::string oldPath = fieldPath.tail().fullPath(); + std::string newPath = + FieldPath::getFullyQualifiedPath(_pathToNode, computedPair.first); + (*renamedPaths)[std::move(newPath)] = std::move(oldPath); + continue; + } + } + computedPaths->insert(FieldPath::getFullyQualifiedPath(_pathToNode, computedPair.first)); } for (auto&& childPair : _children) { - childPair.second->addComputedPaths(computedPaths); + childPair.second->addComputedPaths(computedPaths, renamedPaths); } } diff --git a/src/mongo/db/pipeline/parsed_inclusion_projection.h b/src/mongo/db/pipeline/parsed_inclusion_projection.h index 1c0a667b025..eb81015f8a4 100644 --- a/src/mongo/db/pipeline/parsed_inclusion_projection.h +++ b/src/mongo/db/pipeline/parsed_inclusion_projection.h @@ -126,9 +126,15 @@ public: void addPreservedPaths(std::set<std::string>* preservedPaths) const; /** - * Recursively adds all paths that are purely computed in this inclusion projection. + * Recursively adds all paths that are purely computed in this inclusion projection to + * 'computedPaths'. + * + * Computed paths that are identified as the result of a simple rename are instead filled out in + * 'renamedPaths'. Each entry in 'renamedPaths' maps from the path's new name to its old name + * prior to application of this inclusion projection. */ - void addComputedPaths(std::set<std::string>* computedPaths) const; + void addComputedPaths(std::set<std::string>* computedPaths, + StringMap<std::string>* renamedPaths) const; private: // Helpers for the Document versions above. These will apply the transformation recursively to @@ -222,7 +228,14 @@ public: DocumentSource::GetModPathsReturn getModifiedPaths() const final { std::set<std::string> preservedPaths; _root->addPreservedPaths(&preservedPaths); - return {DocumentSource::GetModPathsReturn::Type::kAllExcept, std::move(preservedPaths)}; + + std::set<std::string> computedPaths; + StringMap<std::string> renamedPaths; + _root->addComputedPaths(&computedPaths, &renamedPaths); + + return {DocumentSource::GetModPathsReturn::Type::kAllExcept, + std::move(preservedPaths), + std::move(renamedPaths)}; } /** diff --git a/src/mongo/db/pipeline/pipeline_test.cpp b/src/mongo/db/pipeline/pipeline_test.cpp index 380a5824fd6..68dfeb35bad 100644 --- a/src/mongo/db/pipeline/pipeline_test.cpp +++ b/src/mongo/db/pipeline/pipeline_test.cpp @@ -758,6 +758,85 @@ TEST(PipelineOptimizationTest, MatchShouldNotSwapBeforeSkip) { assertPipelineOptimizesTo(pipeline, pipeline); } +TEST(PipelineOptimizationTest, MatchShouldMoveAcrossProjectRename) { + string inputPipe = "[{$project: {_id: true, a: '$b'}}, {$match: {a: {$eq: 1}}}]"; + string outputPipe = "[{$match: {b: {$eq: 1}}}, {$project: {_id: true, a: '$b'}}]"; + assertPipelineOptimizesTo(inputPipe, outputPipe); +} + +TEST(PipelineOptimizationTest, MatchShouldMoveAcrossAddFieldsRename) { + string inputPipe = "[{$addFields: {a: '$b'}}, {$match: {a: {$eq: 1}}}]"; + string outputPipe = "[{$match: {b: {$eq: 1}}}, {$addFields: {a: '$b'}}]"; + assertPipelineOptimizesTo(inputPipe, outputPipe); +} + +TEST(PipelineOptimizationTest, MatchShouldMoveAcrossProjectRenameWithExplicitROOT) { + string inputPipe = "[{$project: {_id: true, a: '$$ROOT.b'}}, {$match: {a: {$eq: 1}}}]"; + string outputPipe = "[{$match: {b: {$eq: 1}}}, {$project: {_id: true, a: '$$ROOT.b'}}]"; + assertPipelineOptimizesTo(inputPipe, outputPipe); +} + +TEST(PipelineOptimizationTest, MatchShouldMoveAcrossAddFieldsRenameWithExplicitCURRENT) { + string inputPipe = "[{$addFields: {a: '$$CURRENT.b'}}, {$match: {a: {$eq: 1}}}]"; + string outputPipe = "[{$match: {b: {$eq: 1}}}, {$addFields: {a: '$b'}}]"; + assertPipelineOptimizesTo(inputPipe, outputPipe); +} + +TEST(PipelineOptimizationTest, PartiallyDependentMatchWithRenameShouldSplitAcrossAddFields) { + string inputPipe = + "[{$addFields: {'a.b': '$c', d: {$add: ['$e', '$f']}}}," + "{$match: {$and: [{$or: [{'a.b': 1}, {x: 2}]}, {d: 3}]}}]"; + string outputPipe = + "[{$match: {$or: [{c: {$eq: 1}}, {x: {$eq: 2}}]}}," + "{$addFields: {a: {b: '$c'}, d: {$add: ['$e', '$f']}}}," + "{$match: {d: {$eq: 3}}}]"; + assertPipelineOptimizesTo(inputPipe, outputPipe); +} + +TEST(PipelineOptimizationTest, NorCanSplitAcrossProjectWithRename) { + string inputPipe = + "[{$project: {_id: false, x: true, y: '$z'}}," + "{$match: {$nor: [{w: {$eq: 1}}, {y: {$eq: 1}}]}}]"; + string outputPipe = + "[{$match: {$nor: [{z: {$eq: 1}}]}}," + "{$project: {_id: false, x: true, y: '$z'}}," + "{$match: {$nor: [{w: {$eq: 1}}]}}]"; + assertPipelineOptimizesTo(inputPipe, outputPipe); +} + +TEST(PipelineOptimizationTest, MatchCanMoveAcrossSeveralRenames) { + string inputPipe = + "[{$project: {_id: false, c: '$d'}}," + "{$addFields: {b: '$c'}}," + "{$project: {a: '$b', z: 1}}," + "{$match: {a: 1, z: 2}}]"; + string outputPipe = + "[{$match: {d: {$eq: 1}}}," + "{$project: {_id: false, c: '$d'}}," + "{$match: {z: {$eq: 2}}}," + "{$addFields: {b: '$c'}}," + "{$project: {_id: true, z: true, a: '$b'}}]"; + assertPipelineOptimizesTo(inputPipe, outputPipe); +} + +TEST(PipelineOptimizationTest, RenameShouldNotBeAppliedToDependentMatch) { + string pipeline = + "[{$project: {_id: false, x: {$add: ['$foo', '$bar']}, y: '$z'}}," + "{$match: {$or: [{x: {$eq: 1}}, {y: {$eq: 1}}]}}]"; + assertPipelineOptimizesTo(pipeline, pipeline); +} + +TEST(PipelineOptimizationTest, MatchCannotMoveAcrossAddFieldsRenameOfDottedPath) { + string pipeline = "[{$addFields: {a: '$b.c'}}, {$match: {a: {$eq: 1}}}]"; + assertPipelineOptimizesTo(pipeline, pipeline); +} + +TEST(PipelineOptimizationTest, MatchCannotMoveAcrossProjectRenameOfDottedPath) { + string inputPipe = "[{$project: {_id: false, a: '$$CURRENT.b.c'}}, {$match: {a: {$eq: 1}}}]"; + string outputPipe = "[{$project: {_id: false, a: '$b.c'}}, {$match: {a: {$eq: 1}}}]"; + assertPipelineOptimizesTo(inputPipe, outputPipe); +} + } // namespace Local namespace Sharded { |