summaryrefslogtreecommitdiff
path: root/src/mongo
diff options
context:
space:
mode:
authorDavid Storch <david.storch@10gen.com>2017-04-12 14:53:25 -0400
committerDavid Storch <david.storch@10gen.com>2017-04-18 18:11:15 -0400
commitb9f9195390142f4e1363dc83b986ead5dc8993b8 (patch)
treed57f4c4643a64d3e64720a90560cc511dd8b5467 /src/mongo
parent071d0398b7e527fc5b8c1ffdd3474c91f34616f0 (diff)
downloadmongo-b9f9195390142f4e1363dc83b986ead5dc8993b8.tar.gz
SERVER-27115 extend $match swapping optimization to handle renamed fields
If a field renamed by $project or $addFields is used in a subsequent $match, we can now swap the $match and update it to use the original (or, "renamed from") field name. This allows $match planning to result in better index usage in some cases.
Diffstat (limited to 'src/mongo')
-rw-r--r--src/mongo/db/matcher/expression_algo.cpp163
-rw-r--r--src/mongo/db/matcher/expression_algo.h11
-rw-r--r--src/mongo/db/matcher/expression_algo_test.cpp184
-rw-r--r--src/mongo/db/pipeline/document_source.cpp9
-rw-r--r--src/mongo/db/pipeline/document_source.h20
-rw-r--r--src/mongo/db/pipeline/document_source_graph_lookup.cpp2
-rw-r--r--src/mongo/db/pipeline/document_source_lookup.cpp2
-rw-r--r--src/mongo/db/pipeline/document_source_match.cpp44
-rw-r--r--src/mongo/db/pipeline/document_source_match.h7
-rw-r--r--src/mongo/db/pipeline/document_source_replace_root.cpp2
-rw-r--r--src/mongo/db/pipeline/document_source_sort.h2
-rw-r--r--src/mongo/db/pipeline/document_source_unwind.cpp2
-rw-r--r--src/mongo/db/pipeline/expression.h4
-rw-r--r--src/mongo/db/pipeline/parsed_add_fields.h7
-rw-r--r--src/mongo/db/pipeline/parsed_exclusion_projection.h2
-rw-r--r--src/mongo/db/pipeline/parsed_inclusion_projection.cpp25
-rw-r--r--src/mongo/db/pipeline/parsed_inclusion_projection.h19
-rw-r--r--src/mongo/db/pipeline/pipeline_test.cpp79
18 files changed, 476 insertions, 108 deletions
diff --git a/src/mongo/db/matcher/expression_algo.cpp b/src/mongo/db/matcher/expression_algo.cpp
index d918ead80f9..693ef9f55e9 100644
--- a/src/mongo/db/matcher/expression_algo.cpp
+++ b/src/mongo/db/matcher/expression_algo.cpp
@@ -223,15 +223,6 @@ bool _isSubsetOf(const MatchExpression* lhs, const ExistsMatchExpression* rhs) {
}
/**
- * Returns whether the leaf is a $elemMatch expression.
- */
-bool isElemMatch(const MatchExpression& expr) {
- return expr.matchType() == MatchExpression::ELEM_MATCH_OBJECT ||
- expr.matchType() == MatchExpression::ELEM_MATCH_VALUE;
-}
-
-
-/**
* Returns whether the leaf at 'path' is independent of 'fields'.
*/
bool isLeafIndependentOf(const StringData& path, const std::set<std::string>& fields) {
@@ -283,6 +274,86 @@ unique_ptr<MatchExpression> createNorOfNodes(std::vector<unique_ptr<MatchExpress
return std::move(splitNor);
}
+void applyRenamesToExpression(MatchExpression* expr, const StringMap<std::string>& renames) {
+ if (expr->isArray() || expr->matchType() == MatchExpression::TYPE_OPERATOR) {
+ return;
+ }
+
+ if (expr->isLeaf()) {
+ auto it = renames.find(expr->path());
+ if (it != renames.end()) {
+ LeafMatchExpression* leafExpr = checked_cast<LeafMatchExpression*>(expr);
+ leafExpr->setPath(it->second);
+ }
+ }
+
+ for (size_t i = 0; i < expr->numChildren(); ++i) {
+ applyRenamesToExpression(expr->getChild(i), renames);
+ }
+}
+
+std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>>
+splitMatchExpressionByWithoutRenames(unique_ptr<MatchExpression> expr,
+ const std::set<std::string>& fields) {
+ if (expression::isIndependentOf(*expr, fields)) {
+ // 'expr' does not depend upon 'fields', so it can be completely moved.
+ return {std::move(expr), nullptr};
+ }
+ if (!expr->isLogical()) {
+ // 'expr' is a leaf, and was not independent of 'fields'.
+ return {nullptr, std::move(expr)};
+ }
+
+ std::vector<unique_ptr<MatchExpression>> reliant;
+ std::vector<unique_ptr<MatchExpression>> separate;
+
+ switch (expr->matchType()) {
+ case MatchExpression::AND: {
+ auto andExpr = checked_cast<AndMatchExpression*>(expr.get());
+ for (size_t i = 0; i < andExpr->numChildren(); i++) {
+ auto children =
+ splitMatchExpressionByWithoutRenames(andExpr->releaseChild(i), fields);
+
+ invariant(children.first || children.second);
+
+ if (children.first) {
+ separate.push_back(std::move(children.first));
+ }
+ if (children.second) {
+ reliant.push_back(std::move(children.second));
+ }
+ }
+ return {createAndOfNodes(&separate), createAndOfNodes(&reliant)};
+ }
+ case MatchExpression::NOR: {
+ // We can split a $nor because !(x | y) is logically equivalent to !x & !y.
+
+ // However, we cannot split each child individually; instead, we must look for a wholly
+ // independent child to split off by itself. As an example of why, with 'b' in
+ // 'fields': $nor: [{$and: [{a: 1}, {b: 1}]}]} will match if a is not 1, or if b is not
+ // 1. However, if we split this into: {$nor: [{$and: [{a: 1}]}]}, and
+ // {$nor: [{$and: [{b: 1}]}]}, a document will only pass both stages if neither a nor b
+ // is equal to 1.
+ auto norExpr = checked_cast<NorMatchExpression*>(expr.get());
+ for (size_t i = 0; i < norExpr->numChildren(); i++) {
+ auto child = norExpr->releaseChild(i);
+ if (expression::isIndependentOf(*child, fields)) {
+ separate.push_back(std::move(child));
+ } else {
+ reliant.push_back(std::move(child));
+ }
+ }
+ return {createNorOfNodes(&separate), createNorOfNodes(&reliant)};
+ }
+ case MatchExpression::OR:
+ case MatchExpression::NOT: {
+ // If we aren't independent, we can't safely split.
+ return {nullptr, std::move(expr)};
+ }
+ default: { MONGO_UNREACHABLE; }
+ }
+}
+
} // namespace
namespace expression {
@@ -348,69 +419,29 @@ bool isIndependentOf(const MatchExpression& expr, const std::set<std::string>& p
return true;
}
- // At this point, we know 'expr' is a leaf. If it is an elemMatch, we do not attempt to
- // determine if it is independent or not, and instead just return false.
- return !isElemMatch(expr) && isLeafIndependentOf(expr.path(), pathSet);
+ // Certain kinds of match expressions are never considered independent.
+ if (expr.isArray() || expr.matchType() == MatchExpression::TYPE_OPERATOR) {
+ return false;
+ }
+
+ return isLeafIndependentOf(expr.path(), pathSet);
}
std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitMatchExpressionBy(
- unique_ptr<MatchExpression> expr, const std::set<std::string>& fields) {
- if (isIndependentOf(*expr, fields)) {
- // 'expr' does not depend upon 'fields', so it can be completely moved.
- return {std::move(expr), nullptr};
- }
- if (!expr->isLogical()) {
- // 'expr' is a leaf, and was not independent of 'fields'.
- return {nullptr, std::move(expr)};
+ unique_ptr<MatchExpression> expr,
+ const std::set<std::string>& fields,
+ const StringMap<std::string>& renames) {
+ // TODO SERVER-27115: Currently renames from dotted fields are not supported, but this
+ // restriction can be relaxed.
+ for (auto&& rename : renames) {
+ invariant(rename.second.find('.') == std::string::npos);
}
- std::vector<unique_ptr<MatchExpression>> reliant;
- std::vector<unique_ptr<MatchExpression>> separate;
-
- switch (expr->matchType()) {
- case MatchExpression::AND: {
- auto andExpr = checked_cast<AndMatchExpression*>(expr.get());
- for (size_t i = 0; i < andExpr->numChildren(); i++) {
- auto children = splitMatchExpressionBy(andExpr->releaseChild(i), fields);
-
- invariant(children.first || children.second);
-
- if (children.first) {
- separate.push_back(std::move(children.first));
- }
- if (children.second) {
- reliant.push_back(std::move(children.second));
- }
- }
- return {createAndOfNodes(&separate), createAndOfNodes(&reliant)};
- }
- case MatchExpression::NOR: {
- // We can split a $nor because !(x | y) is logically equivalent to !x & !y.
-
- // However, we cannot split each child individually; instead, we must look for a wholly
- // independent child to split off by itself. As an example of why, with 'b' in
- // 'fields': $nor: [{$and: [{a: 1}, {b: 1}]}]} will match if a is not 1, or if b is not
- // 1. However, if we split this into: {$nor: [{$and: [{a: 1}]}]}, and
- // {$nor: [{$and: [{b: 1}]}]}, a document will only pass both stages if neither a nor b
- // is equal to 1.
- auto norExpr = checked_cast<NorMatchExpression*>(expr.get());
- for (size_t i = 0; i < norExpr->numChildren(); i++) {
- auto child = norExpr->releaseChild(i);
- if (isIndependentOf(*child, fields)) {
- separate.push_back(std::move(child));
- } else {
- reliant.push_back(std::move(child));
- }
- }
- return {createNorOfNodes(&separate), createNorOfNodes(&reliant)};
- }
- case MatchExpression::OR:
- case MatchExpression::NOT: {
- // If we aren't independent, we can't safely split.
- return {nullptr, std::move(expr)};
- }
- default: { MONGO_UNREACHABLE; }
+ auto splitExpr = splitMatchExpressionByWithoutRenames(std::move(expr), fields);
+ if (splitExpr.first) {
+ applyRenamesToExpression(splitExpr.first.get(), renames);
}
+ return splitExpr;
}
void mapOver(MatchExpression* expr, NodeTraversalFunc func, std::string path) {
diff --git a/src/mongo/db/matcher/expression_algo.h b/src/mongo/db/matcher/expression_algo.h
index 1144715c8cf..70092eb644c 100644
--- a/src/mongo/db/matcher/expression_algo.h
+++ b/src/mongo/db/matcher/expression_algo.h
@@ -35,6 +35,7 @@
#include "mongo/base/string_data.h"
#include "mongo/stdx/functional.h"
+#include "mongo/util/string_map.h"
namespace mongo {
@@ -114,10 +115,18 @@ void mapOver(MatchExpression* expr, NodeTraversalFunc func, std::string path = "
* independent, returns {exprLeft, exprRight}, where each new MatchExpression contains a portion of
* 'expr'.
*
+ * Any paths which should be renamed are encoded in 'renames', which maps from path names in 'expr'
+ * to the new values of those paths. If the return value is {exprLeft, exprRight} or {exprLeft,
+ * nullptr}, exprLeft will reflect the path renames. For example, suppose the original match
+ * expression is {old: {$gt: 3}} and 'renames' contains the mapping "old" => "new". The returned
+ * exprLeft value will be {new: {$gt: 3}}, provided that "old" is not in 'fields'.
+ *
* Never returns {nullptr, nullptr}.
*/
std::pair<std::unique_ptr<MatchExpression>, std::unique_ptr<MatchExpression>>
-splitMatchExpressionBy(std::unique_ptr<MatchExpression> expr, const std::set<std::string>& fields);
+splitMatchExpressionBy(std::unique_ptr<MatchExpression> expr,
+ const std::set<std::string>& fields,
+ const StringMap<std::string>& renames);
} // namespace expression
} // namespace mongo
diff --git a/src/mongo/db/matcher/expression_algo_test.cpp b/src/mongo/db/matcher/expression_algo_test.cpp
index c97d766679a..14678945b4f 100644
--- a/src/mongo/db/matcher/expression_algo_test.cpp
+++ b/src/mongo/db/matcher/expression_algo_test.cpp
@@ -805,7 +805,7 @@ TEST(SplitMatchExpression, AndWithSplittableChildrenIsSplittable) {
ASSERT_OK(status.getStatus());
std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr =
- expression::splitMatchExpressionBy(std::move(status.getValue()), {"b"});
+ expression::splitMatchExpressionBy(std::move(status.getValue()), {"b"}, {});
ASSERT_TRUE(splitExpr.first.get());
BSONObjBuilder firstBob;
@@ -827,7 +827,7 @@ TEST(SplitMatchExpression, NorWithIndependentChildrenIsSplittable) {
ASSERT_OK(status.getStatus());
std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr =
- expression::splitMatchExpressionBy(std::move(status.getValue()), {"b"});
+ expression::splitMatchExpressionBy(std::move(status.getValue()), {"b"}, {});
ASSERT_TRUE(splitExpr.first.get());
BSONObjBuilder firstBob;
@@ -849,7 +849,7 @@ TEST(SplitMatchExpression, NotWithIndependentChildIsSplittable) {
ASSERT_OK(status.getStatus());
std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr =
- expression::splitMatchExpressionBy(std::move(status.getValue()), {"y"});
+ expression::splitMatchExpressionBy(std::move(status.getValue()), {"y"}, {});
ASSERT_TRUE(splitExpr.first.get());
BSONObjBuilder firstBob;
@@ -867,7 +867,7 @@ TEST(SplitMatchExpression, OrWithOnlyIndependentChildrenIsNotSplittable) {
ASSERT_OK(status.getStatus());
std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr =
- expression::splitMatchExpressionBy(std::move(status.getValue()), {"b"});
+ expression::splitMatchExpressionBy(std::move(status.getValue()), {"b"}, {});
ASSERT_TRUE(splitExpr.second.get());
BSONObjBuilder bob;
@@ -888,7 +888,7 @@ TEST(SplitMatchExpression, ComplexMatchExpressionSplitsCorrectly) {
ASSERT_OK(status.getStatus());
std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr =
- expression::splitMatchExpressionBy(std::move(status.getValue()), {"x"});
+ expression::splitMatchExpressionBy(std::move(status.getValue()), {"x"}, {});
ASSERT_TRUE(splitExpr.first.get());
BSONObjBuilder firstBob;
@@ -914,7 +914,7 @@ TEST(SplitMatchExpression, ShouldNotExtractPrefixOfDottedPathAsIndependent) {
ASSERT_OK(status.getStatus());
std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr =
- expression::splitMatchExpressionBy(std::move(status.getValue()), {"a.b"});
+ expression::splitMatchExpressionBy(std::move(status.getValue()), {"a.b"}, {});
ASSERT_TRUE(splitExpr.first.get());
BSONObjBuilder firstBob;
@@ -928,6 +928,178 @@ TEST(SplitMatchExpression, ShouldNotExtractPrefixOfDottedPathAsIndependent) {
ASSERT_BSONOBJ_EQ(secondBob.obj(), fromjson("{$and: [{a: {$eq: 1}}, {'a.b': {$eq: 1}}]}"));
}
+TEST(SplitMatchExpression, ShouldMoveIndependentLeafPredicateAcrossRename) {
+ BSONObj matchPredicate = fromjson("{a: 1}");
+ const CollatorInterface* collator = nullptr;
+ auto matcher = MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop(), collator);
+ ASSERT_OK(matcher.getStatus());
+
+ StringMap<std::string> renames{{"a", "b"}};
+ std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr =
+ expression::splitMatchExpressionBy(std::move(matcher.getValue()), {}, renames);
+
+ ASSERT_TRUE(splitExpr.first.get());
+ BSONObjBuilder firstBob;
+ splitExpr.first->serialize(&firstBob);
+ ASSERT_BSONOBJ_EQ(firstBob.obj(), fromjson("{b: {$eq: 1}}"));
+
+ ASSERT_FALSE(splitExpr.second.get());
+}
+
+TEST(SplitMatchExpression, ShouldMoveIndependentAndPredicateAcrossRename) {
+ BSONObj matchPredicate = fromjson("{$and: [{a: 1}, {b: 2}]}");
+ const CollatorInterface* collator = nullptr;
+ auto matcher = MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop(), collator);
+ ASSERT_OK(matcher.getStatus());
+
+ StringMap<std::string> renames{{"a", "c"}};
+ std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr =
+ expression::splitMatchExpressionBy(std::move(matcher.getValue()), {}, renames);
+
+ ASSERT_TRUE(splitExpr.first.get());
+ BSONObjBuilder firstBob;
+ splitExpr.first->serialize(&firstBob);
+ ASSERT_BSONOBJ_EQ(firstBob.obj(), fromjson("{$and: [{c: {$eq: 1}}, {b: {$eq: 2}}]}"));
+
+ ASSERT_FALSE(splitExpr.second.get());
+}
+
+TEST(SplitMatchExpression, ShouldSplitPartiallyDependentAndPredicateAcrossRename) {
+ BSONObj matchPredicate = fromjson("{$and: [{a: 1}, {b: 2}]}");
+ const CollatorInterface* collator = nullptr;
+ auto matcher = MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop(), collator);
+ ASSERT_OK(matcher.getStatus());
+
+ StringMap<std::string> renames{{"a", "c"}};
+ std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr =
+ expression::splitMatchExpressionBy(std::move(matcher.getValue()), {"b"}, renames);
+
+ ASSERT_TRUE(splitExpr.first.get());
+ BSONObjBuilder firstBob;
+ splitExpr.first->serialize(&firstBob);
+ ASSERT_BSONOBJ_EQ(firstBob.obj(), fromjson("{c: {$eq: 1}}"));
+
+ ASSERT_TRUE(splitExpr.second.get());
+ BSONObjBuilder secondBob;
+ splitExpr.second->serialize(&secondBob);
+ ASSERT_BSONOBJ_EQ(secondBob.obj(), fromjson("{b: {$eq: 2}}"));
+}
+
+TEST(SplitMatchExpression, ShouldSplitPartiallyDependentComplexPredicateMultipleRenames) {
+ BSONObj matchPredicate = fromjson("{$and: [{a: 1}, {$or: [{b: 2}, {c: 3}]}]}");
+ const CollatorInterface* collator = nullptr;
+ auto matcher = MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop(), collator);
+ ASSERT_OK(matcher.getStatus());
+
+ StringMap<std::string> renames{{"b", "d"}, {"c", "e"}};
+ std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr =
+ expression::splitMatchExpressionBy(std::move(matcher.getValue()), {"a"}, renames);
+
+ ASSERT_TRUE(splitExpr.first.get());
+ BSONObjBuilder firstBob;
+ splitExpr.first->serialize(&firstBob);
+ ASSERT_BSONOBJ_EQ(firstBob.obj(), fromjson("{$or: [{d: {$eq: 2}}, {e: {$eq: 3}}]}"));
+
+ ASSERT_TRUE(splitExpr.second.get());
+ BSONObjBuilder secondBob;
+ splitExpr.second->serialize(&secondBob);
+ ASSERT_BSONOBJ_EQ(secondBob.obj(), fromjson("{a: {$eq: 1}}"));
+}
+
+TEST(SplitMatchExpression,
+ ShouldSplitPartiallyDependentComplexPredicateMultipleRenamesDottedPaths) {
+ BSONObj matchPredicate = fromjson("{$and: [{a: 1}, {$or: [{'d.e.f': 2}, {'e.f.g': 3}]}]}");
+ const CollatorInterface* collator = nullptr;
+ auto matcher = MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop(), collator);
+ ASSERT_OK(matcher.getStatus());
+
+ StringMap<std::string> renames{{"d.e.f", "x"}, {"e.f.g", "y"}};
+ std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr =
+ expression::splitMatchExpressionBy(std::move(matcher.getValue()), {"a"}, renames);
+
+ ASSERT_TRUE(splitExpr.first.get());
+ BSONObjBuilder firstBob;
+ splitExpr.first->serialize(&firstBob);
+ ASSERT_BSONOBJ_EQ(firstBob.obj(), fromjson("{$or: [{x: {$eq: 2}}, {y: {$eq: 3}}]}"));
+
+ ASSERT_TRUE(splitExpr.second.get());
+ BSONObjBuilder secondBob;
+ splitExpr.second->serialize(&secondBob);
+ ASSERT_BSONOBJ_EQ(secondBob.obj(), fromjson("{a: {$eq: 1}}"));
+}
+
+TEST(SplitMatchExpression, ShouldNotMoveElemMatchObjectAcrossRename) {
+ BSONObj matchPredicate = fromjson("{a: {$elemMatch: {b: 3}}}");
+ const CollatorInterface* collator = nullptr;
+ auto matcher = MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop(), collator);
+ ASSERT_OK(matcher.getStatus());
+
+ StringMap<std::string> renames{{"a", "c"}};
+ std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr =
+ expression::splitMatchExpressionBy(std::move(matcher.getValue()), {}, renames);
+
+ ASSERT_FALSE(splitExpr.first.get());
+
+ ASSERT_TRUE(splitExpr.second.get());
+ BSONObjBuilder secondBob;
+ splitExpr.second->serialize(&secondBob);
+ ASSERT_BSONOBJ_EQ(secondBob.obj(), fromjson("{a: {$elemMatch: {b: {$eq: 3}}}}"));
+}
+
+TEST(SplitMatchExpression, ShouldNotMoveElemMatchValueAcrossRename) {
+ BSONObj matchPredicate = fromjson("{a: {$elemMatch: {$eq: 3}}}");
+ const CollatorInterface* collator = nullptr;
+ auto matcher = MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop(), collator);
+ ASSERT_OK(matcher.getStatus());
+
+ StringMap<std::string> renames{{"a", "c"}};
+ std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr =
+ expression::splitMatchExpressionBy(std::move(matcher.getValue()), {}, renames);
+
+ ASSERT_FALSE(splitExpr.first.get());
+
+ ASSERT_TRUE(splitExpr.second.get());
+ BSONObjBuilder secondBob;
+ splitExpr.second->serialize(&secondBob);
+ ASSERT_BSONOBJ_EQ(secondBob.obj(), fromjson("{a: {$elemMatch: {$eq: 3}}}"));
+}
+
+TEST(SplitMatchExpression, ShouldNotMoveTypeAcrossRename) {
+ BSONObj matchPredicate = fromjson("{a: {$type: 16}}");
+ const CollatorInterface* collator = nullptr;
+ auto matcher = MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop(), collator);
+ ASSERT_OK(matcher.getStatus());
+
+ StringMap<std::string> renames{{"a", "c"}};
+ std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr =
+ expression::splitMatchExpressionBy(std::move(matcher.getValue()), {}, renames);
+
+ ASSERT_FALSE(splitExpr.first.get());
+
+ ASSERT_TRUE(splitExpr.second.get());
+ BSONObjBuilder secondBob;
+ splitExpr.second->serialize(&secondBob);
+ ASSERT_BSONOBJ_EQ(secondBob.obj(), fromjson("{a: {$type: 16}}"));
+}
+
+TEST(SplitMatchExpression, ShouldNotMoveSizeAcrossRename) {
+ BSONObj matchPredicate = fromjson("{a: {$size: 3}}");
+ const CollatorInterface* collator = nullptr;
+ auto matcher = MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop(), collator);
+ ASSERT_OK(matcher.getStatus());
+
+ StringMap<std::string> renames{{"a", "c"}};
+ std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr =
+ expression::splitMatchExpressionBy(std::move(matcher.getValue()), {}, renames);
+
+ ASSERT_FALSE(splitExpr.first.get());
+
+ ASSERT_TRUE(splitExpr.second.get());
+ BSONObjBuilder secondBob;
+ splitExpr.second->serialize(&secondBob);
+ ASSERT_BSONOBJ_EQ(secondBob.obj(), fromjson("{a: {$size: 3}}"));
+}
+
TEST(MapOverMatchExpression, DoesMapOverLogicalNodes) {
BSONObj matchPredicate = fromjson("{a: {$not: {$eq: 1}}}");
const CollatorInterface* collator = nullptr;
diff --git a/src/mongo/db/pipeline/document_source.cpp b/src/mongo/db/pipeline/document_source.cpp
index 23e98350197..9651c7cb8e8 100644
--- a/src/mongo/db/pipeline/document_source.cpp
+++ b/src/mongo/db/pipeline/document_source.cpp
@@ -147,10 +147,15 @@ splitMatchByModifiedFields(const boost::intrusive_ptr<DocumentSourceMatch>& matc
case DocumentSource::GetModPathsReturn::Type::kAllExcept: {
DepsTracker depsTracker;
match->getDependencies(&depsTracker);
- modifiedPaths = extractModifiedDependencies(depsTracker.fields, modifiedPathsRet.paths);
+
+ auto preservedPaths = modifiedPathsRet.paths;
+ for (auto&& rename : modifiedPathsRet.renames) {
+ preservedPaths.insert(rename.first);
+ }
+ modifiedPaths = extractModifiedDependencies(depsTracker.fields, preservedPaths);
}
}
- return match->splitSourceBy(modifiedPaths);
+ return match->splitSourceBy(modifiedPaths, modifiedPathsRet.renames);
}
} // namespace
diff --git a/src/mongo/db/pipeline/document_source.h b/src/mongo/db/pipeline/document_source.h
index 2b014c37a61..7b724b555cd 100644
--- a/src/mongo/db/pipeline/document_source.h
+++ b/src/mongo/db/pipeline/document_source.h
@@ -366,11 +366,25 @@ public:
kAllExcept,
};
- GetModPathsReturn(Type type, std::set<std::string>&& paths)
- : type(type), paths(std::move(paths)) {}
+ GetModPathsReturn(Type type,
+ std::set<std::string>&& paths,
+ StringMap<std::string>&& renames)
+ : type(type), paths(std::move(paths)), renames(std::move(renames)) {}
Type type;
std::set<std::string> paths;
+
+ // Stages may fill out 'renames' to contain information about path renames. Each entry in
+ // 'renames' maps from the new name of the path (valid in documents flowing *out* of this
+ // stage) to the old name of the path (valid in documents flowing *into* this stage).
+ //
+ // For example, consider the stage
+ //
+ // {$project: {_id: 0, a: 1, b: "$c"}}
+ //
+ // This stage should return kAllExcept, since it modifies all paths other than "a". It can
+ // also fill out 'renames' with the mapping "b" => "c".
+ StringMap<std::string> renames;
};
/**
@@ -381,7 +395,7 @@ public:
* See GetModPathsReturn above for the possible return values and what they mean.
*/
virtual GetModPathsReturn getModifiedPaths() const {
- return {GetModPathsReturn::Type::kNotSupported, std::set<std::string>{}};
+ return {GetModPathsReturn::Type::kNotSupported, std::set<std::string>{}, {}};
}
/**
diff --git a/src/mongo/db/pipeline/document_source_graph_lookup.cpp b/src/mongo/db/pipeline/document_source_graph_lookup.cpp
index 717b82d59aa..7cc78ba8e95 100644
--- a/src/mongo/db/pipeline/document_source_graph_lookup.cpp
+++ b/src/mongo/db/pipeline/document_source_graph_lookup.cpp
@@ -351,7 +351,7 @@ DocumentSource::GetModPathsReturn DocumentSourceGraphLookUp::getModifiedPaths()
modifiedPaths.insert(pathsModifiedByUnwind.paths.begin(),
pathsModifiedByUnwind.paths.end());
}
- return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedPaths)};
+ return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedPaths), {}};
}
Pipeline::SourceContainer::iterator DocumentSourceGraphLookUp::doOptimizeAt(
diff --git a/src/mongo/db/pipeline/document_source_lookup.cpp b/src/mongo/db/pipeline/document_source_lookup.cpp
index b6cf88172c3..4c68db3f1da 100644
--- a/src/mongo/db/pipeline/document_source_lookup.cpp
+++ b/src/mongo/db/pipeline/document_source_lookup.cpp
@@ -174,7 +174,7 @@ DocumentSource::GetModPathsReturn DocumentSourceLookUp::getModifiedPaths() const
modifiedPaths.insert(pathsModifiedByUnwind.paths.begin(),
pathsModifiedByUnwind.paths.end());
}
- return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedPaths)};
+ return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedPaths), {}};
}
Pipeline::SourceContainer::iterator DocumentSourceLookUp::doOptimizeAt(
diff --git a/src/mongo/db/pipeline/document_source_match.cpp b/src/mongo/db/pipeline/document_source_match.cpp
index 0f611b04423..1b7d19f2d2f 100644
--- a/src/mongo/db/pipeline/document_source_match.cpp
+++ b/src/mongo/db/pipeline/document_source_match.cpp
@@ -371,37 +371,49 @@ void DocumentSourceMatch::joinMatchWith(intrusive_ptr<DocumentSourceMatch> other
}
pair<intrusive_ptr<DocumentSourceMatch>, intrusive_ptr<DocumentSourceMatch>>
-DocumentSourceMatch::splitSourceBy(const std::set<std::string>& fields) {
+DocumentSourceMatch::splitSourceBy(const std::set<std::string>& fields,
+ const StringMap<std::string>& renames) {
pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> newExpr(
- expression::splitMatchExpressionBy(std::move(_expression), fields));
+ expression::splitMatchExpressionBy(std::move(_expression), fields, renames));
invariant(newExpr.first || newExpr.second);
if (!newExpr.first) {
- // The entire $match depends on 'fields'.
+ // The entire $match depends on 'fields'. It cannot be split or moved, so we return this
+ // stage without modification as the second stage in the pair.
_expression = std::move(newExpr.second);
return {nullptr, this};
- } else if (!newExpr.second) {
- // This $match is entirely independent of 'fields'.
+ }
+
+ if (!newExpr.second && renames.empty()) {
+ // This $match is entirely independent of 'fields' and there were no renames to apply. In
+ // this case, the current stage can swap with its predecessor without modification. We
+ // simply return this as the first stage in the pair.
_expression = std::move(newExpr.first);
return {this, nullptr};
}
- // A MatchExpression requires that it is outlived by the BSONObj it is parsed from. Since the
- // original BSONObj this $match was created from is no longer equivalent to either of the
- // MatchExpressions we return, we instead take each of these expressions, serialize them, and
- // then re-parse them, constructing new BSON that is owned by the DocumentSourceMatch.
-
- // Build an expression for a new $match stage.
+ // If we're here, then either:
+ // - this stage has split into two, or
+ // - this stage can swap with its predecessor, but potentially had renames applied.
+ //
+ // In any of these cases, we have created new expression(s). A MatchExpression requires that it
+ // is outlived by the BSONObj it is parsed from. But since the MatchExpressions were modified,
+ // the corresponding BSONObj may not exist. Therefore, we take each of these expressions,
+ // serialize them, and then re-parse them, constructing new BSON that is owned by the
+ // DocumentSourceMatch.
BSONObjBuilder firstBob;
newExpr.first->serialize(&firstBob);
+ auto firstMatch = DocumentSourceMatch::create(firstBob.obj(), pExpCtx);
- // This $match stage is still needed, so update the MatchExpression as needed.
- BSONObjBuilder secondBob;
- newExpr.second->serialize(&secondBob);
+ intrusive_ptr<DocumentSourceMatch> secondMatch;
+ if (newExpr.second) {
+ BSONObjBuilder secondBob;
+ newExpr.second->serialize(&secondBob);
+ secondMatch = DocumentSourceMatch::create(secondBob.obj(), pExpCtx);
+ }
- return {DocumentSourceMatch::create(firstBob.obj(), pExpCtx),
- DocumentSourceMatch::create(secondBob.obj(), pExpCtx)};
+ return {std::move(firstMatch), std::move(secondMatch)};
}
boost::intrusive_ptr<DocumentSourceMatch> DocumentSourceMatch::descendMatchOnPath(
diff --git a/src/mongo/db/pipeline/document_source_match.h b/src/mongo/db/pipeline/document_source_match.h
index 4abd4862992..b6c41cd5161 100644
--- a/src/mongo/db/pipeline/document_source_match.h
+++ b/src/mongo/db/pipeline/document_source_match.h
@@ -115,9 +115,14 @@ public:
*
* For example, {$match: {a: "foo", "b.c": 4}} split by "b" will return pointers to two stages:
* {$match: {a: "foo"}}, and {$match: {"b.c": 4}}.
+ *
+ * The 'renames' structure maps from a field to an alias that should be used in the independent
+ * portion of the match. For example, suppose that we split by fields "a" with the rename "b" =>
+ * "c". The match {$match: {a: "foo", b: "bar", z: "baz"}} will split into {$match: {c: "bar",
+ * z: "baz"}} and {$match: {a: "foo"}}.
*/
std::pair<boost::intrusive_ptr<DocumentSourceMatch>, boost::intrusive_ptr<DocumentSourceMatch>>
- splitSourceBy(const std::set<std::string>& fields);
+ splitSourceBy(const std::set<std::string>& fields, const StringMap<std::string>& renames);
/**
* Given a document 'input', extract 'fields' and produce a BSONObj with those values.
diff --git a/src/mongo/db/pipeline/document_source_replace_root.cpp b/src/mongo/db/pipeline/document_source_replace_root.cpp
index 521cf748784..8f6782dc47e 100644
--- a/src/mongo/db/pipeline/document_source_replace_root.cpp
+++ b/src/mongo/db/pipeline/document_source_replace_root.cpp
@@ -89,7 +89,7 @@ public:
DocumentSource::GetModPathsReturn getModifiedPaths() const final {
// Replaces the entire root, so all paths are modified.
- return {DocumentSource::GetModPathsReturn::Type::kAllPaths, std::set<std::string>{}};
+ return {DocumentSource::GetModPathsReturn::Type::kAllPaths, std::set<std::string>{}, {}};
}
// Create the replaceRoot transformer. Uasserts on invalid input.
diff --git a/src/mongo/db/pipeline/document_source_sort.h b/src/mongo/db/pipeline/document_source_sort.h
index 83af9a4bcb6..c3b1609b641 100644
--- a/src/mongo/db/pipeline/document_source_sort.h
+++ b/src/mongo/db/pipeline/document_source_sort.h
@@ -49,7 +49,7 @@ public:
GetModPathsReturn getModifiedPaths() const final {
// A $sort does not modify any paths.
- return {GetModPathsReturn::Type::kFiniteSet, std::set<std::string>{}};
+ return {GetModPathsReturn::Type::kFiniteSet, std::set<std::string>{}, {}};
}
bool canSwapWithMatch() const final {
diff --git a/src/mongo/db/pipeline/document_source_unwind.cpp b/src/mongo/db/pipeline/document_source_unwind.cpp
index 079439bd842..1c172cb17be 100644
--- a/src/mongo/db/pipeline/document_source_unwind.cpp
+++ b/src/mongo/db/pipeline/document_source_unwind.cpp
@@ -236,7 +236,7 @@ DocumentSource::GetModPathsReturn DocumentSourceUnwind::getModifiedPaths() const
if (_indexPath) {
modifiedFields.insert(_indexPath->fullPath());
}
- return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedFields)};
+ return {GetModPathsReturn::Type::kFiniteSet, std::move(modifiedFields), {}};
}
Value DocumentSourceUnwind::serialize(boost::optional<ExplainOptions::Verbosity> explain) const {
diff --git a/src/mongo/db/pipeline/expression.h b/src/mongo/db/pipeline/expression.h
index a9eb4835a62..62bf6904fc5 100644
--- a/src/mongo/db/pipeline/expression.h
+++ b/src/mongo/db/pipeline/expression.h
@@ -769,6 +769,10 @@ public:
return _fieldPath;
}
+ Variables::Id getVariableId() const {
+ return _variable;
+ }
+
private:
ExpressionFieldPath(const boost::intrusive_ptr<ExpressionContext>& expCtx,
const std::string& fieldPath,
diff --git a/src/mongo/db/pipeline/parsed_add_fields.h b/src/mongo/db/pipeline/parsed_add_fields.h
index 8cc3d24f913..f4c1b535f63 100644
--- a/src/mongo/db/pipeline/parsed_add_fields.h
+++ b/src/mongo/db/pipeline/parsed_add_fields.h
@@ -95,8 +95,11 @@ public:
DocumentSource::GetModPathsReturn getModifiedPaths() const final {
std::set<std::string> computedPaths;
- _root->addComputedPaths(&computedPaths);
- return {DocumentSource::GetModPathsReturn::Type::kFiniteSet, std::move(computedPaths)};
+ StringMap<std::string> renamedPaths;
+ _root->addComputedPaths(&computedPaths, &renamedPaths);
+ return {DocumentSource::GetModPathsReturn::Type::kFiniteSet,
+ std::move(computedPaths),
+ std::move(renamedPaths)};
}
/**
diff --git a/src/mongo/db/pipeline/parsed_exclusion_projection.h b/src/mongo/db/pipeline/parsed_exclusion_projection.h
index 74314d97439..8f64ac5258e 100644
--- a/src/mongo/db/pipeline/parsed_exclusion_projection.h
+++ b/src/mongo/db/pipeline/parsed_exclusion_projection.h
@@ -124,7 +124,7 @@ public:
DocumentSource::GetModPathsReturn getModifiedPaths() const final {
std::set<std::string> modifiedPaths;
_root->addModifiedPaths(&modifiedPaths);
- return {DocumentSource::GetModPathsReturn::Type::kFiniteSet, std::move(modifiedPaths)};
+ return {DocumentSource::GetModPathsReturn::Type::kFiniteSet, std::move(modifiedPaths), {}};
}
private:
diff --git a/src/mongo/db/pipeline/parsed_inclusion_projection.cpp b/src/mongo/db/pipeline/parsed_inclusion_projection.cpp
index da6b9e72ce9..a2c80238570 100644
--- a/src/mongo/db/pipeline/parsed_inclusion_projection.cpp
+++ b/src/mongo/db/pipeline/parsed_inclusion_projection.cpp
@@ -239,12 +239,33 @@ void InclusionNode::addPreservedPaths(std::set<std::string>* preservedPaths) con
}
}
-void InclusionNode::addComputedPaths(std::set<std::string>* computedPaths) const {
+void InclusionNode::addComputedPaths(std::set<std::string>* computedPaths,
+ StringMap<std::string>* renamedPaths) const {
for (auto&& computedPair : _expressions) {
+ auto exprFieldPath = dynamic_cast<ExpressionFieldPath*>(computedPair.second.get());
+ if (exprFieldPath) {
+ const auto& fieldPath = exprFieldPath->getFieldPath();
+ // Make sure that the first path component is CURRENT/ROOT. If this is not explicitly
+ // provided by the user, we expect the system to have filled it in as the first path
+ // component.
+ //
+ // TODO SERVER-27115: Support field paths that have multiple components.
+ if (exprFieldPath->getVariableId() == Variables::kRootId &&
+ fieldPath.getPathLength() == 2u) {
+ // Found a renamed path. Record it and continue, since we don't want to also put
+ // renamed paths inside the 'computedPaths' set.
+ std::string oldPath = fieldPath.tail().fullPath();
+ std::string newPath =
+ FieldPath::getFullyQualifiedPath(_pathToNode, computedPair.first);
+ (*renamedPaths)[std::move(newPath)] = std::move(oldPath);
+ continue;
+ }
+ }
+
computedPaths->insert(FieldPath::getFullyQualifiedPath(_pathToNode, computedPair.first));
}
for (auto&& childPair : _children) {
- childPair.second->addComputedPaths(computedPaths);
+ childPair.second->addComputedPaths(computedPaths, renamedPaths);
}
}
diff --git a/src/mongo/db/pipeline/parsed_inclusion_projection.h b/src/mongo/db/pipeline/parsed_inclusion_projection.h
index 1c0a667b025..eb81015f8a4 100644
--- a/src/mongo/db/pipeline/parsed_inclusion_projection.h
+++ b/src/mongo/db/pipeline/parsed_inclusion_projection.h
@@ -126,9 +126,15 @@ public:
void addPreservedPaths(std::set<std::string>* preservedPaths) const;
/**
- * Recursively adds all paths that are purely computed in this inclusion projection.
+ * Recursively adds all paths that are purely computed in this inclusion projection to
+ * 'computedPaths'.
+ *
+ * Computed paths that are identified as the result of a simple rename are instead filled out in
+ * 'renamedPaths'. Each entry in 'renamedPaths' maps from the path's new name to its old name
+ * prior to application of this inclusion projection.
*/
- void addComputedPaths(std::set<std::string>* computedPaths) const;
+ void addComputedPaths(std::set<std::string>* computedPaths,
+ StringMap<std::string>* renamedPaths) const;
private:
// Helpers for the Document versions above. These will apply the transformation recursively to
@@ -222,7 +228,14 @@ public:
DocumentSource::GetModPathsReturn getModifiedPaths() const final {
std::set<std::string> preservedPaths;
_root->addPreservedPaths(&preservedPaths);
- return {DocumentSource::GetModPathsReturn::Type::kAllExcept, std::move(preservedPaths)};
+
+ std::set<std::string> computedPaths;
+ StringMap<std::string> renamedPaths;
+ _root->addComputedPaths(&computedPaths, &renamedPaths);
+
+ return {DocumentSource::GetModPathsReturn::Type::kAllExcept,
+ std::move(preservedPaths),
+ std::move(renamedPaths)};
}
/**
diff --git a/src/mongo/db/pipeline/pipeline_test.cpp b/src/mongo/db/pipeline/pipeline_test.cpp
index 380a5824fd6..68dfeb35bad 100644
--- a/src/mongo/db/pipeline/pipeline_test.cpp
+++ b/src/mongo/db/pipeline/pipeline_test.cpp
@@ -758,6 +758,85 @@ TEST(PipelineOptimizationTest, MatchShouldNotSwapBeforeSkip) {
assertPipelineOptimizesTo(pipeline, pipeline);
}
+TEST(PipelineOptimizationTest, MatchShouldMoveAcrossProjectRename) {
+ string inputPipe = "[{$project: {_id: true, a: '$b'}}, {$match: {a: {$eq: 1}}}]";
+ string outputPipe = "[{$match: {b: {$eq: 1}}}, {$project: {_id: true, a: '$b'}}]";
+ assertPipelineOptimizesTo(inputPipe, outputPipe);
+}
+
+TEST(PipelineOptimizationTest, MatchShouldMoveAcrossAddFieldsRename) {
+ string inputPipe = "[{$addFields: {a: '$b'}}, {$match: {a: {$eq: 1}}}]";
+ string outputPipe = "[{$match: {b: {$eq: 1}}}, {$addFields: {a: '$b'}}]";
+ assertPipelineOptimizesTo(inputPipe, outputPipe);
+}
+
+TEST(PipelineOptimizationTest, MatchShouldMoveAcrossProjectRenameWithExplicitROOT) {
+ string inputPipe = "[{$project: {_id: true, a: '$$ROOT.b'}}, {$match: {a: {$eq: 1}}}]";
+ string outputPipe = "[{$match: {b: {$eq: 1}}}, {$project: {_id: true, a: '$$ROOT.b'}}]";
+ assertPipelineOptimizesTo(inputPipe, outputPipe);
+}
+
+TEST(PipelineOptimizationTest, MatchShouldMoveAcrossAddFieldsRenameWithExplicitCURRENT) {
+ string inputPipe = "[{$addFields: {a: '$$CURRENT.b'}}, {$match: {a: {$eq: 1}}}]";
+ string outputPipe = "[{$match: {b: {$eq: 1}}}, {$addFields: {a: '$b'}}]";
+ assertPipelineOptimizesTo(inputPipe, outputPipe);
+}
+
+TEST(PipelineOptimizationTest, PartiallyDependentMatchWithRenameShouldSplitAcrossAddFields) {
+ string inputPipe =
+ "[{$addFields: {'a.b': '$c', d: {$add: ['$e', '$f']}}},"
+ "{$match: {$and: [{$or: [{'a.b': 1}, {x: 2}]}, {d: 3}]}}]";
+ string outputPipe =
+ "[{$match: {$or: [{c: {$eq: 1}}, {x: {$eq: 2}}]}},"
+ "{$addFields: {a: {b: '$c'}, d: {$add: ['$e', '$f']}}},"
+ "{$match: {d: {$eq: 3}}}]";
+ assertPipelineOptimizesTo(inputPipe, outputPipe);
+}
+
+TEST(PipelineOptimizationTest, NorCanSplitAcrossProjectWithRename) {
+ string inputPipe =
+ "[{$project: {_id: false, x: true, y: '$z'}},"
+ "{$match: {$nor: [{w: {$eq: 1}}, {y: {$eq: 1}}]}}]";
+ string outputPipe =
+ "[{$match: {$nor: [{z: {$eq: 1}}]}},"
+ "{$project: {_id: false, x: true, y: '$z'}},"
+ "{$match: {$nor: [{w: {$eq: 1}}]}}]";
+ assertPipelineOptimizesTo(inputPipe, outputPipe);
+}
+
+TEST(PipelineOptimizationTest, MatchCanMoveAcrossSeveralRenames) {
+ string inputPipe =
+ "[{$project: {_id: false, c: '$d'}},"
+ "{$addFields: {b: '$c'}},"
+ "{$project: {a: '$b', z: 1}},"
+ "{$match: {a: 1, z: 2}}]";
+ string outputPipe =
+ "[{$match: {d: {$eq: 1}}},"
+ "{$project: {_id: false, c: '$d'}},"
+ "{$match: {z: {$eq: 2}}},"
+ "{$addFields: {b: '$c'}},"
+ "{$project: {_id: true, z: true, a: '$b'}}]";
+ assertPipelineOptimizesTo(inputPipe, outputPipe);
+}
+
+TEST(PipelineOptimizationTest, RenameShouldNotBeAppliedToDependentMatch) {
+ string pipeline =
+ "[{$project: {_id: false, x: {$add: ['$foo', '$bar']}, y: '$z'}},"
+ "{$match: {$or: [{x: {$eq: 1}}, {y: {$eq: 1}}]}}]";
+ assertPipelineOptimizesTo(pipeline, pipeline);
+}
+
+TEST(PipelineOptimizationTest, MatchCannotMoveAcrossAddFieldsRenameOfDottedPath) {
+ string pipeline = "[{$addFields: {a: '$b.c'}}, {$match: {a: {$eq: 1}}}]";
+ assertPipelineOptimizesTo(pipeline, pipeline);
+}
+
+TEST(PipelineOptimizationTest, MatchCannotMoveAcrossProjectRenameOfDottedPath) {
+ string inputPipe = "[{$project: {_id: false, a: '$$CURRENT.b.c'}}, {$match: {a: {$eq: 1}}}]";
+ string outputPipe = "[{$project: {_id: false, a: '$b.c'}}, {$match: {a: {$eq: 1}}}]";
+ assertPipelineOptimizesTo(inputPipe, outputPipe);
+}
+
} // namespace Local
namespace Sharded {