summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBenjamin Murphy <benjamin_murphy@me.com>2016-02-12 16:20:51 -0500
committerBenjamin Murphy <benjamin_murphy@me.com>2016-03-30 16:21:13 -0400
commit2b56c5ce4527403329bc60ee406a0f1a7de3f10a (patch)
tree3124ce031943a41c3fb1138095f1e565378be72a
parent46d994e5c186e9634896db0d434db0a635ffc60b (diff)
downloadmongo-2b56c5ce4527403329bc60ee406a0f1a7de3f10a.tar.gz
SERVER-20506 Conditionally order match and unwind.
-rw-r--r--src/mongo/db/matcher/expression_algo.cpp156
-rw-r--r--src/mongo/db/matcher/expression_algo.h30
-rw-r--r--src/mongo/db/matcher/expression_algo_test.cpp186
-rw-r--r--src/mongo/db/matcher/expression_tree.h6
-rw-r--r--src/mongo/db/pipeline/SConscript1
-rw-r--r--src/mongo/db/pipeline/document_source.h21
-rw-r--r--src/mongo/db/pipeline/document_source_match.cpp52
-rw-r--r--src/mongo/db/pipeline/document_source_test.cpp3
-rw-r--r--src/mongo/db/pipeline/document_source_unwind.cpp35
-rw-r--r--src/mongo/db/pipeline/pipeline_test.cpp90
10 files changed, 577 insertions, 3 deletions
diff --git a/src/mongo/db/matcher/expression_algo.cpp b/src/mongo/db/matcher/expression_algo.cpp
index a8fb260b7d0..79059c204d1 100644
--- a/src/mongo/db/matcher/expression_algo.cpp
+++ b/src/mongo/db/matcher/expression_algo.cpp
@@ -30,11 +30,16 @@
#include "mongo/platform/basic.h"
+#include "mongo/base/checked_cast.h"
#include "mongo/db/matcher/expression.h"
+#include "mongo/db/matcher/expression_algo.h"
#include "mongo/db/matcher/expression_leaf.h"
#include "mongo/db/matcher/expression_tree.h"
namespace mongo {
+
+using std::unique_ptr;
+
namespace {
bool isComparisonMatchExpression(const MatchExpression* expr) {
@@ -219,6 +224,80 @@ bool _isSubsetOf(const MatchExpression* lhs, const ExistsMatchExpression* rhs) {
}
}
+/**
+ * Returns whether the leaf is a $elemMatch expression.
+ */
+bool isElemMatch(const MatchExpression& expr) {
+ return expr.matchType() == MatchExpression::ELEM_MATCH_OBJECT ||
+ expr.matchType() == MatchExpression::ELEM_MATCH_VALUE;
+}
+
+/**
+ * Returns whether 'first' is a prefix of 'second'.
+ */
+bool isPathPrefixOf(const StringData& first, const StringData& second) {
+ if (first == second) {
+ return true;
+ }
+
+ if (first.size() >= second.size()) {
+ return false;
+ }
+
+ return second.startsWith(first) && second[first.size()] == '.';
+}
+
+/**
+ * Returns whether the leaf at 'path' is independent of 'fields'.
+ */
+bool isLeafIndependentOf(const StringData& path, const std::set<std::string>& fields) {
+ // For each field in 'fields', we need to check if that field is a prefix of 'path' or if 'path'
+ // is a prefix of that field. For example, the expression {a.b: {c: 1}} is not independent of
+ // 'a.b.c', and and the expression {a.b.c.d: 1} is not independent of 'a.b.c'.
+ for (StringData field : fields) {
+ if (isPathPrefixOf(path, field) || isPathPrefixOf(field, path)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+/**
+ * Creates a MatchExpression that is equivalent to {$and: [children[0], children[1]...]}.
+ */
+unique_ptr<MatchExpression> createAndOfNodes(std::vector<unique_ptr<MatchExpression>>* children) {
+ if (children->empty()) {
+ return nullptr;
+ }
+
+ if (children->size() == 1) {
+ return std::move(children->at(0));
+ }
+
+ unique_ptr<AndMatchExpression> splitAnd = stdx::make_unique<AndMatchExpression>();
+ for (auto&& expr : *children) {
+ splitAnd->add(expr.release());
+ }
+
+ return std::move(splitAnd);
+}
+
+/**
+ * Creates a MatchExpression that is equivalent to {$nor: [children[0], children[1]...]}.
+ */
+unique_ptr<MatchExpression> createNorOfNodes(std::vector<unique_ptr<MatchExpression>>* children) {
+ if (children->empty()) {
+ return nullptr;
+ }
+
+ unique_ptr<NorMatchExpression> splitNor = stdx::make_unique<NorMatchExpression>();
+ for (auto&& expr : *children) {
+ splitNor->add(expr.release());
+ }
+
+ return std::move(splitNor);
+}
+
} // namespace
namespace expression {
@@ -272,5 +351,82 @@ bool isSubsetOf(const MatchExpression* lhs, const MatchExpression* rhs) {
return false;
}
+bool isIndependentOf(const MatchExpression& expr, const std::set<std::string>& pathSet) {
+ if (expr.isLogical()) {
+ // Any logical expression is independent of 'pathSet' if all its children are independent of
+ // 'pathSet'.
+ for (size_t i = 0; i < expr.numChildren(); i++) {
+ if (!isIndependentOf(*expr.getChild(i), pathSet)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // At this point, we know 'expr' is a leaf. If it is an elemMatch, we do not attempt to
+ // determine if it is independent or not, and instead just return false.
+ return !isElemMatch(expr) && isLeafIndependentOf(expr.path(), pathSet);
+}
+
+std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitMatchExpressionBy(
+ unique_ptr<MatchExpression> expr, const std::set<std::string>& fields) {
+ if (isIndependentOf(*expr, fields)) {
+ // 'expr' does not depend upon 'fields', so it can be completely moved.
+ return {std::move(expr), nullptr};
+ }
+ if (!expr->isLogical()) {
+ // 'expr' is a leaf, and was not independent of 'fields'.
+ return {nullptr, std::move(expr)};
+ }
+
+ std::vector<unique_ptr<MatchExpression>> reliant;
+ std::vector<unique_ptr<MatchExpression>> separate;
+
+ switch (expr->matchType()) {
+ case MatchExpression::AND: {
+ auto andExpr = checked_cast<AndMatchExpression*>(expr.get());
+ for (size_t i = 0; i < andExpr->numChildren(); i++) {
+ auto children = splitMatchExpressionBy(andExpr->releaseChild(i), fields);
+
+ invariant(children.first || children.second);
+
+ if (children.first) {
+ separate.push_back(std::move(children.first));
+ }
+ if (children.second) {
+ reliant.push_back(std::move(children.second));
+ }
+ }
+ return {createAndOfNodes(&separate), createAndOfNodes(&reliant)};
+ }
+ case MatchExpression::NOR: {
+ // We can split a $nor because !(x | y) is logically equivalent to !x & !y.
+
+ // However, we cannot split each child individually; instead, we must look for a wholly
+ // independent child to split off by itself. As an example of why, with 'b' in
+ // 'fields': $nor: [{$and: [{a: 1}, {b: 1}]}]} will match if a is not 1, or if b is not
+ // 1. However, if we split this into: {$nor: [{$and: [{a: 1}]}]}, and
+ // {$nor: [{$and: [{b: 1}]}]}, a document will only pass both stages if neither a nor b
+ // is equal to 1.
+ auto norExpr = checked_cast<NorMatchExpression*>(expr.get());
+ for (size_t i = 0; i < norExpr->numChildren(); i++) {
+ auto child = norExpr->releaseChild(i);
+ if (isIndependentOf(*child, fields)) {
+ separate.push_back(std::move(child));
+ } else {
+ reliant.push_back(std::move(child));
+ }
+ }
+ return {createNorOfNodes(&separate), createNorOfNodes(&reliant)};
+ }
+ case MatchExpression::OR:
+ case MatchExpression::NOT: {
+ // If we aren't independent, we can't safely split.
+ return {nullptr, std::move(expr)};
+ }
+ default: { MONGO_UNREACHABLE; }
+ }
+}
+
} // namespace expression
} // namespace mongo
diff --git a/src/mongo/db/matcher/expression_algo.h b/src/mongo/db/matcher/expression_algo.h
index baccf557104..0b377830274 100644
--- a/src/mongo/db/matcher/expression_algo.h
+++ b/src/mongo/db/matcher/expression_algo.h
@@ -59,5 +59,35 @@ namespace expression {
*/
bool isSubsetOf(const MatchExpression* lhs, const MatchExpression* rhs);
+/**
+ * Determine if it is possible to split 'expr' into two MatchExpressions, where one is not
+ * dependent on any path from 'pathSet', such that applying the two in sequence is equivalent
+ * to applying 'expr'.
+ *
+ * For example, {a: "foo", b: "bar"} is splittable by "b", while
+ * {$or: [{a: {$eq: "foo"}}, {b: {$eq: "bar"}}]} is not splittable by "b", due to the $or.
+ */
+bool isSplittableBy(const MatchExpression& expr, const std::set<std::string>& pathSet);
+
+/**
+ * Determine if 'expr' is reliant upon any path from 'pathSet'.
+ */
+bool isIndependentOf(const MatchExpression& expr, const std::set<std::string>& pathSet);
+
+/**
+ * Attempt to split 'expr' into two MatchExpressions, where the first is not reliant upon any
+ * path from 'fields', such that applying the matches in sequence is equivalent to applying
+ * 'expr'. Takes ownership of 'expr'.
+ *
+ * If 'expr' cannot be split, returns {nullptr, expr}. If 'expr' is entirely independent of
+ * 'fields', returns {expr, nullptr}. If 'expr' is partially dependent on 'fields', and partially
+ * independent, returns {exprLeft, exprRight}, where each new MatchExpression contains a portion of
+ * 'expr'.
+ *
+ * Never returns {nullptr, nullptr}.
+ */
+std::pair<std::unique_ptr<MatchExpression>, std::unique_ptr<MatchExpression>>
+splitMatchExpressionBy(std::unique_ptr<MatchExpression> expr, const std::set<std::string>& fields);
+
} // namespace expression
} // namespace mongo
diff --git a/src/mongo/db/matcher/expression_algo_test.cpp b/src/mongo/db/matcher/expression_algo_test.cpp
index a9765403df6..8a38ab3be6e 100644
--- a/src/mongo/db/matcher/expression_algo_test.cpp
+++ b/src/mongo/db/matcher/expression_algo_test.cpp
@@ -38,10 +38,13 @@
#include "mongo/db/matcher/expression_algo.h"
#include "mongo/db/matcher/expression_parser.h"
#include "mongo/db/matcher/extensions_callback_disallow_extensions.h"
+#include "mongo/db/matcher/extensions_callback_noop.h"
#include "mongo/platform/decimal128.h"
namespace mongo {
+using std::unique_ptr;
+
/**
* A MatchExpression does not hold the memory for BSONElements, so use ParsedMatchExpression to
* ensure that the BSONObj outlives the MatchExpression.
@@ -648,4 +651,187 @@ TEST(ExpressionAlgoIsSubsetOf, Compare_Exists_NE) {
ASSERT_TRUE(expression::isSubsetOf(aNotEqualNull.get(), aExists.get()));
}
+TEST(IsIndependent, AndIsIndependentOnlyIfChildrenAre) {
+ BSONObj matchPredicate = fromjson("{$and: [{a: 1}, {b: 1}]}");
+ StatusWithMatchExpression status =
+ MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop());
+ ASSERT_OK(status.getStatus());
+
+ unique_ptr<MatchExpression> expr = std::move(status.getValue());
+ ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"b"}));
+ ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"c"}));
+}
+
+TEST(IsIndependent, ElemMatchIsNotIndependent) {
+ BSONObj matchPredicate = fromjson("{x: {$elemMatch: {y: 1}}}");
+ StatusWithMatchExpression status =
+ MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop());
+ ASSERT_OK(status.getStatus());
+
+ unique_ptr<MatchExpression> expr = std::move(status.getValue());
+ ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"x"}));
+ ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"x.y"}));
+ ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"y"}));
+}
+
+TEST(IsIndependent, NorIsIndependentOnlyIfChildrenAre) {
+ BSONObj matchPredicate = fromjson("{$nor: [{a: 1}, {b: 1}]}");
+ StatusWithMatchExpression status =
+ MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop());
+ ASSERT_OK(status.getStatus());
+
+ unique_ptr<MatchExpression> expr = std::move(status.getValue());
+ ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"b"}));
+ ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"c"}));
+}
+
+TEST(IsIndependent, NotIsIndependentOnlyIfChildrenAre) {
+ BSONObj matchPredicate = fromjson("{a: {$not: {$eq: 1}}}");
+ StatusWithMatchExpression status =
+ MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop());
+ ASSERT_OK(status.getStatus());
+
+ unique_ptr<MatchExpression> expr = std::move(status.getValue());
+ ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"b"}));
+ ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"a"}));
+}
+
+TEST(IsIndependent, OrIsIndependentOnlyIfChildrenAre) {
+ BSONObj matchPredicate = fromjson("{$or: [{a: 1}, {b: 1}]}");
+ StatusWithMatchExpression status =
+ MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop());
+ ASSERT_OK(status.getStatus());
+
+ unique_ptr<MatchExpression> expr = std::move(status.getValue());
+ ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"a"}));
+ ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"c"}));
+}
+
+TEST(IsIndependent, AndWithDottedFieldPathsIsNotIndependent) {
+ BSONObj matchPredicate = fromjson("{$and: [{'a': 1}, {'a.b': 1}]}");
+ StatusWithMatchExpression status =
+ MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop());
+ ASSERT_OK(status.getStatus());
+
+ unique_ptr<MatchExpression> expr = std::move(status.getValue());
+ ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"a.b.c"}));
+ ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"a.b"}));
+}
+
+TEST(IsIndependent, BallIsIndependentOfBalloon) {
+ BSONObj matchPredicate = fromjson("{'a.ball': 4}");
+ StatusWithMatchExpression status =
+ MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop());
+ ASSERT_OK(status.getStatus());
+
+ unique_ptr<MatchExpression> expr = std::move(status.getValue());
+ ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"a.balloon"}));
+ ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"a.b"}));
+ ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"a.ball.c"}));
+}
+
+TEST(SplitMatchExpression, AndWithSplittableChildrenIsSplittable) {
+ BSONObj matchPredicate = fromjson("{$and: [{a: 1}, {b: 1}]}");
+ StatusWithMatchExpression status =
+ MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop());
+ ASSERT_OK(status.getStatus());
+
+ std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr =
+ expression::splitMatchExpressionBy(std::move(status.getValue()), {"b"});
+
+ ASSERT_TRUE(splitExpr.first.get());
+ BSONObjBuilder firstBob;
+ splitExpr.first->serialize(&firstBob);
+
+ ASSERT_TRUE(splitExpr.second.get());
+ BSONObjBuilder secondBob;
+ splitExpr.second->serialize(&secondBob);
+
+ ASSERT_EQUALS(firstBob.obj(), fromjson("{a: {$eq: 1}}"));
+ ASSERT_EQUALS(secondBob.obj(), fromjson("{b: {$eq: 1}}"));
+}
+
+TEST(SplitMatchExpression, NorWithIndependentChildrenIsSplittable) {
+ BSONObj matchPredicate = fromjson("{$nor: [{a: 1}, {b: 1}]}");
+ StatusWithMatchExpression status =
+ MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop());
+ ASSERT_OK(status.getStatus());
+
+ std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr =
+ expression::splitMatchExpressionBy(std::move(status.getValue()), {"b"});
+
+ ASSERT_TRUE(splitExpr.first.get());
+ BSONObjBuilder firstBob;
+ splitExpr.first->serialize(&firstBob);
+
+ ASSERT_TRUE(splitExpr.second.get());
+ BSONObjBuilder secondBob;
+ splitExpr.second->serialize(&secondBob);
+
+ ASSERT_EQUALS(firstBob.obj(), fromjson("{$nor: [{a: {$eq: 1}}]}"));
+ ASSERT_EQUALS(secondBob.obj(), fromjson("{$nor: [{b: {$eq: 1}}]}"));
+}
+
+TEST(SplitMatchExpression, NotWithIndependentChildIsSplittable) {
+ BSONObj matchPredicate = fromjson("{x: {$not: {$gt: 4}}}");
+ StatusWithMatchExpression status =
+ MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop());
+ ASSERT_OK(status.getStatus());
+
+ std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr =
+ expression::splitMatchExpressionBy(std::move(status.getValue()), {"y"});
+
+ ASSERT_TRUE(splitExpr.first.get());
+ BSONObjBuilder firstBob;
+ splitExpr.first->serialize(&firstBob);
+
+ ASSERT_EQUALS(firstBob.obj(), fromjson("{$nor: [{$and: [{x: {$gt: 4}}]}]}"));
+ ASSERT_FALSE(splitExpr.second);
+}
+
+TEST(SplitMatchExpression, OrWithOnlyIndependentChildrenIsNotSplittable) {
+ BSONObj matchPredicate = fromjson("{$or: [{a: 1}, {b: 1}]}");
+ StatusWithMatchExpression status =
+ MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop());
+ ASSERT_OK(status.getStatus());
+
+ std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr =
+ expression::splitMatchExpressionBy(std::move(status.getValue()), {"b"});
+
+ ASSERT_TRUE(splitExpr.second.get());
+ BSONObjBuilder bob;
+ splitExpr.second->serialize(&bob);
+
+ ASSERT_FALSE(splitExpr.first);
+ ASSERT_EQUALS(bob.obj(), fromjson("{$or: [{a: {$eq: 1}}, {b: {$eq: 1}}]}"));
+}
+
+TEST(SplitMatchExpression, ComplexMatchExpressionSplitsCorrectly) {
+ BSONObj matchPredicate = fromjson(
+ "{$and: [{x: {$not: {$size: 2}}},"
+ "{$or: [{'a.b' : 3}, {'a.b.c': 4}]},"
+ "{$nor: [{x: {$gt: 4}}, {$and: [{x: {$not: {$eq: 1}}}, {y: 3}]}]}]}");
+ StatusWithMatchExpression status =
+ MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop());
+ ASSERT_OK(status.getStatus());
+
+ std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr =
+ expression::splitMatchExpressionBy(std::move(status.getValue()), {"x"});
+
+ ASSERT_TRUE(splitExpr.first.get());
+ BSONObjBuilder firstBob;
+ splitExpr.first->serialize(&firstBob);
+
+ ASSERT_TRUE(splitExpr.second.get());
+ BSONObjBuilder secondBob;
+ splitExpr.second->serialize(&secondBob);
+
+ ASSERT_EQUALS(firstBob.obj(), fromjson("{$or: [{'a.b': {$eq: 3}}, {'a.b.c': {$eq: 4}}]}"));
+ ASSERT_EQUALS(secondBob.obj(),
+ fromjson(
+ "{$and: [{$nor: [{$and: [{x: {$size: 2}}]}]}, {$nor: [{x: {$gt: 4}}, {$and: "
+ "[{$nor: [{$and: [{x: "
+ "{$eq: 1}}]}]}, {y: {$eq: 3}}]}]}]}"));
+}
+
} // namespace mongo
diff --git a/src/mongo/db/matcher/expression_tree.h b/src/mongo/db/matcher/expression_tree.h
index 2b65bc9d2f6..50786402a85 100644
--- a/src/mongo/db/matcher/expression_tree.h
+++ b/src/mongo/db/matcher/expression_tree.h
@@ -65,6 +65,12 @@ public:
return _expressions[i];
}
+ virtual std::unique_ptr<MatchExpression> releaseChild(size_t i) {
+ auto child = std::unique_ptr<MatchExpression>(_expressions[i]);
+ _expressions[i] = nullptr;
+ return std::move(child);
+ }
+
virtual std::vector<MatchExpression*>* getChildVector() {
return &_expressions;
}
diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript
index ba97c44e3f8..393079d7378 100644
--- a/src/mongo/db/pipeline/SConscript
+++ b/src/mongo/db/pipeline/SConscript
@@ -122,6 +122,7 @@ docSourceEnv.Library(
'expression',
'$BUILD_DIR/mongo/client/clientdriver',
'$BUILD_DIR/mongo/db/matcher/expressions',
+ '$BUILD_DIR/mongo/db/matcher/expression_algo',
'$BUILD_DIR/mongo/db/service_context',
'$BUILD_DIR/mongo/db/storage/storage_options',
'$BUILD_DIR/mongo/db/storage/wiredtiger/storage_wiredtiger_customization_hooks',
diff --git a/src/mongo/db/pipeline/document_source.h b/src/mongo/db/pipeline/document_source.h
index a59dfe6999d..6db0f07db8d 100644
--- a/src/mongo/db/pipeline/document_source.h
+++ b/src/mongo/db/pipeline/document_source.h
@@ -665,6 +665,20 @@ public:
return _isTextQuery;
}
+ /**
+ * Attempt to split this $match into two stages, where the first is not dependent upon any path
+ * from 'fields', and where applying them in sequence is equivalent to applying this stage once.
+ *
+ * Will return two intrusive_ptrs to new $match stages, where the first pointer is independent
+ * of 'fields', and the second is dependent. Either pointer may be null, so be sure to check the
+ * return value.
+ *
+ * For example, {$match: {a: "foo", "b.c": 4}} split by "b" will return pointers to two stages:
+ * {$match: {a: "foo"}}, and {$match: {"b.c": 4}}.
+ */
+ std::pair<boost::intrusive_ptr<DocumentSource>, boost::intrusive_ptr<DocumentSource>>
+ splitSourceBy(const std::set<std::string>& fields);
+
private:
DocumentSourceMatch(const BSONObj& query,
const boost::intrusive_ptr<ExpressionContext>& pExpCtx);
@@ -1285,6 +1299,13 @@ public:
GetDepsReturn getDependencies(DepsTracker* deps) const final;
/**
+ * If the next stage is a $match, the part of the match that is not dependent on the unwound
+ * field can be moved into a new, preceding, $match stage.
+ */
+ Pipeline::SourceContainer::iterator optimizeAt(Pipeline::SourceContainer::iterator itr,
+ Pipeline::SourceContainer* container) final;
+
+ /**
* Creates a new $unwind DocumentSource from a BSON specification.
*/
static boost::intrusive_ptr<DocumentSource> createFromBson(
diff --git a/src/mongo/db/pipeline/document_source_match.cpp b/src/mongo/db/pipeline/document_source_match.cpp
index 5f5ebebbb1c..27fa4351f61 100644
--- a/src/mongo/db/pipeline/document_source_match.cpp
+++ b/src/mongo/db/pipeline/document_source_match.cpp
@@ -31,16 +31,19 @@
#include <cctype>
#include "mongo/db/jsobj.h"
+#include "mongo/db/matcher/expression_algo.h"
#include "mongo/db/matcher/extensions_callback_noop.h"
-#include "mongo/db/matcher/matcher.h"
#include "mongo/db/pipeline/document.h"
#include "mongo/db/pipeline/document_source.h"
#include "mongo/db/pipeline/expression.h"
#include "mongo/util/stringutils.h"
+#include "mongo/stdx/memory.h"
namespace mongo {
using boost::intrusive_ptr;
+using std::pair;
+using std::unique_ptr;
using std::string;
using std::vector;
@@ -150,6 +153,13 @@ Document redactSafePortionDollarOps(BSONObj expr) {
if (field.fieldName()[0] != '$')
continue;
+ if (field.fieldNameStringData() == "$eq") {
+ if (isTypeRedactSafeInComparison(field.type())) {
+ output[field.fieldNameStringData()] = Value(field);
+ }
+ continue;
+ }
+
switch (BSONObj::MatchType(field.getGtLtOp(BSONObj::Equality))) {
// These are always ok
case BSONObj::opTYPE:
@@ -304,7 +314,6 @@ BSONObj DocumentSourceMatch::redactSafePortion() const {
void DocumentSourceMatch::setSource(DocumentSource* source) {
uassert(17313, "$match with $text is only allowed as the first pipeline stage", !_isTextQuery);
-
DocumentSource::setSource(source);
}
@@ -320,6 +329,43 @@ bool DocumentSourceMatch::isTextQuery(const BSONObj& query) {
return false;
}
+pair<intrusive_ptr<DocumentSource>, intrusive_ptr<DocumentSource>>
+DocumentSourceMatch::splitSourceBy(const std::set<std::string>& fields) {
+ pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> newExpr(
+ expression::splitMatchExpressionBy(std::move(_expression), fields));
+
+ invariant(newExpr.first || newExpr.second);
+
+ if (!newExpr.first) {
+ // The entire $match dependends on 'fields'.
+ _expression = std::move(newExpr.second);
+ return {nullptr, this};
+ } else if (!newExpr.second) {
+ // This $match is entirely independent of 'fields'.
+ _expression = std::move(newExpr.first);
+ return {this, nullptr};
+ }
+
+ // A MatchExpression requires that it is outlived by the BSONObj it is parsed from. Since the
+ // original BSONObj this $match was created from is no longer equivalent to either of the
+ // MatchExpressions we return, we instead take each of these expressions, serialize them, and
+ // then re-parse them, constructing new BSON that is owned by the DocumentSourceMatch.
+
+ // Build an expression for a new $match stage.
+ BSONObjBuilder firstBob;
+ newExpr.first->serialize(&firstBob);
+
+ intrusive_ptr<DocumentSource> firstMatch(new DocumentSourceMatch(firstBob.obj(), pExpCtx));
+
+ // This $match stage is still needed, so update the MatchExpression as needed.
+ BSONObjBuilder secondBob;
+ newExpr.second->serialize(&secondBob);
+
+ intrusive_ptr<DocumentSource> secondMatch(new DocumentSourceMatch(secondBob.obj(), pExpCtx));
+
+ return {firstMatch, secondMatch};
+}
+
static void uassertNoDisallowedClauses(BSONObj query) {
BSONForEach(e, query) {
// can't use the MatchExpression API because this would segfault the constructor
@@ -391,4 +437,4 @@ DocumentSourceMatch::DocumentSourceMatch(const BSONObj& query,
_expression = std::move(status.getValue());
}
-}
+} // namespace mongo
diff --git a/src/mongo/db/pipeline/document_source_test.cpp b/src/mongo/db/pipeline/document_source_test.cpp
index f950eb8328f..e33916f909b 100644
--- a/src/mongo/db/pipeline/document_source_test.cpp
+++ b/src/mongo/db/pipeline/document_source_test.cpp
@@ -29,6 +29,7 @@
#include "mongo/platform/basic.h"
#include "mongo/base/init.h"
+#include "mongo/db/matcher/extensions_callback_noop.h"
#include "mongo/db/operation_context_noop.h"
#include "mongo/db/pipeline/dependencies.h"
#include "mongo/db/pipeline/document_source.h"
@@ -2969,6 +2970,8 @@ public:
namespace DocumentSourceMatch {
using mongo::DocumentSourceMatch;
+using std::unique_ptr;
+
// Helpers to make a DocumentSourceMatch from a query object or json string
intrusive_ptr<DocumentSourceMatch> makeMatch(const BSONObj& query) {
intrusive_ptr<DocumentSource> uncasted =
diff --git a/src/mongo/db/pipeline/document_source_unwind.cpp b/src/mongo/db/pipeline/document_source_unwind.cpp
index a03d3fe7a77..506627cc4dd 100644
--- a/src/mongo/db/pipeline/document_source_unwind.cpp
+++ b/src/mongo/db/pipeline/document_source_unwind.cpp
@@ -225,6 +225,41 @@ BSONObjSet DocumentSourceUnwind::getOutputSorts() {
return out;
}
+Pipeline::SourceContainer::iterator DocumentSourceUnwind::optimizeAt(
+ Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
+ invariant(*itr == this);
+
+ if (auto nextMatch = dynamic_cast<DocumentSourceMatch*>((*std::next(itr)).get())) {
+ const bool includeDollarPrefix = false;
+ std::set<std::string> field = {_unwindPath.getPath(includeDollarPrefix)};
+ auto splitMatch = nextMatch->splitSourceBy(field);
+
+ invariant(splitMatch.first || splitMatch.second);
+
+ if (!splitMatch.first && splitMatch.second) {
+ // No optimization was possible.
+ return std::next(itr);
+ }
+
+ container->erase(std::next(itr));
+
+ // If splitMatch.second is not null, then there is a new $match stage to insert after
+ // ourselves.
+ if (splitMatch.second) {
+ container->insert(std::next(itr), std::move(splitMatch.second));
+ }
+
+ if (splitMatch.first) {
+ container->insert(itr, std::move(splitMatch.first));
+ if (std::prev(itr) == container->begin()) {
+ return std::prev(itr);
+ }
+ return std::prev(std::prev(itr));
+ }
+ }
+ return std::next(itr);
+}
+
Value DocumentSourceUnwind::serialize(bool explain) const {
return Value(DOC(getSourceName() << DOC(
"path" << _unwindPath.getPath(true) << "preserveNullAndEmptyArrays"
diff --git a/src/mongo/db/pipeline/pipeline_test.cpp b/src/mongo/db/pipeline/pipeline_test.cpp
index e0162c9eaeb..e5afb99fb86 100644
--- a/src/mongo/db/pipeline/pipeline_test.cpp
+++ b/src/mongo/db/pipeline/pipeline_test.cpp
@@ -268,6 +268,89 @@ class MatchShouldDuplicateItselfBeforeRedact : public Base {
}
};
+class MatchShouldSwapWithUnwind : public Base {
+ string inputPipeJson() {
+ return "[{$unwind: '$a.b.c'}, "
+ "{$match: {'b': 1}}]";
+ }
+ string outputPipeJson() {
+ return "[{$match: {'b': 1}}, "
+ "{$unwind: {path: '$a.b.c'}}]";
+ }
+};
+
+class MatchOnPrefixShouldNotSwapOnUnwind : public Base {
+ string inputPipeJson() {
+ return "[{$unwind: {path: '$a.b.c'}}, "
+ "{$match: {'a.b': 1}}]";
+ }
+ string outputPipeJson() {
+ return "[{$unwind: {path: '$a.b.c'}}, "
+ "{$match: {'a.b': 1}}]";
+ }
+};
+
+class MatchShouldSplitOnUnwind : public Base {
+ string inputPipeJson() {
+ return "[{$unwind: '$a.b'}, "
+ "{$match: {$and: [{f: {$eq: 5}}, "
+ " {$nor: [{'a.d': 1, c: 5}, {'a.b': 3, c: 5}]}]}}]";
+ }
+ string outputPipeJson() {
+ return "[{$match: {$and: [{f: {$eq: 5}},"
+ " {$nor: [{$and: [{'a.d': {$eq: 1}}, {c: {$eq: 5}}]}]}]}},"
+ "{$unwind: {path: '$a.b'}}, "
+ "{$match: {$nor: [{$and: [{'a.b': {$eq: 3}}, {c: {$eq: 5}}]}]}}]";
+ }
+};
+
+class MatchShouldNotOptimizeWithElemMatch : public Base {
+ string inputPipeJson() {
+ return "[{$unwind: {path: '$a.b'}}, "
+ "{$match: {a: {$elemMatch: {b: {d: 1}}}}}]";
+ }
+ string outputPipeJson() {
+ return "[{$unwind: {path: '$a.b'}}, "
+ "{$match: {a: {$elemMatch: {b: {d: 1}}}}}]";
+ }
+};
+
+class MatchWithNorOnlySplitsIndependentChildren : public Base {
+ string inputPipeJson() {
+ return "[{$unwind: {path: '$a'}}, "
+ "{$match: {$nor: [{$and: [{a: {$eq: 1}}, {b: {$eq: 1}}]}, {b: {$eq: 2}} ]}}]";
+ }
+ string outputPipeJson() {
+ return "[{$match: {$nor: [{b: {$eq: 2}}]}}, "
+ "{$unwind: {path: '$a'}}, "
+ "{$match: {$nor: [{$and: [{a: {$eq: 1}}, {b: {$eq: 1}}]}]}}]";
+ }
+};
+
+class MatchWithOrDoesNotSplit : public Base {
+ string inputPipeJson() {
+ return "[{$unwind: {path: '$a'}}, "
+ "{$match: {$or: [{a: {$eq: 'dependent'}}, {b: {$eq: 'independent'}}]}}]";
+ }
+ string outputPipeJson() {
+ return "[{$unwind: {path: '$a'}}, "
+ "{$match: {$or: [{a: {$eq: 'dependent'}}, {b: {$eq: 'independent'}}]}}]";
+ }
+};
+
+class UnwindBeforeDoubleMatchShouldRepeatedlyOptimize : public Base {
+ string inputPipeJson() {
+ return "[{$unwind: '$a'}, "
+ "{$match: {b: {$gt: 0}}}, "
+ "{$match: {a: 1, c: 1}}]";
+ }
+ string outputPipeJson() {
+ return "[{$match: {$and: [{b: {$gt: 0}}, {c: {$eq: 1}}]}},"
+ "{$unwind: {path: '$a'}}, "
+ "{$match: {a: {$eq: 1}}}]";
+ }
+};
+
} // namespace Local
namespace Sharded {
@@ -674,6 +757,13 @@ public:
add<Optimizations::Local::LookupShouldCoalesceWithUnwindOnAsWithIncludeArrayIndex>();
add<Optimizations::Local::LookupShouldNotCoalesceWithUnwindNotOnAs>();
add<Optimizations::Local::MatchShouldDuplicateItselfBeforeRedact>();
+ add<Optimizations::Local::MatchShouldSwapWithUnwind>();
+ add<Optimizations::Local::MatchOnPrefixShouldNotSwapOnUnwind>();
+ add<Optimizations::Local::MatchShouldNotOptimizeWithElemMatch>();
+ add<Optimizations::Local::MatchWithNorOnlySplitsIndependentChildren>();
+ add<Optimizations::Local::MatchWithOrDoesNotSplit>();
+ add<Optimizations::Local::MatchShouldSplitOnUnwind>();
+ add<Optimizations::Local::UnwindBeforeDoubleMatchShouldRepeatedlyOptimize>();
add<Optimizations::Sharded::Empty>();
add<Optimizations::Sharded::coalesceLookUpAndUnwind::ShouldCoalesceUnwindOnAs>();
add<Optimizations::Sharded::coalesceLookUpAndUnwind::