summaryrefslogtreecommitdiff
path: root/src/mongo/db/matcher
diff options
context:
space:
mode:
authorBenjamin Murphy <benjamin_murphy@me.com>2016-02-12 16:20:51 -0500
committerBenjamin Murphy <benjamin_murphy@me.com>2016-03-30 16:21:13 -0400
commit2b56c5ce4527403329bc60ee406a0f1a7de3f10a (patch)
tree3124ce031943a41c3fb1138095f1e565378be72a /src/mongo/db/matcher
parent46d994e5c186e9634896db0d434db0a635ffc60b (diff)
downloadmongo-2b56c5ce4527403329bc60ee406a0f1a7de3f10a.tar.gz
SERVER-20506 Conditionally order match and unwind.
Diffstat (limited to 'src/mongo/db/matcher')
-rw-r--r--src/mongo/db/matcher/expression_algo.cpp156
-rw-r--r--src/mongo/db/matcher/expression_algo.h30
-rw-r--r--src/mongo/db/matcher/expression_algo_test.cpp186
-rw-r--r--src/mongo/db/matcher/expression_tree.h6
4 files changed, 378 insertions, 0 deletions
diff --git a/src/mongo/db/matcher/expression_algo.cpp b/src/mongo/db/matcher/expression_algo.cpp
index a8fb260b7d0..79059c204d1 100644
--- a/src/mongo/db/matcher/expression_algo.cpp
+++ b/src/mongo/db/matcher/expression_algo.cpp
@@ -30,11 +30,16 @@
#include "mongo/platform/basic.h"
+#include "mongo/base/checked_cast.h"
#include "mongo/db/matcher/expression.h"
+#include "mongo/db/matcher/expression_algo.h"
#include "mongo/db/matcher/expression_leaf.h"
#include "mongo/db/matcher/expression_tree.h"
namespace mongo {
+
+using std::unique_ptr;
+
namespace {
bool isComparisonMatchExpression(const MatchExpression* expr) {
@@ -219,6 +224,80 @@ bool _isSubsetOf(const MatchExpression* lhs, const ExistsMatchExpression* rhs) {
}
}
+/**
+ * Returns whether the leaf is a $elemMatch expression.
+ */
+bool isElemMatch(const MatchExpression& expr) {
+ return expr.matchType() == MatchExpression::ELEM_MATCH_OBJECT ||
+ expr.matchType() == MatchExpression::ELEM_MATCH_VALUE;
+}
+
+/**
+ * Returns whether 'first' is a prefix of 'second'.
+ */
+bool isPathPrefixOf(const StringData& first, const StringData& second) {
+ if (first == second) {
+ return true;
+ }
+
+ if (first.size() >= second.size()) {
+ return false;
+ }
+
+ return second.startsWith(first) && second[first.size()] == '.';
+}
+
+/**
+ * Returns whether the leaf at 'path' is independent of 'fields'.
+ */
+bool isLeafIndependentOf(const StringData& path, const std::set<std::string>& fields) {
+ // For each field in 'fields', we need to check if that field is a prefix of 'path' or if 'path'
+ // is a prefix of that field. For example, the expression {a.b: {c: 1}} is not independent of
+ // 'a.b.c', and and the expression {a.b.c.d: 1} is not independent of 'a.b.c'.
+ for (StringData field : fields) {
+ if (isPathPrefixOf(path, field) || isPathPrefixOf(field, path)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+/**
+ * Creates a MatchExpression that is equivalent to {$and: [children[0], children[1]...]}.
+ */
+unique_ptr<MatchExpression> createAndOfNodes(std::vector<unique_ptr<MatchExpression>>* children) {
+ if (children->empty()) {
+ return nullptr;
+ }
+
+ if (children->size() == 1) {
+ return std::move(children->at(0));
+ }
+
+ unique_ptr<AndMatchExpression> splitAnd = stdx::make_unique<AndMatchExpression>();
+ for (auto&& expr : *children) {
+ splitAnd->add(expr.release());
+ }
+
+ return std::move(splitAnd);
+}
+
+/**
+ * Creates a MatchExpression that is equivalent to {$nor: [children[0], children[1]...]}.
+ */
+unique_ptr<MatchExpression> createNorOfNodes(std::vector<unique_ptr<MatchExpression>>* children) {
+ if (children->empty()) {
+ return nullptr;
+ }
+
+ unique_ptr<NorMatchExpression> splitNor = stdx::make_unique<NorMatchExpression>();
+ for (auto&& expr : *children) {
+ splitNor->add(expr.release());
+ }
+
+ return std::move(splitNor);
+}
+
} // namespace
namespace expression {
@@ -272,5 +351,82 @@ bool isSubsetOf(const MatchExpression* lhs, const MatchExpression* rhs) {
return false;
}
+bool isIndependentOf(const MatchExpression& expr, const std::set<std::string>& pathSet) {
+ if (expr.isLogical()) {
+ // Any logical expression is independent of 'pathSet' if all its children are independent of
+ // 'pathSet'.
+ for (size_t i = 0; i < expr.numChildren(); i++) {
+ if (!isIndependentOf(*expr.getChild(i), pathSet)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // At this point, we know 'expr' is a leaf. If it is an elemMatch, we do not attempt to
+ // determine if it is independent or not, and instead just return false.
+ return !isElemMatch(expr) && isLeafIndependentOf(expr.path(), pathSet);
+}
+
+std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitMatchExpressionBy(
+ unique_ptr<MatchExpression> expr, const std::set<std::string>& fields) {
+ if (isIndependentOf(*expr, fields)) {
+ // 'expr' does not depend upon 'fields', so it can be completely moved.
+ return {std::move(expr), nullptr};
+ }
+ if (!expr->isLogical()) {
+ // 'expr' is a leaf, and was not independent of 'fields'.
+ return {nullptr, std::move(expr)};
+ }
+
+ std::vector<unique_ptr<MatchExpression>> reliant;
+ std::vector<unique_ptr<MatchExpression>> separate;
+
+ switch (expr->matchType()) {
+ case MatchExpression::AND: {
+ auto andExpr = checked_cast<AndMatchExpression*>(expr.get());
+ for (size_t i = 0; i < andExpr->numChildren(); i++) {
+ auto children = splitMatchExpressionBy(andExpr->releaseChild(i), fields);
+
+ invariant(children.first || children.second);
+
+ if (children.first) {
+ separate.push_back(std::move(children.first));
+ }
+ if (children.second) {
+ reliant.push_back(std::move(children.second));
+ }
+ }
+ return {createAndOfNodes(&separate), createAndOfNodes(&reliant)};
+ }
+ case MatchExpression::NOR: {
+ // We can split a $nor because !(x | y) is logically equivalent to !x & !y.
+
+ // However, we cannot split each child individually; instead, we must look for a wholly
+ // independent child to split off by itself. As an example of why, with 'b' in
+ // 'fields': $nor: [{$and: [{a: 1}, {b: 1}]}]} will match if a is not 1, or if b is not
+ // 1. However, if we split this into: {$nor: [{$and: [{a: 1}]}]}, and
+ // {$nor: [{$and: [{b: 1}]}]}, a document will only pass both stages if neither a nor b
+ // is equal to 1.
+ auto norExpr = checked_cast<NorMatchExpression*>(expr.get());
+ for (size_t i = 0; i < norExpr->numChildren(); i++) {
+ auto child = norExpr->releaseChild(i);
+ if (isIndependentOf(*child, fields)) {
+ separate.push_back(std::move(child));
+ } else {
+ reliant.push_back(std::move(child));
+ }
+ }
+ return {createNorOfNodes(&separate), createNorOfNodes(&reliant)};
+ }
+ case MatchExpression::OR:
+ case MatchExpression::NOT: {
+ // If we aren't independent, we can't safely split.
+ return {nullptr, std::move(expr)};
+ }
+ default: { MONGO_UNREACHABLE; }
+ }
+}
+
} // namespace expression
} // namespace mongo
diff --git a/src/mongo/db/matcher/expression_algo.h b/src/mongo/db/matcher/expression_algo.h
index baccf557104..0b377830274 100644
--- a/src/mongo/db/matcher/expression_algo.h
+++ b/src/mongo/db/matcher/expression_algo.h
@@ -59,5 +59,35 @@ namespace expression {
*/
bool isSubsetOf(const MatchExpression* lhs, const MatchExpression* rhs);
+/**
+ * Determine if it is possible to split 'expr' into two MatchExpressions, where one is not
+ * dependent on any path from 'pathSet', such that applying the two in sequence is equivalent
+ * to applying 'expr'.
+ *
+ * For example, {a: "foo", b: "bar"} is splittable by "b", while
+ * {$or: [{a: {$eq: "foo"}}, {b: {$eq: "bar"}}]} is not splittable by "b", due to the $or.
+ */
+bool isSplittableBy(const MatchExpression& expr, const std::set<std::string>& pathSet);
+
+/**
+ * Determine if 'expr' is reliant upon any path from 'pathSet'.
+ */
+bool isIndependentOf(const MatchExpression& expr, const std::set<std::string>& pathSet);
+
+/**
+ * Attempt to split 'expr' into two MatchExpressions, where the first is not reliant upon any
+ * path from 'fields', such that applying the matches in sequence is equivalent to applying
+ * 'expr'. Takes ownership of 'expr'.
+ *
+ * If 'expr' cannot be split, returns {nullptr, expr}. If 'expr' is entirely independent of
+ * 'fields', returns {expr, nullptr}. If 'expr' is partially dependent on 'fields', and partially
+ * independent, returns {exprLeft, exprRight}, where each new MatchExpression contains a portion of
+ * 'expr'.
+ *
+ * Never returns {nullptr, nullptr}.
+ */
+std::pair<std::unique_ptr<MatchExpression>, std::unique_ptr<MatchExpression>>
+splitMatchExpressionBy(std::unique_ptr<MatchExpression> expr, const std::set<std::string>& fields);
+
} // namespace expression
} // namespace mongo
diff --git a/src/mongo/db/matcher/expression_algo_test.cpp b/src/mongo/db/matcher/expression_algo_test.cpp
index a9765403df6..8a38ab3be6e 100644
--- a/src/mongo/db/matcher/expression_algo_test.cpp
+++ b/src/mongo/db/matcher/expression_algo_test.cpp
@@ -38,10 +38,13 @@
#include "mongo/db/matcher/expression_algo.h"
#include "mongo/db/matcher/expression_parser.h"
#include "mongo/db/matcher/extensions_callback_disallow_extensions.h"
+#include "mongo/db/matcher/extensions_callback_noop.h"
#include "mongo/platform/decimal128.h"
namespace mongo {
+using std::unique_ptr;
+
/**
* A MatchExpression does not hold the memory for BSONElements, so use ParsedMatchExpression to
* ensure that the BSONObj outlives the MatchExpression.
@@ -648,4 +651,187 @@ TEST(ExpressionAlgoIsSubsetOf, Compare_Exists_NE) {
ASSERT_TRUE(expression::isSubsetOf(aNotEqualNull.get(), aExists.get()));
}
+TEST(IsIndependent, AndIsIndependentOnlyIfChildrenAre) {
+ BSONObj matchPredicate = fromjson("{$and: [{a: 1}, {b: 1}]}");
+ StatusWithMatchExpression status =
+ MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop());
+ ASSERT_OK(status.getStatus());
+
+ unique_ptr<MatchExpression> expr = std::move(status.getValue());
+ ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"b"}));
+ ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"c"}));
+}
+
+TEST(IsIndependent, ElemMatchIsNotIndependent) {
+ BSONObj matchPredicate = fromjson("{x: {$elemMatch: {y: 1}}}");
+ StatusWithMatchExpression status =
+ MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop());
+ ASSERT_OK(status.getStatus());
+
+ unique_ptr<MatchExpression> expr = std::move(status.getValue());
+ ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"x"}));
+ ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"x.y"}));
+ ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"y"}));
+}
+
+TEST(IsIndependent, NorIsIndependentOnlyIfChildrenAre) {
+ BSONObj matchPredicate = fromjson("{$nor: [{a: 1}, {b: 1}]}");
+ StatusWithMatchExpression status =
+ MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop());
+ ASSERT_OK(status.getStatus());
+
+ unique_ptr<MatchExpression> expr = std::move(status.getValue());
+ ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"b"}));
+ ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"c"}));
+}
+
+TEST(IsIndependent, NotIsIndependentOnlyIfChildrenAre) {
+ BSONObj matchPredicate = fromjson("{a: {$not: {$eq: 1}}}");
+ StatusWithMatchExpression status =
+ MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop());
+ ASSERT_OK(status.getStatus());
+
+ unique_ptr<MatchExpression> expr = std::move(status.getValue());
+ ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"b"}));
+ ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"a"}));
+}
+
+TEST(IsIndependent, OrIsIndependentOnlyIfChildrenAre) {
+ BSONObj matchPredicate = fromjson("{$or: [{a: 1}, {b: 1}]}");
+ StatusWithMatchExpression status =
+ MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop());
+ ASSERT_OK(status.getStatus());
+
+ unique_ptr<MatchExpression> expr = std::move(status.getValue());
+ ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"a"}));
+ ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"c"}));
+}
+
+TEST(IsIndependent, AndWithDottedFieldPathsIsNotIndependent) {
+ BSONObj matchPredicate = fromjson("{$and: [{'a': 1}, {'a.b': 1}]}");
+ StatusWithMatchExpression status =
+ MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop());
+ ASSERT_OK(status.getStatus());
+
+ unique_ptr<MatchExpression> expr = std::move(status.getValue());
+ ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"a.b.c"}));
+ ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"a.b"}));
+}
+
+TEST(IsIndependent, BallIsIndependentOfBalloon) {
+ BSONObj matchPredicate = fromjson("{'a.ball': 4}");
+ StatusWithMatchExpression status =
+ MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop());
+ ASSERT_OK(status.getStatus());
+
+ unique_ptr<MatchExpression> expr = std::move(status.getValue());
+ ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"a.balloon"}));
+ ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"a.b"}));
+ ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"a.ball.c"}));
+}
+
+TEST(SplitMatchExpression, AndWithSplittableChildrenIsSplittable) {
+ BSONObj matchPredicate = fromjson("{$and: [{a: 1}, {b: 1}]}");
+ StatusWithMatchExpression status =
+ MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop());
+ ASSERT_OK(status.getStatus());
+
+ std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr =
+ expression::splitMatchExpressionBy(std::move(status.getValue()), {"b"});
+
+ ASSERT_TRUE(splitExpr.first.get());
+ BSONObjBuilder firstBob;
+ splitExpr.first->serialize(&firstBob);
+
+ ASSERT_TRUE(splitExpr.second.get());
+ BSONObjBuilder secondBob;
+ splitExpr.second->serialize(&secondBob);
+
+ ASSERT_EQUALS(firstBob.obj(), fromjson("{a: {$eq: 1}}"));
+ ASSERT_EQUALS(secondBob.obj(), fromjson("{b: {$eq: 1}}"));
+}
+
+TEST(SplitMatchExpression, NorWithIndependentChildrenIsSplittable) {
+ BSONObj matchPredicate = fromjson("{$nor: [{a: 1}, {b: 1}]}");
+ StatusWithMatchExpression status =
+ MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop());
+ ASSERT_OK(status.getStatus());
+
+ std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr =
+ expression::splitMatchExpressionBy(std::move(status.getValue()), {"b"});
+
+ ASSERT_TRUE(splitExpr.first.get());
+ BSONObjBuilder firstBob;
+ splitExpr.first->serialize(&firstBob);
+
+ ASSERT_TRUE(splitExpr.second.get());
+ BSONObjBuilder secondBob;
+ splitExpr.second->serialize(&secondBob);
+
+ ASSERT_EQUALS(firstBob.obj(), fromjson("{$nor: [{a: {$eq: 1}}]}"));
+ ASSERT_EQUALS(secondBob.obj(), fromjson("{$nor: [{b: {$eq: 1}}]}"));
+}
+
+TEST(SplitMatchExpression, NotWithIndependentChildIsSplittable) {
+ BSONObj matchPredicate = fromjson("{x: {$not: {$gt: 4}}}");
+ StatusWithMatchExpression status =
+ MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop());
+ ASSERT_OK(status.getStatus());
+
+ std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr =
+ expression::splitMatchExpressionBy(std::move(status.getValue()), {"y"});
+
+ ASSERT_TRUE(splitExpr.first.get());
+ BSONObjBuilder firstBob;
+ splitExpr.first->serialize(&firstBob);
+
+ ASSERT_EQUALS(firstBob.obj(), fromjson("{$nor: [{$and: [{x: {$gt: 4}}]}]}"));
+ ASSERT_FALSE(splitExpr.second);
+}
+
+TEST(SplitMatchExpression, OrWithOnlyIndependentChildrenIsNotSplittable) {
+ BSONObj matchPredicate = fromjson("{$or: [{a: 1}, {b: 1}]}");
+ StatusWithMatchExpression status =
+ MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop());
+ ASSERT_OK(status.getStatus());
+
+ std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr =
+ expression::splitMatchExpressionBy(std::move(status.getValue()), {"b"});
+
+ ASSERT_TRUE(splitExpr.second.get());
+ BSONObjBuilder bob;
+ splitExpr.second->serialize(&bob);
+
+ ASSERT_FALSE(splitExpr.first);
+ ASSERT_EQUALS(bob.obj(), fromjson("{$or: [{a: {$eq: 1}}, {b: {$eq: 1}}]}"));
+}
+
+TEST(SplitMatchExpression, ComplexMatchExpressionSplitsCorrectly) {
+ BSONObj matchPredicate = fromjson(
+ "{$and: [{x: {$not: {$size: 2}}},"
+ "{$or: [{'a.b' : 3}, {'a.b.c': 4}]},"
+ "{$nor: [{x: {$gt: 4}}, {$and: [{x: {$not: {$eq: 1}}}, {y: 3}]}]}]}");
+ StatusWithMatchExpression status =
+ MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop());
+ ASSERT_OK(status.getStatus());
+
+ std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr =
+ expression::splitMatchExpressionBy(std::move(status.getValue()), {"x"});
+
+ ASSERT_TRUE(splitExpr.first.get());
+ BSONObjBuilder firstBob;
+ splitExpr.first->serialize(&firstBob);
+
+ ASSERT_TRUE(splitExpr.second.get());
+ BSONObjBuilder secondBob;
+ splitExpr.second->serialize(&secondBob);
+
+ ASSERT_EQUALS(firstBob.obj(), fromjson("{$or: [{'a.b': {$eq: 3}}, {'a.b.c': {$eq: 4}}]}"));
+ ASSERT_EQUALS(secondBob.obj(),
+ fromjson(
+ "{$and: [{$nor: [{$and: [{x: {$size: 2}}]}]}, {$nor: [{x: {$gt: 4}}, {$and: "
+ "[{$nor: [{$and: [{x: "
+ "{$eq: 1}}]}]}, {y: {$eq: 3}}]}]}]}"));
+}
+
} // namespace mongo
diff --git a/src/mongo/db/matcher/expression_tree.h b/src/mongo/db/matcher/expression_tree.h
index 2b65bc9d2f6..50786402a85 100644
--- a/src/mongo/db/matcher/expression_tree.h
+++ b/src/mongo/db/matcher/expression_tree.h
@@ -65,6 +65,12 @@ public:
return _expressions[i];
}
+ virtual std::unique_ptr<MatchExpression> releaseChild(size_t i) {
+ auto child = std::unique_ptr<MatchExpression>(_expressions[i]);
+ _expressions[i] = nullptr;
+ return std::move(child);
+ }
+
virtual std::vector<MatchExpression*>* getChildVector() {
return &_expressions;
}