diff options
author | Benjamin Murphy <benjamin_murphy@me.com> | 2016-02-12 16:20:51 -0500 |
---|---|---|
committer | Benjamin Murphy <benjamin_murphy@me.com> | 2016-03-30 16:21:13 -0400 |
commit | 2b56c5ce4527403329bc60ee406a0f1a7de3f10a (patch) | |
tree | 3124ce031943a41c3fb1138095f1e565378be72a /src/mongo/db/matcher | |
parent | 46d994e5c186e9634896db0d434db0a635ffc60b (diff) | |
download | mongo-2b56c5ce4527403329bc60ee406a0f1a7de3f10a.tar.gz |
SERVER-20506 Conditionally order match and unwind.
Diffstat (limited to 'src/mongo/db/matcher')
-rw-r--r-- | src/mongo/db/matcher/expression_algo.cpp | 156 | ||||
-rw-r--r-- | src/mongo/db/matcher/expression_algo.h | 30 | ||||
-rw-r--r-- | src/mongo/db/matcher/expression_algo_test.cpp | 186 | ||||
-rw-r--r-- | src/mongo/db/matcher/expression_tree.h | 6 |
4 files changed, 378 insertions, 0 deletions
diff --git a/src/mongo/db/matcher/expression_algo.cpp b/src/mongo/db/matcher/expression_algo.cpp index a8fb260b7d0..79059c204d1 100644 --- a/src/mongo/db/matcher/expression_algo.cpp +++ b/src/mongo/db/matcher/expression_algo.cpp @@ -30,11 +30,16 @@ #include "mongo/platform/basic.h" +#include "mongo/base/checked_cast.h" #include "mongo/db/matcher/expression.h" +#include "mongo/db/matcher/expression_algo.h" #include "mongo/db/matcher/expression_leaf.h" #include "mongo/db/matcher/expression_tree.h" namespace mongo { + +using std::unique_ptr; + namespace { bool isComparisonMatchExpression(const MatchExpression* expr) { @@ -219,6 +224,80 @@ bool _isSubsetOf(const MatchExpression* lhs, const ExistsMatchExpression* rhs) { } } +/** + * Returns whether the leaf is a $elemMatch expression. + */ +bool isElemMatch(const MatchExpression& expr) { + return expr.matchType() == MatchExpression::ELEM_MATCH_OBJECT || + expr.matchType() == MatchExpression::ELEM_MATCH_VALUE; +} + +/** + * Returns whether 'first' is a prefix of 'second'. + */ +bool isPathPrefixOf(const StringData& first, const StringData& second) { + if (first == second) { + return true; + } + + if (first.size() >= second.size()) { + return false; + } + + return second.startsWith(first) && second[first.size()] == '.'; +} + +/** + * Returns whether the leaf at 'path' is independent of 'fields'. + */ +bool isLeafIndependentOf(const StringData& path, const std::set<std::string>& fields) { + // For each field in 'fields', we need to check if that field is a prefix of 'path' or if 'path' + // is a prefix of that field. For example, the expression {a.b: {c: 1}} is not independent of + // 'a.b.c', and and the expression {a.b.c.d: 1} is not independent of 'a.b.c'. + for (StringData field : fields) { + if (isPathPrefixOf(path, field) || isPathPrefixOf(field, path)) { + return false; + } + } + return true; +} + +/** + * Creates a MatchExpression that is equivalent to {$and: [children[0], children[1]...]}. + */ +unique_ptr<MatchExpression> createAndOfNodes(std::vector<unique_ptr<MatchExpression>>* children) { + if (children->empty()) { + return nullptr; + } + + if (children->size() == 1) { + return std::move(children->at(0)); + } + + unique_ptr<AndMatchExpression> splitAnd = stdx::make_unique<AndMatchExpression>(); + for (auto&& expr : *children) { + splitAnd->add(expr.release()); + } + + return std::move(splitAnd); +} + +/** + * Creates a MatchExpression that is equivalent to {$nor: [children[0], children[1]...]}. + */ +unique_ptr<MatchExpression> createNorOfNodes(std::vector<unique_ptr<MatchExpression>>* children) { + if (children->empty()) { + return nullptr; + } + + unique_ptr<NorMatchExpression> splitNor = stdx::make_unique<NorMatchExpression>(); + for (auto&& expr : *children) { + splitNor->add(expr.release()); + } + + return std::move(splitNor); +} + } // namespace namespace expression { @@ -272,5 +351,82 @@ bool isSubsetOf(const MatchExpression* lhs, const MatchExpression* rhs) { return false; } +bool isIndependentOf(const MatchExpression& expr, const std::set<std::string>& pathSet) { + if (expr.isLogical()) { + // Any logical expression is independent of 'pathSet' if all its children are independent of + // 'pathSet'. + for (size_t i = 0; i < expr.numChildren(); i++) { + if (!isIndependentOf(*expr.getChild(i), pathSet)) { + return false; + } + } + return true; + } + + // At this point, we know 'expr' is a leaf. If it is an elemMatch, we do not attempt to + // determine if it is independent or not, and instead just return false. + return !isElemMatch(expr) && isLeafIndependentOf(expr.path(), pathSet); +} + +std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitMatchExpressionBy( + unique_ptr<MatchExpression> expr, const std::set<std::string>& fields) { + if (isIndependentOf(*expr, fields)) { + // 'expr' does not depend upon 'fields', so it can be completely moved. + return {std::move(expr), nullptr}; + } + if (!expr->isLogical()) { + // 'expr' is a leaf, and was not independent of 'fields'. + return {nullptr, std::move(expr)}; + } + + std::vector<unique_ptr<MatchExpression>> reliant; + std::vector<unique_ptr<MatchExpression>> separate; + + switch (expr->matchType()) { + case MatchExpression::AND: { + auto andExpr = checked_cast<AndMatchExpression*>(expr.get()); + for (size_t i = 0; i < andExpr->numChildren(); i++) { + auto children = splitMatchExpressionBy(andExpr->releaseChild(i), fields); + + invariant(children.first || children.second); + + if (children.first) { + separate.push_back(std::move(children.first)); + } + if (children.second) { + reliant.push_back(std::move(children.second)); + } + } + return {createAndOfNodes(&separate), createAndOfNodes(&reliant)}; + } + case MatchExpression::NOR: { + // We can split a $nor because !(x | y) is logically equivalent to !x & !y. + + // However, we cannot split each child individually; instead, we must look for a wholly + // independent child to split off by itself. As an example of why, with 'b' in + // 'fields': $nor: [{$and: [{a: 1}, {b: 1}]}]} will match if a is not 1, or if b is not + // 1. However, if we split this into: {$nor: [{$and: [{a: 1}]}]}, and + // {$nor: [{$and: [{b: 1}]}]}, a document will only pass both stages if neither a nor b + // is equal to 1. + auto norExpr = checked_cast<NorMatchExpression*>(expr.get()); + for (size_t i = 0; i < norExpr->numChildren(); i++) { + auto child = norExpr->releaseChild(i); + if (isIndependentOf(*child, fields)) { + separate.push_back(std::move(child)); + } else { + reliant.push_back(std::move(child)); + } + } + return {createNorOfNodes(&separate), createNorOfNodes(&reliant)}; + } + case MatchExpression::OR: + case MatchExpression::NOT: { + // If we aren't independent, we can't safely split. + return {nullptr, std::move(expr)}; + } + default: { MONGO_UNREACHABLE; } + } +} + } // namespace expression } // namespace mongo diff --git a/src/mongo/db/matcher/expression_algo.h b/src/mongo/db/matcher/expression_algo.h index baccf557104..0b377830274 100644 --- a/src/mongo/db/matcher/expression_algo.h +++ b/src/mongo/db/matcher/expression_algo.h @@ -59,5 +59,35 @@ namespace expression { */ bool isSubsetOf(const MatchExpression* lhs, const MatchExpression* rhs); +/** + * Determine if it is possible to split 'expr' into two MatchExpressions, where one is not + * dependent on any path from 'pathSet', such that applying the two in sequence is equivalent + * to applying 'expr'. + * + * For example, {a: "foo", b: "bar"} is splittable by "b", while + * {$or: [{a: {$eq: "foo"}}, {b: {$eq: "bar"}}]} is not splittable by "b", due to the $or. + */ +bool isSplittableBy(const MatchExpression& expr, const std::set<std::string>& pathSet); + +/** + * Determine if 'expr' is reliant upon any path from 'pathSet'. + */ +bool isIndependentOf(const MatchExpression& expr, const std::set<std::string>& pathSet); + +/** + * Attempt to split 'expr' into two MatchExpressions, where the first is not reliant upon any + * path from 'fields', such that applying the matches in sequence is equivalent to applying + * 'expr'. Takes ownership of 'expr'. + * + * If 'expr' cannot be split, returns {nullptr, expr}. If 'expr' is entirely independent of + * 'fields', returns {expr, nullptr}. If 'expr' is partially dependent on 'fields', and partially + * independent, returns {exprLeft, exprRight}, where each new MatchExpression contains a portion of + * 'expr'. + * + * Never returns {nullptr, nullptr}. + */ +std::pair<std::unique_ptr<MatchExpression>, std::unique_ptr<MatchExpression>> +splitMatchExpressionBy(std::unique_ptr<MatchExpression> expr, const std::set<std::string>& fields); + } // namespace expression } // namespace mongo diff --git a/src/mongo/db/matcher/expression_algo_test.cpp b/src/mongo/db/matcher/expression_algo_test.cpp index a9765403df6..8a38ab3be6e 100644 --- a/src/mongo/db/matcher/expression_algo_test.cpp +++ b/src/mongo/db/matcher/expression_algo_test.cpp @@ -38,10 +38,13 @@ #include "mongo/db/matcher/expression_algo.h" #include "mongo/db/matcher/expression_parser.h" #include "mongo/db/matcher/extensions_callback_disallow_extensions.h" +#include "mongo/db/matcher/extensions_callback_noop.h" #include "mongo/platform/decimal128.h" namespace mongo { +using std::unique_ptr; + /** * A MatchExpression does not hold the memory for BSONElements, so use ParsedMatchExpression to * ensure that the BSONObj outlives the MatchExpression. @@ -648,4 +651,187 @@ TEST(ExpressionAlgoIsSubsetOf, Compare_Exists_NE) { ASSERT_TRUE(expression::isSubsetOf(aNotEqualNull.get(), aExists.get())); } +TEST(IsIndependent, AndIsIndependentOnlyIfChildrenAre) { + BSONObj matchPredicate = fromjson("{$and: [{a: 1}, {b: 1}]}"); + StatusWithMatchExpression status = + MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop()); + ASSERT_OK(status.getStatus()); + + unique_ptr<MatchExpression> expr = std::move(status.getValue()); + ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"b"})); + ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"c"})); +} + +TEST(IsIndependent, ElemMatchIsNotIndependent) { + BSONObj matchPredicate = fromjson("{x: {$elemMatch: {y: 1}}}"); + StatusWithMatchExpression status = + MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop()); + ASSERT_OK(status.getStatus()); + + unique_ptr<MatchExpression> expr = std::move(status.getValue()); + ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"x"})); + ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"x.y"})); + ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"y"})); +} + +TEST(IsIndependent, NorIsIndependentOnlyIfChildrenAre) { + BSONObj matchPredicate = fromjson("{$nor: [{a: 1}, {b: 1}]}"); + StatusWithMatchExpression status = + MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop()); + ASSERT_OK(status.getStatus()); + + unique_ptr<MatchExpression> expr = std::move(status.getValue()); + ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"b"})); + ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"c"})); +} + +TEST(IsIndependent, NotIsIndependentOnlyIfChildrenAre) { + BSONObj matchPredicate = fromjson("{a: {$not: {$eq: 1}}}"); + StatusWithMatchExpression status = + MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop()); + ASSERT_OK(status.getStatus()); + + unique_ptr<MatchExpression> expr = std::move(status.getValue()); + ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"b"})); + ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"a"})); +} + +TEST(IsIndependent, OrIsIndependentOnlyIfChildrenAre) { + BSONObj matchPredicate = fromjson("{$or: [{a: 1}, {b: 1}]}"); + StatusWithMatchExpression status = + MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop()); + ASSERT_OK(status.getStatus()); + + unique_ptr<MatchExpression> expr = std::move(status.getValue()); + ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"a"})); + ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"c"})); +} + +TEST(IsIndependent, AndWithDottedFieldPathsIsNotIndependent) { + BSONObj matchPredicate = fromjson("{$and: [{'a': 1}, {'a.b': 1}]}"); + StatusWithMatchExpression status = + MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop()); + ASSERT_OK(status.getStatus()); + + unique_ptr<MatchExpression> expr = std::move(status.getValue()); + ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"a.b.c"})); + ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"a.b"})); +} + +TEST(IsIndependent, BallIsIndependentOfBalloon) { + BSONObj matchPredicate = fromjson("{'a.ball': 4}"); + StatusWithMatchExpression status = + MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop()); + ASSERT_OK(status.getStatus()); + + unique_ptr<MatchExpression> expr = std::move(status.getValue()); + ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"a.balloon"})); + ASSERT_TRUE(expression::isIndependentOf(*expr.get(), {"a.b"})); + ASSERT_FALSE(expression::isIndependentOf(*expr.get(), {"a.ball.c"})); +} + +TEST(SplitMatchExpression, AndWithSplittableChildrenIsSplittable) { + BSONObj matchPredicate = fromjson("{$and: [{a: 1}, {b: 1}]}"); + StatusWithMatchExpression status = + MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop()); + ASSERT_OK(status.getStatus()); + + std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr = + expression::splitMatchExpressionBy(std::move(status.getValue()), {"b"}); + + ASSERT_TRUE(splitExpr.first.get()); + BSONObjBuilder firstBob; + splitExpr.first->serialize(&firstBob); + + ASSERT_TRUE(splitExpr.second.get()); + BSONObjBuilder secondBob; + splitExpr.second->serialize(&secondBob); + + ASSERT_EQUALS(firstBob.obj(), fromjson("{a: {$eq: 1}}")); + ASSERT_EQUALS(secondBob.obj(), fromjson("{b: {$eq: 1}}")); +} + +TEST(SplitMatchExpression, NorWithIndependentChildrenIsSplittable) { + BSONObj matchPredicate = fromjson("{$nor: [{a: 1}, {b: 1}]}"); + StatusWithMatchExpression status = + MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop()); + ASSERT_OK(status.getStatus()); + + std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr = + expression::splitMatchExpressionBy(std::move(status.getValue()), {"b"}); + + ASSERT_TRUE(splitExpr.first.get()); + BSONObjBuilder firstBob; + splitExpr.first->serialize(&firstBob); + + ASSERT_TRUE(splitExpr.second.get()); + BSONObjBuilder secondBob; + splitExpr.second->serialize(&secondBob); + + ASSERT_EQUALS(firstBob.obj(), fromjson("{$nor: [{a: {$eq: 1}}]}")); + ASSERT_EQUALS(secondBob.obj(), fromjson("{$nor: [{b: {$eq: 1}}]}")); +} + +TEST(SplitMatchExpression, NotWithIndependentChildIsSplittable) { + BSONObj matchPredicate = fromjson("{x: {$not: {$gt: 4}}}"); + StatusWithMatchExpression status = + MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop()); + ASSERT_OK(status.getStatus()); + + std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr = + expression::splitMatchExpressionBy(std::move(status.getValue()), {"y"}); + + ASSERT_TRUE(splitExpr.first.get()); + BSONObjBuilder firstBob; + splitExpr.first->serialize(&firstBob); + + ASSERT_EQUALS(firstBob.obj(), fromjson("{$nor: [{$and: [{x: {$gt: 4}}]}]}")); + ASSERT_FALSE(splitExpr.second); +} + +TEST(SplitMatchExpression, OrWithOnlyIndependentChildrenIsNotSplittable) { + BSONObj matchPredicate = fromjson("{$or: [{a: 1}, {b: 1}]}"); + StatusWithMatchExpression status = + MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop()); + ASSERT_OK(status.getStatus()); + + std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr = + expression::splitMatchExpressionBy(std::move(status.getValue()), {"b"}); + + ASSERT_TRUE(splitExpr.second.get()); + BSONObjBuilder bob; + splitExpr.second->serialize(&bob); + + ASSERT_FALSE(splitExpr.first); + ASSERT_EQUALS(bob.obj(), fromjson("{$or: [{a: {$eq: 1}}, {b: {$eq: 1}}]}")); +} + +TEST(SplitMatchExpression, ComplexMatchExpressionSplitsCorrectly) { + BSONObj matchPredicate = fromjson( + "{$and: [{x: {$not: {$size: 2}}}," + "{$or: [{'a.b' : 3}, {'a.b.c': 4}]}," + "{$nor: [{x: {$gt: 4}}, {$and: [{x: {$not: {$eq: 1}}}, {y: 3}]}]}]}"); + StatusWithMatchExpression status = + MatchExpressionParser::parse(matchPredicate, ExtensionsCallbackNoop()); + ASSERT_OK(status.getStatus()); + + std::pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> splitExpr = + expression::splitMatchExpressionBy(std::move(status.getValue()), {"x"}); + + ASSERT_TRUE(splitExpr.first.get()); + BSONObjBuilder firstBob; + splitExpr.first->serialize(&firstBob); + + ASSERT_TRUE(splitExpr.second.get()); + BSONObjBuilder secondBob; + splitExpr.second->serialize(&secondBob); + + ASSERT_EQUALS(firstBob.obj(), fromjson("{$or: [{'a.b': {$eq: 3}}, {'a.b.c': {$eq: 4}}]}")); + ASSERT_EQUALS(secondBob.obj(), + fromjson( + "{$and: [{$nor: [{$and: [{x: {$size: 2}}]}]}, {$nor: [{x: {$gt: 4}}, {$and: " + "[{$nor: [{$and: [{x: " + "{$eq: 1}}]}]}, {y: {$eq: 3}}]}]}]}")); +} + } // namespace mongo diff --git a/src/mongo/db/matcher/expression_tree.h b/src/mongo/db/matcher/expression_tree.h index 2b65bc9d2f6..50786402a85 100644 --- a/src/mongo/db/matcher/expression_tree.h +++ b/src/mongo/db/matcher/expression_tree.h @@ -65,6 +65,12 @@ public: return _expressions[i]; } + virtual std::unique_ptr<MatchExpression> releaseChild(size_t i) { + auto child = std::unique_ptr<MatchExpression>(_expressions[i]); + _expressions[i] = nullptr; + return std::move(child); + } + virtual std::vector<MatchExpression*>* getChildVector() { return &_expressions; } |