diff options
author | George Wangensteen <george.wangensteen@10gen.com> | 2019-05-31 15:35:53 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-12-16 18:40:07 +0000 |
commit | 0aefd7c76f286b9a51296144df9fdb249fec8948 (patch) | |
tree | 552c5da38b1747ab084b6c698f1cce04ad147b2e /src/mongo/db | |
parent | 265e3c7d0d40457f0e8483d3ed4161ac3896d04a (diff) | |
download | mongo-0aefd7c76f286b9a51296144df9fdb249fec8948.tar.gz |
SERVER-34741 Move $match in front of $group if condition is on group key
(cherry picked from commit 55e76198b1e41b5dbd5868d2ed8b914da29f0f29)
Diffstat (limited to 'src/mongo/db')
-rw-r--r-- | src/mongo/db/matcher/expression_algo.cpp | 13 | ||||
-rw-r--r-- | src/mongo/db/matcher/expression_algo.h | 7 | ||||
-rw-r--r-- | src/mongo/db/matcher/expression_algo_test.cpp | 63 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source.cpp | 27 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_group.h | 17 | ||||
-rw-r--r-- | src/mongo/db/pipeline/pipeline_test.cpp | 70 |
6 files changed, 189 insertions, 8 deletions
diff --git a/src/mongo/db/matcher/expression_algo.cpp b/src/mongo/db/matcher/expression_algo.cpp index 67acd98d672..127e9933fa8 100644 --- a/src/mongo/db/matcher/expression_algo.cpp +++ b/src/mongo/db/matcher/expression_algo.cpp @@ -357,6 +357,19 @@ splitMatchExpressionByWithoutRenames(unique_ptr<MatchExpression> expr, namespace expression { +bool hasExistencePredicateOnPath(const MatchExpression& expr, StringData path) { + if (expr.getCategory() == MatchExpression::MatchCategory::kLeaf) { + return (expr.matchType() == MatchExpression::MatchType::EXISTS && expr.path() == path); + } + for (size_t i = 0; i < expr.numChildren(); i++) { + MatchExpression* child = expr.getChild(i); + if (hasExistencePredicateOnPath(*child, path)) { + return true; + } + } + return false; +} + bool isSubsetOf(const MatchExpression* lhs, const MatchExpression* rhs) { invariant(lhs); invariant(rhs); diff --git a/src/mongo/db/matcher/expression_algo.h b/src/mongo/db/matcher/expression_algo.h index 0f97c19a8d6..715e29416ef 100644 --- a/src/mongo/db/matcher/expression_algo.h +++ b/src/mongo/db/matcher/expression_algo.h @@ -46,6 +46,13 @@ namespace expression { using NodeTraversalFunc = stdx::function<void(MatchExpression*, std::string)>; /** + * Returns true if 'expr' has an $exists predicate on 'path.' Note that this only returns true + * for an $exists predicatated on the exact path given: it will not return true if there is a + * $exists predicated on a prefix of the path. + */ +bool hasExistencePredicateOnPath(const MatchExpression& expr, StringData path); + +/** * Returns true if the documents matched by 'lhs' are a subset of the documents matched by * 'rhs', i.e. a document matched by 'lhs' must also be matched by 'rhs', and false otherwise. * diff --git a/src/mongo/db/matcher/expression_algo_test.cpp b/src/mongo/db/matcher/expression_algo_test.cpp index 29d344d8918..68075fbb396 100644 --- a/src/mongo/db/matcher/expression_algo_test.cpp +++ b/src/mongo/db/matcher/expression_algo_test.cpp @@ -1279,5 +1279,68 @@ TEST(IsPathPrefixOf, ComputesPrefixesCorrectly) { ASSERT_FALSE(expression::isPathPrefixOf("a.b", "a")); } +TEST(HasExistencePredicateOnPath, IdentifiesLeavesCorrectly) { + BSONObj matchPredicate = fromjson("{$and: [{a: {$exists: true}}, {b: {$lte: 2}}]}"); + boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest()); + auto swMatchExpression = MatchExpressionParser::parse(matchPredicate, std::move(expCtx)); + ASSERT_OK(swMatchExpression.getStatus()); + ASSERT_TRUE( + expression::hasExistencePredicateOnPath(*swMatchExpression.getValue().get(), "a"_sd)); + ASSERT_FALSE( + expression::hasExistencePredicateOnPath(*swMatchExpression.getValue().get(), "b"_sd)); +} + +TEST(HasExistencePredicateOnPath, HandlesMultiplePredicatesWithSamePath) { + BSONObj matchPredicate = fromjson("{$and: [{a: {$gt: 5000}}, {a: {$exists: false}}]}"); + boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest()); + auto swMatchExpression = MatchExpressionParser::parse(matchPredicate, std::move(expCtx)); + ASSERT_OK(swMatchExpression.getStatus()); + ASSERT_TRUE( + expression::hasExistencePredicateOnPath(*swMatchExpression.getValue().get(), "a"_sd)); +} + +TEST(HasExistencePredicateOnPath, DeeperTreeTest) { + BSONObj matchPredicate = fromjson( + "{$and: [{q: {$gt: 5000}}, {$and: [{z: {$lte: 50}}," + "{$or: [{f : {$gte: 4}}, {a : {$exists : true}}]}]}]}"); + boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest()); + auto swMatchExpression = MatchExpressionParser::parse(matchPredicate, std::move(expCtx)); + ASSERT_OK(swMatchExpression.getStatus()); + ASSERT_TRUE( + expression::hasExistencePredicateOnPath(*swMatchExpression.getValue().get(), "a"_sd)); +} + +TEST(HasExistencePredicateOnPath, HandlesDottedPathsInDeepTree) { + BSONObj matchPredicate = fromjson( + "{$and: [{q: {$gt: 5000}}, {$and: [{z: {$lte: 50}}," + "{$or: [{f : {$gte: 4}}, {'a.b.c.d' : {$exists : true}}]}]}]}"); + boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest()); + auto swMatchExpression = MatchExpressionParser::parse(matchPredicate, std::move(expCtx)); + ASSERT_OK(swMatchExpression.getStatus()); + ASSERT_TRUE( + expression::hasExistencePredicateOnPath(*swMatchExpression.getValue().get(), "a.b.c.d"_sd)); +} + +TEST(HasExistencePredicateOnPath, ReturnsFalseWhenExistsOnlyOnPrefix) { + BSONObj matchPredicate = fromjson( + "{$and: [{q: {$gt: 5000}}, {$and: [{z: {$lte: 50}}," + "{$or: [{f : {$gte: 4}}, {'a' : {$exists : true}}]}]}]}"); + boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest()); + auto swMatchExpression = MatchExpressionParser::parse(matchPredicate, std::move(expCtx)); + ASSERT_OK(swMatchExpression.getStatus()); + ASSERT_FALSE( + expression::hasExistencePredicateOnPath(*swMatchExpression.getValue().get(), "a.b"_sd)); +} + +TEST(HasExistencePredicateOnPath, ReturnsFalseWhenExistsOnSubpath) { + BSONObj matchPredicate = fromjson( + "{$and: [{q: {$gt: 5000}}, {$and: [{z: {$lte: 50}}," + "{$or: [{f : {$gte: 4}}, {'a.b' : {$exists : true}}]}]}]}"); + boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest()); + auto swMatchExpression = MatchExpressionParser::parse(matchPredicate, std::move(expCtx)); + ASSERT_OK(swMatchExpression.getStatus()); + ASSERT_FALSE( + expression::hasExistencePredicateOnPath(*swMatchExpression.getValue().get(), "a"_sd)); +} } // namespace mongo diff --git a/src/mongo/db/pipeline/document_source.cpp b/src/mongo/db/pipeline/document_source.cpp index 158e97b33fa..67d530db7ca 100644 --- a/src/mongo/db/pipeline/document_source.cpp +++ b/src/mongo/db/pipeline/document_source.cpp @@ -32,6 +32,7 @@ #include "mongo/db/pipeline/document_source.h" #include "mongo/db/matcher/expression_algo.h" +#include "mongo/db/pipeline/document_source_group.h" #include "mongo/db/pipeline/document_source_internal_shard_filter.h" #include "mongo/db/pipeline/document_source_match.h" #include "mongo/db/pipeline/document_source_sample.h" @@ -209,12 +210,36 @@ StringMap<std::string> computeNewNamesAssumingAnyPathsNotRenamedAreUnmodified( return renameOut; } +/** + * Verifies whether or not a $group is able to swap with a succeeding $match stage. While ordinarily + * $group can swap with a $match, it cannot if the following $match has an $exists predicate on _id, + * and the $group has exactly one field as the $group key. This is because every document will have + * an _id field following such a $group stage, including those whose group key was missing before + * the $group. As an example, the following optimization would be incorrect as the post-optimization + * pipeline would handle documents that had nullish _id fields differently. Thus, given such a + * $group and $match, this function would return false. + * {$group: {_id: "$x"}} + * {$match: {_id: {$exists: true}} + * ----> + * {$match: {x: {$exists: true}} + * {$group: {_id: "$x"}} + */ +bool groupMatchSwapVerified(const DocumentSourceMatch& nextMatch, + const DocumentSourceGroup& thisGroup) { + if (thisGroup.getIdFields().size() != 1) { + return true; + } + return !expression::hasExistencePredicateOnPath(*(nextMatch.getMatchExpression()), "_id"_sd); +} + } // namespace bool DocumentSource::pushMatchBefore(Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { auto nextMatch = dynamic_cast<DocumentSourceMatch*>((*std::next(itr)).get()); - if (constraints().canSwapWithMatch && nextMatch && !nextMatch->isTextQuery()) { + auto thisGroup = dynamic_cast<DocumentSourceGroup*>(this); + if (constraints().canSwapWithMatch && nextMatch && !nextMatch->isTextQuery() && + (!thisGroup || groupMatchSwapVerified(*nextMatch, *thisGroup))) { // We're allowed to swap with a $match and the stage after us is a $match. Furthermore, the // $match does not contain a text search predicate, which we do not attempt to optimize // because such a $match must already be the first stage in the pipeline. We can attempt to diff --git a/src/mongo/db/pipeline/document_source_group.h b/src/mongo/db/pipeline/document_source_group.h index 79ea8048ec2..c7e4779eb62 100644 --- a/src/mongo/db/pipeline/document_source_group.h +++ b/src/mongo/db/pipeline/document_source_group.h @@ -120,15 +120,18 @@ public: BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx); StageConstraints constraints(Pipeline::SplitState pipeState) const final { - return {StreamType::kBlocking, - PositionRequirement::kNone, - HostTypeRequirement::kNone, - DiskUseRequirement::kWritesTmpData, - FacetRequirement::kAllowed, - TransactionRequirement::kAllowed, - LookupRequirement::kAllowed}; + StageConstraints constraints(StreamType::kBlocking, + PositionRequirement::kNone, + HostTypeRequirement::kNone, + DiskUseRequirement::kWritesTmpData, + FacetRequirement::kAllowed, + TransactionRequirement::kAllowed, + LookupRequirement::kAllowed); + constraints.canSwapWithMatch = true; + return constraints; } + /** * Add an accumulator, which will become a field in each Document that results from grouping. */ diff --git a/src/mongo/db/pipeline/pipeline_test.cpp b/src/mongo/db/pipeline/pipeline_test.cpp index ee5b3153b42..edc747d2657 100644 --- a/src/mongo/db/pipeline/pipeline_test.cpp +++ b/src/mongo/db/pipeline/pipeline_test.cpp @@ -678,6 +678,76 @@ TEST(PipelineOptimizationTest, LookupDoesNotAbsorbUnwindOnSubfieldOfAsButStillMo assertPipelineOptimizesTo(inputPipe, outputPipe); } +TEST(PipelineOptimizationTest, GroupShouldSwapWithMatchIfFilteringOnID) { + string inputPipe = + "[{$group : {_id:'$a'}}, " + " {$match: {_id : 4}}]"; + string outputPipe = + "[{$match: {a:{$eq : 4}}}, " + " {$group:{_id:'$a'}}]"; + string serializedPipe = + "[{$match: {a:{$eq :4}}}, " + " {$group:{_id:'$a'}}]"; + + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, GroupShouldNotSwapWithMatchIfNotFilteringOnID) { + string inputPipe = + "[{$group : {_id:'$a'}}, " + " {$match: {b : 4}}]"; + string outputPipe = + "[{$group : {_id:'$a'}}, " + " {$match: {b : {$eq: 4}}}]"; + string serializedPipe = + "[{$group : {_id:'$a'}}, " + " {$match: {b : 4}}]"; + + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, GroupShouldNotSwapWithMatchIfExistsPredicateOnID) { + string inputPipe = + "[{$group : {_id:'$x'}}, " + " {$match: {_id : {$exists: true}}}]"; + string outputPipe = + "[{$group : {_id:'$x'}}, " + " {$match: {_id : {$exists: true}}}]"; + string serializedPipe = + "[{$group : {_id:'$x'}}, " + " {$match: {_id : {$exists: true}}}]"; + + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, GroupShouldNotSwapWithCompoundMatchIfExistsPredicateOnID) { + string inputPipe = + "[{$group : {_id:'$x'}}, " + " {$match: {$or : [ {_id : {$exists: true}}, {_id : {$gt : 70}}]}}]"; + string outputPipe = + "[{$group : {_id:'$x'}}, " + " {$match: {$or : [ {_id : {$exists: true}}, {_id : {$gt : 70}}]}}]"; + string serializedPipe = + "[{$group : {_id:'$x'}}, " + " {$match: {$or : [ {_id : {$exists: true}}, {_id : {$gt : 70}}]}}]"; + + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, GroupShouldSwapWithCompoundMatchIfFilteringOnID) { + string inputPipe = + "[{$group : {_id:'$x'}}, " + " {$match: {$or : [ {_id : {$lte : 50}}, {_id : {$gt : 70}}]}}]"; + string outputPipe = + "[{$match: {$or : [ {x : {$lte : 50}}, {x : {$gt : 70}}]}}," + "{$group : {_id:'$x'}}]"; + string serializedPipe = + "[{$match: {$or : [ {x : {$lte : 50}}, {x : {$gt : 70}}]}}," + "{$group : {_id:'$x'}}]"; + + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + TEST(PipelineOptimizationTest, MatchShouldDuplicateItselfBeforeRedact) { string inputPipe = "[{$redact: '$$PRUNE'}, {$match: {a: 1, b:12}}]"; string outputPipe = |