diff options
author | George Wangensteen <george.wangensteen@10gen.com> | 2019-05-31 15:35:53 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-12-16 18:40:07 +0000 |
commit | 0aefd7c76f286b9a51296144df9fdb249fec8948 (patch) | |
tree | 552c5da38b1747ab084b6c698f1cce04ad147b2e /src/mongo/db/pipeline | |
parent | 265e3c7d0d40457f0e8483d3ed4161ac3896d04a (diff) | |
download | mongo-0aefd7c76f286b9a51296144df9fdb249fec8948.tar.gz |
SERVER-34741 Move $match in front of $group if condition is on group key
(cherry picked from commit 55e76198b1e41b5dbd5868d2ed8b914da29f0f29)
Diffstat (limited to 'src/mongo/db/pipeline')
-rw-r--r-- | src/mongo/db/pipeline/document_source.cpp | 27 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_group.h | 17 | ||||
-rw-r--r-- | src/mongo/db/pipeline/pipeline_test.cpp | 70 |
3 files changed, 106 insertions, 8 deletions
diff --git a/src/mongo/db/pipeline/document_source.cpp b/src/mongo/db/pipeline/document_source.cpp index 158e97b33fa..67d530db7ca 100644 --- a/src/mongo/db/pipeline/document_source.cpp +++ b/src/mongo/db/pipeline/document_source.cpp @@ -32,6 +32,7 @@ #include "mongo/db/pipeline/document_source.h" #include "mongo/db/matcher/expression_algo.h" +#include "mongo/db/pipeline/document_source_group.h" #include "mongo/db/pipeline/document_source_internal_shard_filter.h" #include "mongo/db/pipeline/document_source_match.h" #include "mongo/db/pipeline/document_source_sample.h" @@ -209,12 +210,36 @@ StringMap<std::string> computeNewNamesAssumingAnyPathsNotRenamedAreUnmodified( return renameOut; } +/** + * Verifies whether or not a $group is able to swap with a succeeding $match stage. While ordinarily + * $group can swap with a $match, it cannot if the following $match has an $exists predicate on _id, + * and the $group has exactly one field as the $group key. This is because every document will have + * an _id field following such a $group stage, including those whose group key was missing before + * the $group. As an example, the following optimization would be incorrect as the post-optimization + * pipeline would handle documents that had nullish _id fields differently. Thus, given such a + * $group and $match, this function would return false. + * {$group: {_id: "$x"}} + * {$match: {_id: {$exists: true}} + * ----> + * {$match: {x: {$exists: true}} + * {$group: {_id: "$x"}} + */ +bool groupMatchSwapVerified(const DocumentSourceMatch& nextMatch, + const DocumentSourceGroup& thisGroup) { + if (thisGroup.getIdFields().size() != 1) { + return true; + } + return !expression::hasExistencePredicateOnPath(*(nextMatch.getMatchExpression()), "_id"_sd); +} + } // namespace bool DocumentSource::pushMatchBefore(Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { auto nextMatch = dynamic_cast<DocumentSourceMatch*>((*std::next(itr)).get()); - if (constraints().canSwapWithMatch && nextMatch && !nextMatch->isTextQuery()) { + auto thisGroup = dynamic_cast<DocumentSourceGroup*>(this); + if (constraints().canSwapWithMatch && nextMatch && !nextMatch->isTextQuery() && + (!thisGroup || groupMatchSwapVerified(*nextMatch, *thisGroup))) { // We're allowed to swap with a $match and the stage after us is a $match. Furthermore, the // $match does not contain a text search predicate, which we do not attempt to optimize // because such a $match must already be the first stage in the pipeline. We can attempt to diff --git a/src/mongo/db/pipeline/document_source_group.h b/src/mongo/db/pipeline/document_source_group.h index 79ea8048ec2..c7e4779eb62 100644 --- a/src/mongo/db/pipeline/document_source_group.h +++ b/src/mongo/db/pipeline/document_source_group.h @@ -120,15 +120,18 @@ public: BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx); StageConstraints constraints(Pipeline::SplitState pipeState) const final { - return {StreamType::kBlocking, - PositionRequirement::kNone, - HostTypeRequirement::kNone, - DiskUseRequirement::kWritesTmpData, - FacetRequirement::kAllowed, - TransactionRequirement::kAllowed, - LookupRequirement::kAllowed}; + StageConstraints constraints(StreamType::kBlocking, + PositionRequirement::kNone, + HostTypeRequirement::kNone, + DiskUseRequirement::kWritesTmpData, + FacetRequirement::kAllowed, + TransactionRequirement::kAllowed, + LookupRequirement::kAllowed); + constraints.canSwapWithMatch = true; + return constraints; } + /** * Add an accumulator, which will become a field in each Document that results from grouping. */ diff --git a/src/mongo/db/pipeline/pipeline_test.cpp b/src/mongo/db/pipeline/pipeline_test.cpp index ee5b3153b42..edc747d2657 100644 --- a/src/mongo/db/pipeline/pipeline_test.cpp +++ b/src/mongo/db/pipeline/pipeline_test.cpp @@ -678,6 +678,76 @@ TEST(PipelineOptimizationTest, LookupDoesNotAbsorbUnwindOnSubfieldOfAsButStillMo assertPipelineOptimizesTo(inputPipe, outputPipe); } +TEST(PipelineOptimizationTest, GroupShouldSwapWithMatchIfFilteringOnID) { + string inputPipe = + "[{$group : {_id:'$a'}}, " + " {$match: {_id : 4}}]"; + string outputPipe = + "[{$match: {a:{$eq : 4}}}, " + " {$group:{_id:'$a'}}]"; + string serializedPipe = + "[{$match: {a:{$eq :4}}}, " + " {$group:{_id:'$a'}}]"; + + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, GroupShouldNotSwapWithMatchIfNotFilteringOnID) { + string inputPipe = + "[{$group : {_id:'$a'}}, " + " {$match: {b : 4}}]"; + string outputPipe = + "[{$group : {_id:'$a'}}, " + " {$match: {b : {$eq: 4}}}]"; + string serializedPipe = + "[{$group : {_id:'$a'}}, " + " {$match: {b : 4}}]"; + + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, GroupShouldNotSwapWithMatchIfExistsPredicateOnID) { + string inputPipe = + "[{$group : {_id:'$x'}}, " + " {$match: {_id : {$exists: true}}}]"; + string outputPipe = + "[{$group : {_id:'$x'}}, " + " {$match: {_id : {$exists: true}}}]"; + string serializedPipe = + "[{$group : {_id:'$x'}}, " + " {$match: {_id : {$exists: true}}}]"; + + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, GroupShouldNotSwapWithCompoundMatchIfExistsPredicateOnID) { + string inputPipe = + "[{$group : {_id:'$x'}}, " + " {$match: {$or : [ {_id : {$exists: true}}, {_id : {$gt : 70}}]}}]"; + string outputPipe = + "[{$group : {_id:'$x'}}, " + " {$match: {$or : [ {_id : {$exists: true}}, {_id : {$gt : 70}}]}}]"; + string serializedPipe = + "[{$group : {_id:'$x'}}, " + " {$match: {$or : [ {_id : {$exists: true}}, {_id : {$gt : 70}}]}}]"; + + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + +TEST(PipelineOptimizationTest, GroupShouldSwapWithCompoundMatchIfFilteringOnID) { + string inputPipe = + "[{$group : {_id:'$x'}}, " + " {$match: {$or : [ {_id : {$lte : 50}}, {_id : {$gt : 70}}]}}]"; + string outputPipe = + "[{$match: {$or : [ {x : {$lte : 50}}, {x : {$gt : 70}}]}}," + "{$group : {_id:'$x'}}]"; + string serializedPipe = + "[{$match: {$or : [ {x : {$lte : 50}}, {x : {$gt : 70}}]}}," + "{$group : {_id:'$x'}}]"; + + assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe); +} + TEST(PipelineOptimizationTest, MatchShouldDuplicateItselfBeforeRedact) { string inputPipe = "[{$redact: '$$PRUNE'}, {$match: {a: 1, b:12}}]"; string outputPipe = |