summaryrefslogtreecommitdiff
path: root/src/mongo/db/pipeline
diff options
context:
space:
mode:
authorGeorge Wangensteen <george.wangensteen@10gen.com>2019-05-31 15:35:53 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-12-16 18:40:07 +0000
commit0aefd7c76f286b9a51296144df9fdb249fec8948 (patch)
tree552c5da38b1747ab084b6c698f1cce04ad147b2e /src/mongo/db/pipeline
parent265e3c7d0d40457f0e8483d3ed4161ac3896d04a (diff)
downloadmongo-0aefd7c76f286b9a51296144df9fdb249fec8948.tar.gz
SERVER-34741 Move $match in front of $group if condition is on group key
(cherry picked from commit 55e76198b1e41b5dbd5868d2ed8b914da29f0f29)
Diffstat (limited to 'src/mongo/db/pipeline')
-rw-r--r--src/mongo/db/pipeline/document_source.cpp27
-rw-r--r--src/mongo/db/pipeline/document_source_group.h17
-rw-r--r--src/mongo/db/pipeline/pipeline_test.cpp70
3 files changed, 106 insertions, 8 deletions
diff --git a/src/mongo/db/pipeline/document_source.cpp b/src/mongo/db/pipeline/document_source.cpp
index 158e97b33fa..67d530db7ca 100644
--- a/src/mongo/db/pipeline/document_source.cpp
+++ b/src/mongo/db/pipeline/document_source.cpp
@@ -32,6 +32,7 @@
#include "mongo/db/pipeline/document_source.h"
#include "mongo/db/matcher/expression_algo.h"
+#include "mongo/db/pipeline/document_source_group.h"
#include "mongo/db/pipeline/document_source_internal_shard_filter.h"
#include "mongo/db/pipeline/document_source_match.h"
#include "mongo/db/pipeline/document_source_sample.h"
@@ -209,12 +210,36 @@ StringMap<std::string> computeNewNamesAssumingAnyPathsNotRenamedAreUnmodified(
return renameOut;
}
+/**
+ * Verifies whether or not a $group is able to swap with a succeeding $match stage. While ordinarily
+ * $group can swap with a $match, it cannot if the following $match has an $exists predicate on _id,
+ * and the $group has exactly one field as the $group key. This is because every document will have
+ * an _id field following such a $group stage, including those whose group key was missing before
+ * the $group. As an example, the following optimization would be incorrect as the post-optimization
+ * pipeline would handle documents that had nullish _id fields differently. Thus, given such a
+ * $group and $match, this function would return false.
+ * {$group: {_id: "$x"}}
+ * {$match: {_id: {$exists: true}}
+ * ---->
+ * {$match: {x: {$exists: true}}
+ * {$group: {_id: "$x"}}
+ */
+bool groupMatchSwapVerified(const DocumentSourceMatch& nextMatch,
+ const DocumentSourceGroup& thisGroup) {
+ if (thisGroup.getIdFields().size() != 1) {
+ return true;
+ }
+ return !expression::hasExistencePredicateOnPath(*(nextMatch.getMatchExpression()), "_id"_sd);
+}
+
} // namespace
bool DocumentSource::pushMatchBefore(Pipeline::SourceContainer::iterator itr,
Pipeline::SourceContainer* container) {
auto nextMatch = dynamic_cast<DocumentSourceMatch*>((*std::next(itr)).get());
- if (constraints().canSwapWithMatch && nextMatch && !nextMatch->isTextQuery()) {
+ auto thisGroup = dynamic_cast<DocumentSourceGroup*>(this);
+ if (constraints().canSwapWithMatch && nextMatch && !nextMatch->isTextQuery() &&
+ (!thisGroup || groupMatchSwapVerified(*nextMatch, *thisGroup))) {
// We're allowed to swap with a $match and the stage after us is a $match. Furthermore, the
// $match does not contain a text search predicate, which we do not attempt to optimize
// because such a $match must already be the first stage in the pipeline. We can attempt to
diff --git a/src/mongo/db/pipeline/document_source_group.h b/src/mongo/db/pipeline/document_source_group.h
index 79ea8048ec2..c7e4779eb62 100644
--- a/src/mongo/db/pipeline/document_source_group.h
+++ b/src/mongo/db/pipeline/document_source_group.h
@@ -120,15 +120,18 @@ public:
BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx);
StageConstraints constraints(Pipeline::SplitState pipeState) const final {
- return {StreamType::kBlocking,
- PositionRequirement::kNone,
- HostTypeRequirement::kNone,
- DiskUseRequirement::kWritesTmpData,
- FacetRequirement::kAllowed,
- TransactionRequirement::kAllowed,
- LookupRequirement::kAllowed};
+ StageConstraints constraints(StreamType::kBlocking,
+ PositionRequirement::kNone,
+ HostTypeRequirement::kNone,
+ DiskUseRequirement::kWritesTmpData,
+ FacetRequirement::kAllowed,
+ TransactionRequirement::kAllowed,
+ LookupRequirement::kAllowed);
+ constraints.canSwapWithMatch = true;
+ return constraints;
}
+
/**
* Add an accumulator, which will become a field in each Document that results from grouping.
*/
diff --git a/src/mongo/db/pipeline/pipeline_test.cpp b/src/mongo/db/pipeline/pipeline_test.cpp
index ee5b3153b42..edc747d2657 100644
--- a/src/mongo/db/pipeline/pipeline_test.cpp
+++ b/src/mongo/db/pipeline/pipeline_test.cpp
@@ -678,6 +678,76 @@ TEST(PipelineOptimizationTest, LookupDoesNotAbsorbUnwindOnSubfieldOfAsButStillMo
assertPipelineOptimizesTo(inputPipe, outputPipe);
}
+TEST(PipelineOptimizationTest, GroupShouldSwapWithMatchIfFilteringOnID) {
+ string inputPipe =
+ "[{$group : {_id:'$a'}}, "
+ " {$match: {_id : 4}}]";
+ string outputPipe =
+ "[{$match: {a:{$eq : 4}}}, "
+ " {$group:{_id:'$a'}}]";
+ string serializedPipe =
+ "[{$match: {a:{$eq :4}}}, "
+ " {$group:{_id:'$a'}}]";
+
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, GroupShouldNotSwapWithMatchIfNotFilteringOnID) {
+ string inputPipe =
+ "[{$group : {_id:'$a'}}, "
+ " {$match: {b : 4}}]";
+ string outputPipe =
+ "[{$group : {_id:'$a'}}, "
+ " {$match: {b : {$eq: 4}}}]";
+ string serializedPipe =
+ "[{$group : {_id:'$a'}}, "
+ " {$match: {b : 4}}]";
+
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, GroupShouldNotSwapWithMatchIfExistsPredicateOnID) {
+ string inputPipe =
+ "[{$group : {_id:'$x'}}, "
+ " {$match: {_id : {$exists: true}}}]";
+ string outputPipe =
+ "[{$group : {_id:'$x'}}, "
+ " {$match: {_id : {$exists: true}}}]";
+ string serializedPipe =
+ "[{$group : {_id:'$x'}}, "
+ " {$match: {_id : {$exists: true}}}]";
+
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, GroupShouldNotSwapWithCompoundMatchIfExistsPredicateOnID) {
+ string inputPipe =
+ "[{$group : {_id:'$x'}}, "
+ " {$match: {$or : [ {_id : {$exists: true}}, {_id : {$gt : 70}}]}}]";
+ string outputPipe =
+ "[{$group : {_id:'$x'}}, "
+ " {$match: {$or : [ {_id : {$exists: true}}, {_id : {$gt : 70}}]}}]";
+ string serializedPipe =
+ "[{$group : {_id:'$x'}}, "
+ " {$match: {$or : [ {_id : {$exists: true}}, {_id : {$gt : 70}}]}}]";
+
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, GroupShouldSwapWithCompoundMatchIfFilteringOnID) {
+ string inputPipe =
+ "[{$group : {_id:'$x'}}, "
+ " {$match: {$or : [ {_id : {$lte : 50}}, {_id : {$gt : 70}}]}}]";
+ string outputPipe =
+ "[{$match: {$or : [ {x : {$lte : 50}}, {x : {$gt : 70}}]}},"
+ "{$group : {_id:'$x'}}]";
+ string serializedPipe =
+ "[{$match: {$or : [ {x : {$lte : 50}}, {x : {$gt : 70}}]}},"
+ "{$group : {_id:'$x'}}]";
+
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
TEST(PipelineOptimizationTest, MatchShouldDuplicateItselfBeforeRedact) {
string inputPipe = "[{$redact: '$$PRUNE'}, {$match: {a: 1, b:12}}]";
string outputPipe =