summaryrefslogtreecommitdiff
path: root/src/mongo/db
diff options
context:
space:
mode:
authorGeorge Wangensteen <george.wangensteen@10gen.com>2019-05-31 15:35:53 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-12-16 18:40:07 +0000
commit0aefd7c76f286b9a51296144df9fdb249fec8948 (patch)
tree552c5da38b1747ab084b6c698f1cce04ad147b2e /src/mongo/db
parent265e3c7d0d40457f0e8483d3ed4161ac3896d04a (diff)
downloadmongo-0aefd7c76f286b9a51296144df9fdb249fec8948.tar.gz
SERVER-34741 Move $match in front of $group if condition is on group key
(cherry picked from commit 55e76198b1e41b5dbd5868d2ed8b914da29f0f29)
Diffstat (limited to 'src/mongo/db')
-rw-r--r--src/mongo/db/matcher/expression_algo.cpp13
-rw-r--r--src/mongo/db/matcher/expression_algo.h7
-rw-r--r--src/mongo/db/matcher/expression_algo_test.cpp63
-rw-r--r--src/mongo/db/pipeline/document_source.cpp27
-rw-r--r--src/mongo/db/pipeline/document_source_group.h17
-rw-r--r--src/mongo/db/pipeline/pipeline_test.cpp70
6 files changed, 189 insertions, 8 deletions
diff --git a/src/mongo/db/matcher/expression_algo.cpp b/src/mongo/db/matcher/expression_algo.cpp
index 67acd98d672..127e9933fa8 100644
--- a/src/mongo/db/matcher/expression_algo.cpp
+++ b/src/mongo/db/matcher/expression_algo.cpp
@@ -357,6 +357,19 @@ splitMatchExpressionByWithoutRenames(unique_ptr<MatchExpression> expr,
namespace expression {
+bool hasExistencePredicateOnPath(const MatchExpression& expr, StringData path) {
+ if (expr.getCategory() == MatchExpression::MatchCategory::kLeaf) {
+ return (expr.matchType() == MatchExpression::MatchType::EXISTS && expr.path() == path);
+ }
+ for (size_t i = 0; i < expr.numChildren(); i++) {
+ MatchExpression* child = expr.getChild(i);
+ if (hasExistencePredicateOnPath(*child, path)) {
+ return true;
+ }
+ }
+ return false;
+}
+
bool isSubsetOf(const MatchExpression* lhs, const MatchExpression* rhs) {
invariant(lhs);
invariant(rhs);
diff --git a/src/mongo/db/matcher/expression_algo.h b/src/mongo/db/matcher/expression_algo.h
index 0f97c19a8d6..715e29416ef 100644
--- a/src/mongo/db/matcher/expression_algo.h
+++ b/src/mongo/db/matcher/expression_algo.h
@@ -46,6 +46,13 @@ namespace expression {
using NodeTraversalFunc = stdx::function<void(MatchExpression*, std::string)>;
/**
+ * Returns true if 'expr' has an $exists predicate on 'path.' Note that this only returns true
+ * for an $exists predicatated on the exact path given: it will not return true if there is a
+ * $exists predicated on a prefix of the path.
+ */
+bool hasExistencePredicateOnPath(const MatchExpression& expr, StringData path);
+
+/**
* Returns true if the documents matched by 'lhs' are a subset of the documents matched by
* 'rhs', i.e. a document matched by 'lhs' must also be matched by 'rhs', and false otherwise.
*
diff --git a/src/mongo/db/matcher/expression_algo_test.cpp b/src/mongo/db/matcher/expression_algo_test.cpp
index 29d344d8918..68075fbb396 100644
--- a/src/mongo/db/matcher/expression_algo_test.cpp
+++ b/src/mongo/db/matcher/expression_algo_test.cpp
@@ -1279,5 +1279,68 @@ TEST(IsPathPrefixOf, ComputesPrefixesCorrectly) {
ASSERT_FALSE(expression::isPathPrefixOf("a.b", "a"));
}
+TEST(HasExistencePredicateOnPath, IdentifiesLeavesCorrectly) {
+ BSONObj matchPredicate = fromjson("{$and: [{a: {$exists: true}}, {b: {$lte: 2}}]}");
+ boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
+ auto swMatchExpression = MatchExpressionParser::parse(matchPredicate, std::move(expCtx));
+ ASSERT_OK(swMatchExpression.getStatus());
+ ASSERT_TRUE(
+ expression::hasExistencePredicateOnPath(*swMatchExpression.getValue().get(), "a"_sd));
+ ASSERT_FALSE(
+ expression::hasExistencePredicateOnPath(*swMatchExpression.getValue().get(), "b"_sd));
+}
+
+TEST(HasExistencePredicateOnPath, HandlesMultiplePredicatesWithSamePath) {
+ BSONObj matchPredicate = fromjson("{$and: [{a: {$gt: 5000}}, {a: {$exists: false}}]}");
+ boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
+ auto swMatchExpression = MatchExpressionParser::parse(matchPredicate, std::move(expCtx));
+ ASSERT_OK(swMatchExpression.getStatus());
+ ASSERT_TRUE(
+ expression::hasExistencePredicateOnPath(*swMatchExpression.getValue().get(), "a"_sd));
+}
+
+TEST(HasExistencePredicateOnPath, DeeperTreeTest) {
+ BSONObj matchPredicate = fromjson(
+ "{$and: [{q: {$gt: 5000}}, {$and: [{z: {$lte: 50}},"
+ "{$or: [{f : {$gte: 4}}, {a : {$exists : true}}]}]}]}");
+ boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
+ auto swMatchExpression = MatchExpressionParser::parse(matchPredicate, std::move(expCtx));
+ ASSERT_OK(swMatchExpression.getStatus());
+ ASSERT_TRUE(
+ expression::hasExistencePredicateOnPath(*swMatchExpression.getValue().get(), "a"_sd));
+}
+
+TEST(HasExistencePredicateOnPath, HandlesDottedPathsInDeepTree) {
+ BSONObj matchPredicate = fromjson(
+ "{$and: [{q: {$gt: 5000}}, {$and: [{z: {$lte: 50}},"
+ "{$or: [{f : {$gte: 4}}, {'a.b.c.d' : {$exists : true}}]}]}]}");
+ boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
+ auto swMatchExpression = MatchExpressionParser::parse(matchPredicate, std::move(expCtx));
+ ASSERT_OK(swMatchExpression.getStatus());
+ ASSERT_TRUE(
+ expression::hasExistencePredicateOnPath(*swMatchExpression.getValue().get(), "a.b.c.d"_sd));
+}
+
+TEST(HasExistencePredicateOnPath, ReturnsFalseWhenExistsOnlyOnPrefix) {
+ BSONObj matchPredicate = fromjson(
+ "{$and: [{q: {$gt: 5000}}, {$and: [{z: {$lte: 50}},"
+ "{$or: [{f : {$gte: 4}}, {'a' : {$exists : true}}]}]}]}");
+ boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
+ auto swMatchExpression = MatchExpressionParser::parse(matchPredicate, std::move(expCtx));
+ ASSERT_OK(swMatchExpression.getStatus());
+ ASSERT_FALSE(
+ expression::hasExistencePredicateOnPath(*swMatchExpression.getValue().get(), "a.b"_sd));
+}
+
+TEST(HasExistencePredicateOnPath, ReturnsFalseWhenExistsOnSubpath) {
+ BSONObj matchPredicate = fromjson(
+ "{$and: [{q: {$gt: 5000}}, {$and: [{z: {$lte: 50}},"
+ "{$or: [{f : {$gte: 4}}, {'a.b' : {$exists : true}}]}]}]}");
+ boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
+ auto swMatchExpression = MatchExpressionParser::parse(matchPredicate, std::move(expCtx));
+ ASSERT_OK(swMatchExpression.getStatus());
+ ASSERT_FALSE(
+ expression::hasExistencePredicateOnPath(*swMatchExpression.getValue().get(), "a"_sd));
+}
} // namespace mongo
diff --git a/src/mongo/db/pipeline/document_source.cpp b/src/mongo/db/pipeline/document_source.cpp
index 158e97b33fa..67d530db7ca 100644
--- a/src/mongo/db/pipeline/document_source.cpp
+++ b/src/mongo/db/pipeline/document_source.cpp
@@ -32,6 +32,7 @@
#include "mongo/db/pipeline/document_source.h"
#include "mongo/db/matcher/expression_algo.h"
+#include "mongo/db/pipeline/document_source_group.h"
#include "mongo/db/pipeline/document_source_internal_shard_filter.h"
#include "mongo/db/pipeline/document_source_match.h"
#include "mongo/db/pipeline/document_source_sample.h"
@@ -209,12 +210,36 @@ StringMap<std::string> computeNewNamesAssumingAnyPathsNotRenamedAreUnmodified(
return renameOut;
}
+/**
+ * Verifies whether or not a $group is able to swap with a succeeding $match stage. While ordinarily
+ * $group can swap with a $match, it cannot if the following $match has an $exists predicate on _id,
+ * and the $group has exactly one field as the $group key. This is because every document will have
+ * an _id field following such a $group stage, including those whose group key was missing before
+ * the $group. As an example, the following optimization would be incorrect as the post-optimization
+ * pipeline would handle documents that had nullish _id fields differently. Thus, given such a
+ * $group and $match, this function would return false.
+ * {$group: {_id: "$x"}}
+ * {$match: {_id: {$exists: true}}
+ * ---->
+ * {$match: {x: {$exists: true}}
+ * {$group: {_id: "$x"}}
+ */
+bool groupMatchSwapVerified(const DocumentSourceMatch& nextMatch,
+ const DocumentSourceGroup& thisGroup) {
+ if (thisGroup.getIdFields().size() != 1) {
+ return true;
+ }
+ return !expression::hasExistencePredicateOnPath(*(nextMatch.getMatchExpression()), "_id"_sd);
+}
+
} // namespace
bool DocumentSource::pushMatchBefore(Pipeline::SourceContainer::iterator itr,
Pipeline::SourceContainer* container) {
auto nextMatch = dynamic_cast<DocumentSourceMatch*>((*std::next(itr)).get());
- if (constraints().canSwapWithMatch && nextMatch && !nextMatch->isTextQuery()) {
+ auto thisGroup = dynamic_cast<DocumentSourceGroup*>(this);
+ if (constraints().canSwapWithMatch && nextMatch && !nextMatch->isTextQuery() &&
+ (!thisGroup || groupMatchSwapVerified(*nextMatch, *thisGroup))) {
// We're allowed to swap with a $match and the stage after us is a $match. Furthermore, the
// $match does not contain a text search predicate, which we do not attempt to optimize
// because such a $match must already be the first stage in the pipeline. We can attempt to
diff --git a/src/mongo/db/pipeline/document_source_group.h b/src/mongo/db/pipeline/document_source_group.h
index 79ea8048ec2..c7e4779eb62 100644
--- a/src/mongo/db/pipeline/document_source_group.h
+++ b/src/mongo/db/pipeline/document_source_group.h
@@ -120,15 +120,18 @@ public:
BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& pExpCtx);
StageConstraints constraints(Pipeline::SplitState pipeState) const final {
- return {StreamType::kBlocking,
- PositionRequirement::kNone,
- HostTypeRequirement::kNone,
- DiskUseRequirement::kWritesTmpData,
- FacetRequirement::kAllowed,
- TransactionRequirement::kAllowed,
- LookupRequirement::kAllowed};
+ StageConstraints constraints(StreamType::kBlocking,
+ PositionRequirement::kNone,
+ HostTypeRequirement::kNone,
+ DiskUseRequirement::kWritesTmpData,
+ FacetRequirement::kAllowed,
+ TransactionRequirement::kAllowed,
+ LookupRequirement::kAllowed);
+ constraints.canSwapWithMatch = true;
+ return constraints;
}
+
/**
* Add an accumulator, which will become a field in each Document that results from grouping.
*/
diff --git a/src/mongo/db/pipeline/pipeline_test.cpp b/src/mongo/db/pipeline/pipeline_test.cpp
index ee5b3153b42..edc747d2657 100644
--- a/src/mongo/db/pipeline/pipeline_test.cpp
+++ b/src/mongo/db/pipeline/pipeline_test.cpp
@@ -678,6 +678,76 @@ TEST(PipelineOptimizationTest, LookupDoesNotAbsorbUnwindOnSubfieldOfAsButStillMo
assertPipelineOptimizesTo(inputPipe, outputPipe);
}
+TEST(PipelineOptimizationTest, GroupShouldSwapWithMatchIfFilteringOnID) {
+ string inputPipe =
+ "[{$group : {_id:'$a'}}, "
+ " {$match: {_id : 4}}]";
+ string outputPipe =
+ "[{$match: {a:{$eq : 4}}}, "
+ " {$group:{_id:'$a'}}]";
+ string serializedPipe =
+ "[{$match: {a:{$eq :4}}}, "
+ " {$group:{_id:'$a'}}]";
+
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, GroupShouldNotSwapWithMatchIfNotFilteringOnID) {
+ string inputPipe =
+ "[{$group : {_id:'$a'}}, "
+ " {$match: {b : 4}}]";
+ string outputPipe =
+ "[{$group : {_id:'$a'}}, "
+ " {$match: {b : {$eq: 4}}}]";
+ string serializedPipe =
+ "[{$group : {_id:'$a'}}, "
+ " {$match: {b : 4}}]";
+
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, GroupShouldNotSwapWithMatchIfExistsPredicateOnID) {
+ string inputPipe =
+ "[{$group : {_id:'$x'}}, "
+ " {$match: {_id : {$exists: true}}}]";
+ string outputPipe =
+ "[{$group : {_id:'$x'}}, "
+ " {$match: {_id : {$exists: true}}}]";
+ string serializedPipe =
+ "[{$group : {_id:'$x'}}, "
+ " {$match: {_id : {$exists: true}}}]";
+
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, GroupShouldNotSwapWithCompoundMatchIfExistsPredicateOnID) {
+ string inputPipe =
+ "[{$group : {_id:'$x'}}, "
+ " {$match: {$or : [ {_id : {$exists: true}}, {_id : {$gt : 70}}]}}]";
+ string outputPipe =
+ "[{$group : {_id:'$x'}}, "
+ " {$match: {$or : [ {_id : {$exists: true}}, {_id : {$gt : 70}}]}}]";
+ string serializedPipe =
+ "[{$group : {_id:'$x'}}, "
+ " {$match: {$or : [ {_id : {$exists: true}}, {_id : {$gt : 70}}]}}]";
+
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
+TEST(PipelineOptimizationTest, GroupShouldSwapWithCompoundMatchIfFilteringOnID) {
+ string inputPipe =
+ "[{$group : {_id:'$x'}}, "
+ " {$match: {$or : [ {_id : {$lte : 50}}, {_id : {$gt : 70}}]}}]";
+ string outputPipe =
+ "[{$match: {$or : [ {x : {$lte : 50}}, {x : {$gt : 70}}]}},"
+ "{$group : {_id:'$x'}}]";
+ string serializedPipe =
+ "[{$match: {$or : [ {x : {$lte : 50}}, {x : {$gt : 70}}]}},"
+ "{$group : {_id:'$x'}}]";
+
+ assertPipelineOptimizesAndSerializesTo(inputPipe, outputPipe, serializedPipe);
+}
+
TEST(PipelineOptimizationTest, MatchShouldDuplicateItselfBeforeRedact) {
string inputPipe = "[{$redact: '$$PRUNE'}, {$match: {a: 1, b:12}}]";
string outputPipe =