summaryrefslogtreecommitdiff
path: root/src/mongo/db/pipeline
diff options
context:
space:
mode:
authorHana Pearlman <hana.pearlman@mongodb.com>2021-03-10 14:16:01 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-03-15 20:13:15 +0000
commitfa66c2f9df10fbb8ef1083a286f45fbf0ec235fa (patch)
treef17748552cb02cf6b788c682dbeef7ce71b3125a /src/mongo/db/pipeline
parent7d9059301512c911850804ec67bdd0fb45f4067e (diff)
downloadmongo-fa66c2f9df10fbb8ef1083a286f45fbf0ec235fa.tar.gz
SERVER-53484: Allow rewrite to push meta predicates past $unpackBucket
Diffstat (limited to 'src/mongo/db/pipeline')
-rw-r--r--src/mongo/db/pipeline/SConscript1
-rw-r--r--src/mongo/db/pipeline/document_source.cpp2
-rw-r--r--src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp53
-rw-r--r--src/mongo/db/pipeline/document_source_internal_unpack_bucket.h11
-rw-r--r--src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp11
-rw-r--r--src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp297
-rw-r--r--src/mongo/db/pipeline/document_source_match.cpp17
-rw-r--r--src/mongo/db/pipeline/document_source_match.h25
8 files changed, 396 insertions, 21 deletions
diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript
index 3f538a39b3b..6baca8e610b 100644
--- a/src/mongo/db/pipeline/SConscript
+++ b/src/mongo/db/pipeline/SConscript
@@ -397,6 +397,7 @@ env.CppUnitTest(
'document_source_internal_unpack_bucket_test/internalize_project_test.cpp',
'document_source_internal_unpack_bucket_test/sort_reorder_test.cpp',
'document_source_internal_unpack_bucket_test/unpack_bucket_exec_test.cpp',
+ 'document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp',
'document_source_unwind_test.cpp',
'expression_and_test.cpp',
'expression_compare_test.cpp',
diff --git a/src/mongo/db/pipeline/document_source.cpp b/src/mongo/db/pipeline/document_source.cpp
index aa0ca4885ea..1fd84f3f54f 100644
--- a/src/mongo/db/pipeline/document_source.cpp
+++ b/src/mongo/db/pipeline/document_source.cpp
@@ -169,7 +169,7 @@ splitMatchByModifiedFields(const boost::intrusive_ptr<DocumentSourceMatch>& matc
semantic_analysis::extractModifiedDependencies(depsTracker.fields, preservedPaths);
}
}
- return match->splitSourceBy(modifiedPaths, modifiedPathsRet.renames);
+ return std::move(*match).splitSourceBy(modifiedPaths, modifiedPathsRet.renames);
}
/**
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
index c9a77ab25fc..b20be4cdd68 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
@@ -190,6 +190,13 @@ boost::intrusive_ptr<DocumentSourceSort> createMetadataSortForReorder(
maxMemoryUsageBytes);
}
+// Runs the pipeline optimizer over just the stages that come before the $_internalUnpackBucket
+// stage at 'itr', then places the optimized stages back in front of it. The prefix is optimized
+// in a scratch container, and 'container' is only modified once that has finished.
+void optimizePrefix(Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
+ Pipeline::SourceContainer stagesBeforeUnpack(container->begin(), itr);
+ Pipeline::optimizeContainer(&stagesBeforeUnpack);
+
+ // Swap the unoptimized prefix for the optimized one: erase() covers [begin, itr) and splice()
+ // inserts in front of 'itr'.
+ container->erase(container->begin(), itr);
+ container->splice(itr, stagesBeforeUnpack);
+}
} // namespace
void BucketUnpacker::reset(BSONObj&& bucket) {
@@ -566,6 +573,16 @@ DocumentSourceInternalUnpackBucket::createPredicatesOnBucketLevelField(
return nullptr;
}
+std::pair<boost::intrusive_ptr<DocumentSourceMatch>, boost::intrusive_ptr<DocumentSourceMatch>>
+DocumentSourceInternalUnpackBucket::splitMatchOnMetaAndRename(
+ boost::intrusive_ptr<DocumentSourceMatch> match) {
+ auto&& userMetaField = _bucketUnpacker.bucketSpec().metaField;
+ if (!userMetaField) {
+ // No metaField is configured, so nothing can be extracted: hand 'match' back as the
+ // remainder.
+ return {nullptr, match};
+ }
+
+ // Pull out the predicates that depend only on the user-facing metaField, renaming that field
+ // to the bucket-level meta field name in the extracted stage.
+ return std::move(*match).extractMatchOnFieldsAndRemainder(
+ {*userMetaField}, {{*userMetaField, BucketUnpacker::kBucketMetaFieldName.toString()}});
+}
+
Pipeline::SourceContainer::iterator DocumentSourceInternalUnpackBucket::doOptimizeAt(
Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
invariant(*itr == this);
@@ -600,20 +617,27 @@ Pipeline::SourceContainer::iterator DocumentSourceInternalUnpackBucket::doOptimi
// Optimize the pipeline after the $unpackBucket.
optimizeEndOfPipeline(itr, container);
- // Attempt to map predicates on bucketed fields to predicates on the control field.
if (auto nextMatch = dynamic_cast<DocumentSourceMatch*>((*std::next(itr)).get())) {
- if (auto match = createPredicatesOnBucketLevelField(nextMatch->getMatchExpression())) {
- // Optimize the newly created MatchExpression.
- auto optimized = MatchExpression::optimize(std::move(match));
- BSONObjBuilder bob;
- optimized->serialize(&bob);
-
- // Because we insert any possible $match first before performing other
- // $_internalUnpackBucket optimizations, it is not necessary to call
- // optimizeContainer() here to allow for the newly inserted stage to engage in further
- // optimizations with its neighbors, as this $match is already in the optimal place for
- // predicate pushdown.
- container->insert(itr, DocumentSourceMatch::create(bob.obj(), pExpCtx));
+ // Attempt to push predicates on the metaField past $_internalUnpackBucket.
+ auto [metaMatch, remainingMatch] = splitMatchOnMetaAndRename(nextMatch);
+
+ // 'metaMatch' is safe to move before $_internalUnpackBucket.
+ if (metaMatch) {
+ container->insert(itr, metaMatch);
+ }
+
+ // The old $match can be removed and potentially replaced with 'remainingMatch'.
+ container->erase(std::next(itr));
+ if (remainingMatch) {
+ container->insert(std::next(itr), remainingMatch);
+
+ // Attempt to map predicates on bucketed fields to predicates on the control field.
+ if (auto match =
+ createPredicatesOnBucketLevelField(remainingMatch->getMatchExpression())) {
+ BSONObjBuilder bob;
+ match->serialize(&bob);
+ container->insert(itr, DocumentSourceMatch::create(bob.obj(), pExpCtx));
+ }
}
}
@@ -624,6 +648,9 @@ Pipeline::SourceContainer::iterator DocumentSourceInternalUnpackBucket::doOptimi
internalizeProject(project, isInclusion);
}
+ // Optimize the prefix of the pipeline, now that all optimizations have been completed.
+ optimizePrefix(itr, container);
+
return container->end();
}
} // namespace mongo
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h
index 50a1513cc51..18316a1a3e9 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h
@@ -32,6 +32,7 @@
#include <set>
#include "mongo/db/pipeline/document_source.h"
+#include "mongo/db/pipeline/document_source_match.h"
namespace mongo {
@@ -206,6 +207,16 @@ public:
Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) const;
/**
+ * Attempts to split 'match' into two stages, where the first is dependent only on the metaField
+ * and the second is the remainder, so that applying them in sequence is equivalent to applying
+ * 'match' once. Returns a pair of intrusive_ptrs to $match stages; either pointer may be null.
+ * If the first is non-null, it will have the metaField renamed from the user defined name to
+ * 'kBucketMetaFieldName'.
+ *
+ * If this stage has no metaField configured, no split is attempted: the first pointer is null
+ * and the second is 'match' itself. Otherwise 'match' is handed to
+ * DocumentSourceMatch::extractMatchOnFieldsAndRemainder() as an rvalue, which moves the match
+ * expression out of it.
+ */
+ std::pair<boost::intrusive_ptr<DocumentSourceMatch>, boost::intrusive_ptr<DocumentSourceMatch>>
+ splitMatchOnMetaAndRename(boost::intrusive_ptr<DocumentSourceMatch> match);
+
+ /**
* Takes a predicate after $_internalUnpackBucket on a bucketed field as an argument and
* attempts to map it to a new predicate on the 'control' field. For example, the predicate
* {a: {$gt: 5}} will generate the predicate {control.max.a: {$_internalExprGt: 5}}, which will
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp
index 88e5a2a2109..7e5f9511da0 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp
@@ -216,12 +216,15 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
pipeline->optimizePipeline();
ASSERT_EQ(pipeline->getSources().size(), 3U);
- auto stages = pipeline->serializeToBson();
+ // To get the optimized $match from the pipeline, we have to serialize with explain.
+ auto stages = pipeline->writeExplainOps(ExplainOptions::Verbosity::kQueryPlanner);
ASSERT_EQ(stages.size(), 3U);
- ASSERT_BSONOBJ_EQ(stages[0], fromjson("{$match: {'control.max.b': {$_internalExprGt: 1}}}"));
- ASSERT_BSONOBJ_EQ(stages[1], unpackBucketObj);
- ASSERT_BSONOBJ_EQ(stages[2], matchObj);
+ ASSERT_BSONOBJ_EQ(stages[0].getDocument().toBson(),
+ fromjson("{$match: {'control.max.b': {$_internalExprGt: 1}}}"));
+ ASSERT_BSONOBJ_EQ(stages[1].getDocument().toBson(), unpackBucketObj);
+ ASSERT_BSONOBJ_EQ(stages[2].getDocument().toBson(),
+ fromjson("{$match: {$and: [{b: {$gt: 1}}, {a: {$not: {$eq: 5}}}]}}"));
}
TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp
new file mode 100644
index 00000000000..535e182e659
--- /dev/null
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp
@@ -0,0 +1,297 @@
+/**
+ * Copyright (C) 2020-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/pipeline/aggregation_context_fixture.h"
+#include "mongo/db/pipeline/document_source_internal_unpack_bucket.h"
+#include "mongo/db/pipeline/document_source_match.h"
+#include "mongo/db/pipeline/pipeline.h"
+#include "mongo/db/query/util/make_data_structure.h"
+#include "mongo/unittest/bson_test_util.h"
+
+namespace mongo {
+namespace {
+
+using InternalUnpackBucketSplitMatchOnMetaAndRename = AggregationContextFixture;
+
+TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, DoesNotSplitWhenNoMetaFieldSpecified) {
+ // Build an unpack stage whose spec names a timeField but no metaField.
+ const auto unpackSpec = fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo'}}");
+ auto unpack =
+ DocumentSourceInternalUnpackBucket::createFromBson(unpackSpec.firstElement(), getExpCtx());
+ auto* unpackStage = dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get());
+ auto originalMatch = DocumentSourceMatch::create(fromjson("{meta: {$gt: 1}}"), getExpCtx());
+
+ auto [metaPart, remainder] = unpackStage->splitMatchOnMetaAndRename(originalMatch.get());
+
+ // Without a metaField there is nothing to split on, so the whole predicate is the remainder.
+ ASSERT_FALSE(metaPart);
+ ASSERT_TRUE(remainder);
+ ASSERT_BSONOBJ_EQ(originalMatch->getQuery(), remainder->getQuery());
+}
+
+TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, DoesNotSplitWhenNoMatchOnMetaField) {
+ const auto unpackSpec =
+ fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}");
+ auto unpack =
+ DocumentSourceInternalUnpackBucket::createFromBson(unpackSpec.firstElement(), getExpCtx());
+ auto* unpackStage = dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get());
+ auto originalMatch = DocumentSourceMatch::create(fromjson("{a: {$gt: 1}}"), getExpCtx());
+
+ auto [metaPart, remainder] = unpackStage->splitMatchOnMetaAndRename(originalMatch.get());
+
+ // The predicate never mentions the metaField, so all of it stays in the remainder.
+ ASSERT_FALSE(metaPart);
+ ASSERT_TRUE(remainder);
+ ASSERT_BSONOBJ_EQ(originalMatch->getQuery(), remainder->getQuery());
+}
+
+TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, SplitsWhenEntireMatchIsOnMetaField) {
+ const auto unpackSpec =
+ fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}");
+ auto unpack =
+ DocumentSourceInternalUnpackBucket::createFromBson(unpackSpec.firstElement(), getExpCtx());
+ auto* unpackStage = dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get());
+ auto originalMatch = DocumentSourceMatch::create(
+ fromjson("{$or: [{myMeta: {$gt: 1}}, {'myMeta.a': {$lt: 1}}]}"), getExpCtx());
+
+ auto [metaPart, remainder] = unpackStage->splitMatchOnMetaAndRename(originalMatch.get());
+
+ // Every branch of the $or is on the metaField, so the whole predicate is extracted and
+ // renamed, leaving no remainder.
+ ASSERT_TRUE(metaPart);
+ ASSERT_BSONOBJ_EQ(fromjson("{$or: [{meta: {$gt: 1}}, {'meta.a': {$lt: 1}}]}"),
+ metaPart->getQuery());
+ ASSERT_FALSE(remainder);
+}
+
+TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename,
+ SplitsWhenIndependentPartOfMatchIsOnMetaField) {
+ const auto unpackSpec =
+ fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}");
+ auto unpack =
+ DocumentSourceInternalUnpackBucket::createFromBson(unpackSpec.firstElement(), getExpCtx());
+ auto* unpackStage = dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get());
+ auto originalMatch = DocumentSourceMatch::create(
+ fromjson("{$and: [{'myMeta.a': {$gt: 1}}, {b: {$lt: 1}}]}"), getExpCtx());
+
+ auto [metaPart, remainder] = unpackStage->splitMatchOnMetaAndRename(originalMatch.get());
+
+ // The $and conjunct on the metaField is independent of the other conjunct, so it is
+ // extracted and renamed while the conjunct on 'b' is left behind.
+ ASSERT_TRUE(metaPart);
+ ASSERT_BSONOBJ_EQ(fromjson("{'meta.a': {$gt: 1}}"), metaPart->getQuery());
+ ASSERT_TRUE(remainder);
+ ASSERT_BSONOBJ_EQ(fromjson("{b: {$lt: 1}}"), remainder->getQuery());
+}
+
+TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename,
+ DoesNotSplitsWhenDependentPartOfMatchIsOnMetaField) {
+ const auto unpackSpec =
+ fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'meta'}}");
+ auto unpack =
+ DocumentSourceInternalUnpackBucket::createFromBson(unpackSpec.firstElement(), getExpCtx());
+ auto* unpackStage = dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get());
+ auto originalMatch = DocumentSourceMatch::create(
+ fromjson("{$or: [{'meta.a': {$gt: 1}}, {metaXYZ: {$lt: 1}}]}"), getExpCtx());
+
+ auto [metaPart, remainder] = unpackStage->splitMatchOnMetaAndRename(originalMatch.get());
+
+ // 'metaXYZ' merely shares a string prefix with 'meta'; it is not a subfield of it. Because
+ // the two predicates are joined by a top-level $or, the metaField branch cannot be separated
+ // from the other one, so nothing is extracted.
+ ASSERT_FALSE(metaPart);
+ ASSERT_TRUE(remainder);
+ ASSERT_BSONOBJ_EQ(originalMatch->getQuery(), remainder->getQuery());
+}
+
+TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, SplitsWhenSharedPrefixOfMetaIsNotSubfield) {
+ const auto unpackSpec =
+ fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}");
+ auto unpack =
+ DocumentSourceInternalUnpackBucket::createFromBson(unpackSpec.firstElement(), getExpCtx());
+ auto* unpackStage = dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get());
+ auto originalMatch = DocumentSourceMatch::create(
+ fromjson("{$and: [{myMeta: {$gt: 1}}, {myMetaXYZ: {$lt: 1}}]}"), getExpCtx());
+
+ auto [metaPart, remainder] = unpackStage->splitMatchOnMetaAndRename(originalMatch.get());
+
+ // Only the conjunct on 'myMeta' itself is extracted and renamed. 'myMetaXYZ' starts with the
+ // same characters but is a different field, so it must stay in the remainder.
+ ASSERT_TRUE(metaPart);
+ ASSERT_BSONOBJ_EQ(fromjson("{meta: {$gt: 1}}"), metaPart->getQuery());
+ ASSERT_TRUE(remainder);
+ ASSERT_BSONOBJ_EQ(fromjson("{myMetaXYZ: {$lt: 1}}"), remainder->getQuery());
+}
+
+TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, SplitsAndRenamesWithExpr) {
+ const auto unpackSpec =
+ fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}");
+ auto unpack =
+ DocumentSourceInternalUnpackBucket::createFromBson(unpackSpec.firstElement(), getExpCtx());
+ auto* unpackStage = dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get());
+ auto originalMatch =
+ DocumentSourceMatch::create(fromjson("{$expr: {$eq: ['$myMeta.a', 2]}}"), getExpCtx());
+
+ auto [metaPart, remainder] = unpackStage->splitMatchOnMetaAndRename(originalMatch.get());
+
+ // A $expr that only references the metaField is extracted, with its field path renamed.
+ ASSERT_TRUE(metaPart);
+ ASSERT_BSONOBJ_EQ(fromjson("{$expr: {$eq: ['$meta.a', {$const: 2}]}}"),
+ metaPart->getQuery());
+ ASSERT_FALSE(remainder);
+}
+
+TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, SplitsAndRenamesWithType) {
+ const auto unpackSpec =
+ fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}");
+ auto unpack =
+ DocumentSourceInternalUnpackBucket::createFromBson(unpackSpec.firstElement(), getExpCtx());
+ auto* unpackStage = dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get());
+ auto originalMatch =
+ DocumentSourceMatch::create(fromjson("{myMeta: {$type: [4]}}"), getExpCtx());
+
+ auto [metaPart, remainder] = unpackStage->splitMatchOnMetaAndRename(originalMatch.get());
+
+ // A $type predicate on the metaField is extracted and renamed like any other predicate.
+ ASSERT_TRUE(metaPart);
+ ASSERT_BSONOBJ_EQ(fromjson("{meta: {$type: [4]}}"), metaPart->getQuery());
+ ASSERT_FALSE(remainder);
+}
+
+TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, SplitsAndRenamesWhenMultiplePredicates) {
+ const auto unpackSpec =
+ fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}");
+ auto unpack =
+ DocumentSourceInternalUnpackBucket::createFromBson(unpackSpec.firstElement(), getExpCtx());
+ auto* unpackStage = dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get());
+ auto originalMatch = DocumentSourceMatch::create(
+ fromjson("{myMeta: {$gte: 0, $lte: 5}, l: {$type: [4]}}"), getExpCtx());
+
+ auto [metaPart, remainder] = unpackStage->splitMatchOnMetaAndRename(originalMatch.get());
+
+ // Both bounds on the metaField are extracted and renamed together; the predicate on 'l' is
+ // the remainder.
+ ASSERT_TRUE(metaPart);
+ ASSERT_BSONOBJ_EQ(fromjson("{$and: [{meta: {$gte: 0}}, {meta: {$lte: 5}}]}"),
+ metaPart->getQuery());
+ ASSERT_TRUE(remainder);
+ ASSERT_BSONOBJ_EQ(fromjson("{l: {$type: [4]}}"), remainder->getQuery());
+}
+
+TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, SplitsAndRenamesWhenSeveralFieldReferences) {
+ const auto unpackSpec =
+ fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}");
+ auto unpack =
+ DocumentSourceInternalUnpackBucket::createFromBson(unpackSpec.firstElement(), getExpCtx());
+ auto* unpackStage = dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get());
+ auto originalMatch = DocumentSourceMatch::create(
+ fromjson("{$and: [{myMeta: {$type: [3]}}, {'myMeta.a': {$gte: "
+ "0}}, {'myMeta.b': {$type: [4]}}, {a: {$in: ['$b', '$c']}}]}"),
+ getExpCtx());
+
+ auto [metaPart, remainder] = unpackStage->splitMatchOnMetaAndRename(originalMatch.get());
+
+ // All three conjuncts that reference the metaField (directly or through a subfield) are
+ // extracted and renamed; the conjunct on 'a' is the remainder.
+ ASSERT_TRUE(metaPart);
+ ASSERT_BSONOBJ_EQ(fromjson("{$and: [{meta: {$type: [3]}}, {'meta.a': {$gte: 0}}, "
+ "{'meta.b': {$type: [4]}}]}"),
+ metaPart->getQuery());
+ ASSERT_TRUE(remainder);
+ ASSERT_BSONOBJ_EQ(fromjson("{a: {$in: ['$b', '$c']}}"), remainder->getQuery());
+}
+
+TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, OptimizeSplitsMatchAndMapsControlPredicates) {
+ auto unpack =
+ fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}");
+ auto userMatch = fromjson("{$match: {myMeta: {$gte: 0, $lte: 5}, a: {$lte: 4}}}");
+ auto pipeline = Pipeline::parse(makeVector(unpack, userMatch), getExpCtx());
+ ASSERT_EQ(2u, pipeline->getSources().size());
+
+ pipeline->optimizePipeline();
+
+ // The metaField part of the $match is split off and renamed, and a separate optimization maps
+ // the predicate on 'a' to one on 'control.min.a'. Both of those new $match stages go in front
+ // of $_internalUnpackBucket, where they are merged into one; the predicate on 'a' also remains
+ // after the unpack stage.
+ auto stages = pipeline->serializeToBson();
+ ASSERT_EQ(3u, stages.size());
+ ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [{$and: [{meta: {$gte: 0}}, {meta: {$lte: 5}}]}, "
+ "{'control.min.a': {$_internalExprLte: 4}}]}}"),
+ stages[0]);
+ ASSERT_BSONOBJ_EQ(unpack, stages[1]);
+ ASSERT_BSONOBJ_EQ(fromjson("{$match: {a: {$lte: 4}}}"), stages[2]);
+}
+
+TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, OptimizeMovesMetaMatchBeforeUnpack) {
+ auto unpack =
+ fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}");
+ auto userMatch = fromjson("{$match: {myMeta: {$gte: 0}}}");
+ auto pipeline = Pipeline::parse(makeVector(unpack, userMatch), getExpCtx());
+ ASSERT_EQ(2u, pipeline->getSources().size());
+
+ pipeline->optimizePipeline();
+
+ // The whole $match is on the metaField, so it is renamed and moved in front of
+ // $_internalUnpackBucket; nothing is left after the unpack stage.
+ auto stages = pipeline->serializeToBson();
+ ASSERT_EQ(2u, stages.size());
+ ASSERT_BSONOBJ_EQ(fromjson("{$match: {meta: {$gte: 0}}}"), stages[0]);
+ ASSERT_BSONOBJ_EQ(unpack, stages[1]);
+}
+
+TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename,
+ OptimizeDoesNotErrorOnFailedSplitOfMetaMatch) {
+ auto unpack =
+ fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}");
+ auto userMatch = fromjson(
+ "{$match: {$and: [{x: {$lte: 1}}, {$or: [{'myMeta.a': "
+ "{$gt: 1}}, {y: {$lt: 1}}]}]}}");
+ auto pipeline = Pipeline::parse(makeVector(unpack, userMatch), getExpCtx());
+ ASSERT_EQ(2u, pipeline->getSources().size());
+
+ pipeline->optimizePipeline();
+
+ // The metaField predicate sits inside an $or with a predicate on 'y', so it cannot be split
+ // out. The predicate on 'x' should nevertheless still be mapped to a control-field predicate.
+ auto stages = pipeline->serializeToBson();
+ ASSERT_EQ(3u, stages.size());
+ ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [{'control.min.x': {$_internalExprLte: 1}}]}}"),
+ stages[0]);
+ ASSERT_BSONOBJ_EQ(unpack, stages[1]);
+ ASSERT_BSONOBJ_EQ(userMatch, stages[2]);
+}
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/db/pipeline/document_source_match.cpp b/src/mongo/db/pipeline/document_source_match.cpp
index d292f7a64de..6686626438b 100644
--- a/src/mongo/db/pipeline/document_source_match.cpp
+++ b/src/mongo/db/pipeline/document_source_match.cpp
@@ -382,9 +382,22 @@ void DocumentSourceMatch::joinMatchWith(intrusive_ptr<DocumentSourceMatch> other
pair<intrusive_ptr<DocumentSourceMatch>, intrusive_ptr<DocumentSourceMatch>>
DocumentSourceMatch::splitSourceBy(const std::set<std::string>& fields,
- const StringMap<std::string>& renames) {
+ const StringMap<std::string>& renames) && {
+ return std::move(*this).splitSourceByFunc(fields, renames, expression::isIndependentOf);
+}
+
+pair<intrusive_ptr<DocumentSourceMatch>, intrusive_ptr<DocumentSourceMatch>>
+DocumentSourceMatch::extractMatchOnFieldsAndRemainder(const std::set<std::string>& fields,
+ const StringMap<std::string>& renames) && {
+ // Split using the predicate that selects sub-expressions depending only on 'fields'.
+ expression::ShouldSplitExprFunc dependsOnlyOnFields = expression::isOnlyDependentOn;
+ return std::move(*this).splitSourceByFunc(fields, renames, dependsOnlyOnFields);
+}
+
+pair<intrusive_ptr<DocumentSourceMatch>, intrusive_ptr<DocumentSourceMatch>>
+DocumentSourceMatch::splitSourceByFunc(const std::set<std::string>& fields,
+ const StringMap<std::string>& renames,
+ expression::ShouldSplitExprFunc func) && {
pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> newExpr(
- expression::splitMatchExpressionBy(std::move(_expression), fields, renames));
+ expression::splitMatchExpressionBy(std::move(_expression), fields, renames, func));
invariant(newExpr.first || newExpr.second);
diff --git a/src/mongo/db/pipeline/document_source_match.h b/src/mongo/db/pipeline/document_source_match.h
index 7c4150ecdb3..7618136946d 100644
--- a/src/mongo/db/pipeline/document_source_match.h
+++ b/src/mongo/db/pipeline/document_source_match.h
@@ -34,6 +34,7 @@
#include <utility>
#include "mongo/client/connpool.h"
+#include "mongo/db/matcher/expression_algo.h"
#include "mongo/db/matcher/matcher.h"
#include "mongo/db/pipeline/document_source.h"
#include "mongo/util/intrusive_counter.h"
@@ -187,7 +188,24 @@ public:
* z: "baz"}} and {$match: {a: "foo"}}.
*/
std::pair<boost::intrusive_ptr<DocumentSourceMatch>, boost::intrusive_ptr<DocumentSourceMatch>>
- splitSourceBy(const std::set<std::string>& fields, const StringMap<std::string>& renames);
+ splitSourceBy(const std::set<std::string>& fields, const StringMap<std::string>& renames) &&;
+
+ /**
+ * Attempt to split this $match into two stages, where the first is ONLY dependent upon paths
+ * from 'fields', and where applying them in sequence is equivalent to applying this stage once.
+ *
+ * Will return two intrusive_ptrs to new $match stages, where the first pointer depends only on
+ * 'fields' and the second is the remainder. Either pointer may be null, so be sure to check the
+ * return value.
+ *
+ * The 'renames' structure maps from a field to an alias that should be used in the dependent
+ * portion of the match. For example, suppose that we split by fields "a" with the rename "a" =>
+ * "c". The match {$match: {a: "foo", b: "bar", z: "baz"}} will split into {$match: {c: "foo"}}
+ * and {$match: {b: "bar", z: "baz"}}.
+ *
+ * This function is rvalue-qualified, so it must be invoked on an rvalue, e.g.
+ * std::move(*stage).extractMatchOnFieldsAndRemainder(...). The split moves this stage's match
+ * expression out of it.
+ */
+ std::pair<boost::intrusive_ptr<DocumentSourceMatch>, boost::intrusive_ptr<DocumentSourceMatch>>
+ extractMatchOnFieldsAndRemainder(const std::set<std::string>& fields,
+ const StringMap<std::string>& renames) &&;
boost::optional<DistributedPlanLogic> distributedPlanLogic() final {
return boost::none;
@@ -206,6 +224,11 @@ protected:
BSONObj _predicate;
private:
+ std::pair<boost::intrusive_ptr<DocumentSourceMatch>, boost::intrusive_ptr<DocumentSourceMatch>>
+ splitSourceByFunc(const std::set<std::string>& fields,
+ const StringMap<std::string>& renames,
+ expression::ShouldSplitExprFunc func) &&;
+
std::unique_ptr<MatchExpression> _expression;
bool _isTextQuery;