SERVER-53484: Allow rewrite to push meta predicates past $unpackBucket

author: Hana Pearlman <hana.pearlman@mongodb.com> 2021-03-10 14:16:01 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-03-15 20:13:15 +0000
commit: fa66c2f9df10fbb8ef1083a286f45fbf0ec235fa (patch)
tree: f17748552cb02cf6b788c682dbeef7ce71b3125a /src/mongo/db/pipeline
parent: 7d9059301512c911850804ec67bdd0fb45f4067e (diff)
download: mongo-fa66c2f9df10fbb8ef1083a286f45fbf0ec235fa.tar.gz
8 files changed, 396 insertions, 21 deletions
diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript
index 3f538a39b3b..6baca8e610b 100644
--- a/src/mongo/db/pipeline/SConscript
+++ b/src/mongo/db/pipeline/SConscript
@@ -397,6 +397,7 @@ env.CppUnitTest(
         'document_source_internal_unpack_bucket_test/internalize_project_test.cpp',
         'document_source_internal_unpack_bucket_test/sort_reorder_test.cpp',
         'document_source_internal_unpack_bucket_test/unpack_bucket_exec_test.cpp',
+        'document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp',
         'document_source_unwind_test.cpp',
         'expression_and_test.cpp',
         'expression_compare_test.cpp',
diff --git a/src/mongo/db/pipeline/document_source.cpp b/src/mongo/db/pipeline/document_source.cpp
index aa0ca4885ea..1fd84f3f54f 100644
--- a/src/mongo/db/pipeline/document_source.cpp
+++ b/src/mongo/db/pipeline/document_source.cpp
@@ -169,7 +169,7 @@ splitMatchByModifiedFields(const boost::intrusive_ptr<DocumentSourceMatch>& matc
                 semantic_analysis::extractModifiedDependencies(depsTracker.fields, preservedPaths);
         }
     }
-    return match->splitSourceBy(modifiedPaths, modifiedPathsRet.renames);
+    return std::move(*match).splitSourceBy(modifiedPaths, modifiedPathsRet.renames);
 }
 
 /**
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
index c9a77ab25fc..b20be4cdd68 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
@@ -190,6 +190,13 @@ boost::intrusive_ptr<DocumentSourceSort> createMetadataSortForReorder(
                                       maxMemoryUsageBytes);
 }
 
+// Optimize the section of the pipeline before the $_internalUnpackBucket stage.
+void optimizePrefix(Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
+    auto prefix = Pipeline::SourceContainer(container->begin(), itr);
+    Pipeline::optimizeContainer(&prefix);
+    container->erase(container->begin(), itr);
+    container->splice(itr, prefix);
+}
 }  // namespace
 
 void BucketUnpacker::reset(BSONObj&& bucket) {
@@ -566,6 +573,16 @@ DocumentSourceInternalUnpackBucket::createPredicatesOnBucketLevelField(
     return nullptr;
 }
 
+std::pair<boost::intrusive_ptr<DocumentSourceMatch>, boost::intrusive_ptr<DocumentSourceMatch>>
+DocumentSourceInternalUnpackBucket::splitMatchOnMetaAndRename(
+    boost::intrusive_ptr<DocumentSourceMatch> match) {
+    if (auto&& metaField = _bucketUnpacker.bucketSpec().metaField) {
+        return std::move(*match).extractMatchOnFieldsAndRemainder(
+            {*metaField}, {{*metaField, BucketUnpacker::kBucketMetaFieldName.toString()}});
+    }
+    return {nullptr, match};
+}
+
 Pipeline::SourceContainer::iterator DocumentSourceInternalUnpackBucket::doOptimizeAt(
     Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
     invariant(*itr == this);
@@ -600,20 +617,27 @@ Pipeline::SourceContainer::iterator DocumentSourceInternalUnpackBucket::doOptimi
     // Optimize the pipeline after the $unpackBucket.
     optimizeEndOfPipeline(itr, container);
 
-    // Attempt to map predicates on bucketed fields to predicates on the control field.
     if (auto nextMatch = dynamic_cast<DocumentSourceMatch*>((*std::next(itr)).get())) {
-        if (auto match = createPredicatesOnBucketLevelField(nextMatch->getMatchExpression())) {
-            // Optimize the newly created MatchExpression.
-            auto optimized = MatchExpression::optimize(std::move(match));
-            BSONObjBuilder bob;
-            optimized->serialize(&bob);
-
-            // Because we insert any possible $match first before performing other
-            // $_internalUnpackBucket optimizations, it is not necessary to call
-            // optimizeContainer() here to allow for the newly inserted stage to engage in further
-            // optimizations with its neighbors, as this $match is already in the optimal place for
-            // predicate pushdown.
-            container->insert(itr, DocumentSourceMatch::create(bob.obj(), pExpCtx));
+        // Attempt to push predicates on the metaField past $_internalUnpackBucket.
+        auto [metaMatch, remainingMatch] = splitMatchOnMetaAndRename(nextMatch);
+
+        // 'metaMatch' is safe to move before $_internalUnpackBucket.
+        if (metaMatch) {
+            container->insert(itr, metaMatch);
+        }
+
+        // The old $match can be removed and potentially replaced with 'remainingMatch'.
+        container->erase(std::next(itr));
+        if (remainingMatch) {
+            container->insert(std::next(itr), remainingMatch);
+
+            // Attempt to map predicates on bucketed fields to predicates on the control field.
+            if (auto match =
+                    createPredicatesOnBucketLevelField(remainingMatch->getMatchExpression())) {
+                BSONObjBuilder bob;
+                match->serialize(&bob);
+                container->insert(itr, DocumentSourceMatch::create(bob.obj(), pExpCtx));
+            }
         }
     }
 
@@ -624,6 +648,9 @@ Pipeline::SourceContainer::iterator DocumentSourceInternalUnpackBucket::doOptimi
         internalizeProject(project, isInclusion);
     }
 
+    // Optimize the prefix of the pipeline, now that all optimizations have been completed.
+    optimizePrefix(itr, container);
+
     return container->end();
 }
 }  // namespace mongo
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h
index 50a1513cc51..18316a1a3e9 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h
@@ -32,6 +32,7 @@
 #include <set>
 
 #include "mongo/db/pipeline/document_source.h"
+#include "mongo/db/pipeline/document_source_match.h"
 
 namespace mongo {
 
@@ -206,6 +207,16 @@ public:
         Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) const;
 
     /**
+     * Attempts to split 'match' into two stages, where the first is dependent only on the metaField
+     * and the second is the remainder, so that applying them in sequence is equivalent to applying
+     * 'match' once. Either returned pointer may be null. If this stage has no metaField, no split
+     * is attempted and 'match' itself is returned as the second pointer. If the first is non-null,
+     * it will have the metaField renamed from the user-defined name to 'kBucketMetaFieldName'.
+     */
+    std::pair<boost::intrusive_ptr<DocumentSourceMatch>, boost::intrusive_ptr<DocumentSourceMatch>>
+    splitMatchOnMetaAndRename(boost::intrusive_ptr<DocumentSourceMatch> match);
+
+    /**
      * Takes a predicate after $_internalUnpackBucket on a bucketed field as an argument and
      * attempts to map it to a new predicate on the 'control' field. For example, the predicate
      * {a: {$gt: 5}} will generate the predicate {control.max.a: {$_internalExprGt: 5}}, which will
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp
index 88e5a2a2109..7e5f9511da0 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp
@@ -216,12 +216,15 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
     pipeline->optimizePipeline();
     ASSERT_EQ(pipeline->getSources().size(), 3U);
 
-    auto stages = pipeline->serializeToBson();
+    // To get the optimized $match from the pipeline, we have to serialize with explain.
+    auto stages = pipeline->writeExplainOps(ExplainOptions::Verbosity::kQueryPlanner);
     ASSERT_EQ(stages.size(), 3U);
 
-    ASSERT_BSONOBJ_EQ(stages[0], fromjson("{$match: {'control.max.b': {$_internalExprGt: 1}}}"));
-    ASSERT_BSONOBJ_EQ(stages[1], unpackBucketObj);
-    ASSERT_BSONOBJ_EQ(stages[2], matchObj);
+    ASSERT_BSONOBJ_EQ(stages[0].getDocument().toBson(),
+                      fromjson("{$match: {'control.max.b': {$_internalExprGt: 1}}}"));
+    ASSERT_BSONOBJ_EQ(stages[1].getDocument().toBson(), unpackBucketObj);
+    ASSERT_BSONOBJ_EQ(stages[2].getDocument().toBson(),
+                      fromjson("{$match: {$and: [{b: {$gt: 1}}, {a: {$not: {$eq: 5}}}]}}"));
 }
 
 TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest,
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp
new file mode 100644
index 00000000000..535e182e659
--- /dev/null
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp
@@ -0,0 +1,297 @@
+/**
+ *    Copyright (C) 2020-present MongoDB, Inc.
+ *
+ *    This program is free software: you can redistribute it and/or modify
+ *    it under the terms of the Server Side Public License, version 1,
+ *    as published by MongoDB, Inc.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    Server Side Public License for more details.
+ *
+ *    You should have received a copy of the Server Side Public License
+ *    along with this program. If not, see
+ *    <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ *    As a special exception, the copyright holders give permission to link the
+ *    code of portions of this program with the OpenSSL library under certain
+ *    conditions as described in each individual source file and distribute
+ *    linked combinations including the program with the OpenSSL library. You
+ *    must comply with the Server Side Public License in all respects for
+ *    all of the code used other than as permitted herein. If you modify file(s)
+ *    with this exception, you may extend this exception to your version of the
+ *    file(s), but you are not obligated to do so. If you do not wish to do so,
+ *    delete this exception statement from your version. If you delete this
+ *    exception statement from all source files in the program, then also delete
+ *    it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/pipeline/aggregation_context_fixture.h"
+#include "mongo/db/pipeline/document_source_internal_unpack_bucket.h"
+#include "mongo/db/pipeline/document_source_match.h"
+#include "mongo/db/pipeline/pipeline.h"
+#include "mongo/db/query/util/make_data_structure.h"
+#include "mongo/unittest/bson_test_util.h"
+
+namespace mongo {
+namespace {
+
+using InternalUnpackBucketSplitMatchOnMetaAndRename = AggregationContextFixture;
+
+TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, DoesNotSplitWhenNoMetaFieldSpecified) {
+    auto unpack = DocumentSourceInternalUnpackBucket::createFromBson(
+        fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo'}}").firstElement(),
+        getExpCtx());
+    auto matchToSplit = DocumentSourceMatch::create(fromjson("{meta: {$gt: 1}}"), getExpCtx());
+
+    auto [metaOnlyMatch, remainingMatch] =
+        dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get())
+            ->splitMatchOnMetaAndRename(matchToSplit.get());
+
+    // Can't split when there is no metaField specified in the stage.
+    ASSERT_FALSE(metaOnlyMatch);
+    ASSERT_TRUE(remainingMatch);
+    ASSERT_BSONOBJ_EQ(matchToSplit->getQuery(), remainingMatch->getQuery());
+}
+
+TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, DoesNotSplitWhenNoMatchOnMetaField) {
+    auto unpack = DocumentSourceInternalUnpackBucket::createFromBson(
+        fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}")
+            .firstElement(),
+        getExpCtx());
+    auto matchToSplit = DocumentSourceMatch::create(fromjson("{a: {$gt: 1}}"), getExpCtx());
+
+    auto [metaOnlyMatch, remainingMatch] =
+        dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get())
+            ->splitMatchOnMetaAndRename(matchToSplit.get());
+
+    // Can't split when the match does not reference the metaField.
+    ASSERT_FALSE(metaOnlyMatch);
+    ASSERT_TRUE(remainingMatch);
+    ASSERT_BSONOBJ_EQ(matchToSplit->getQuery(), remainingMatch->getQuery());
+}
+
+TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, SplitsWhenEntireMatchIsOnMetaField) {
+    auto unpack = DocumentSourceInternalUnpackBucket::createFromBson(
+        fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}")
+            .firstElement(),
+        getExpCtx());
+    auto matchToSplit = DocumentSourceMatch::create(
+        fromjson("{$or: [{myMeta: {$gt: 1}}, {'myMeta.a': {$lt: 1}}]}"), getExpCtx());
+
+    auto [metaOnlyMatch, remainingMatch] =
+        dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get())
+            ->splitMatchOnMetaAndRename(matchToSplit.get());
+
+    // Can split and rename when the match is entirely on the metaField.
+    ASSERT_TRUE(metaOnlyMatch);
+    ASSERT_BSONOBJ_EQ(fromjson("{$or: [{meta: {$gt: 1}}, {'meta.a': {$lt: 1}}]}"),
+                      metaOnlyMatch->getQuery());
+    ASSERT_FALSE(remainingMatch);
+}
+
+TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename,
+       SplitsWhenIndependentPartOfMatchIsOnMetaField) {
+    auto unpack = DocumentSourceInternalUnpackBucket::createFromBson(
+        fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}")
+            .firstElement(),
+        getExpCtx());
+    auto matchToSplit = DocumentSourceMatch::create(
+        fromjson("{$and: [{'myMeta.a': {$gt: 1}}, {b: {$lt: 1}}]}"), getExpCtx());
+
+    auto [metaOnlyMatch, remainingMatch] =
+        dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get())
+            ->splitMatchOnMetaAndRename(matchToSplit.get());
+
+    // Can split and rename when an independent part of the match is on the metaField.
+    ASSERT_TRUE(metaOnlyMatch);
+    ASSERT_BSONOBJ_EQ(fromjson("{'meta.a': {$gt: 1}}"), metaOnlyMatch->getQuery());
+    ASSERT_TRUE(remainingMatch);
+    ASSERT_BSONOBJ_EQ(fromjson("{b: {$lt: 1}}"), remainingMatch->getQuery());
+}
+
+TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename,
+       DoesNotSplitsWhenDependentPartOfMatchIsOnMetaField) {
+    auto unpack = DocumentSourceInternalUnpackBucket::createFromBson(
+        fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'meta'}}")
+            .firstElement(),
+        getExpCtx());
+    auto matchToSplit = DocumentSourceMatch::create(
+        fromjson("{$or: [{'meta.a': {$gt: 1}}, {metaXYZ: {$lt: 1}}]}"), getExpCtx());
+
+    auto [metaOnlyMatch, remainingMatch] =
+        dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get())
+            ->splitMatchOnMetaAndRename(matchToSplit.get());
+
+    // Can't split when the part of the match that is on the metaField is dependent on the rest.
+    // Even though 'metaXYZ' is prefixed by 'meta', it's not a subfield. The presence of a top-level
+    // $or means this match cannot be correctly split into two matches.
+    ASSERT_FALSE(metaOnlyMatch);
+    ASSERT_TRUE(remainingMatch);
+    ASSERT_BSONOBJ_EQ(matchToSplit->getQuery(), remainingMatch->getQuery());
+}
+
+TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, SplitsWhenSharedPrefixOfMetaIsNotSubfield) {
+    auto unpack = DocumentSourceInternalUnpackBucket::createFromBson(
+        fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}")
+            .firstElement(),
+        getExpCtx());
+    auto matchToSplit = DocumentSourceMatch::create(
+        fromjson("{$and: [{myMeta: {$gt: 1}}, {myMetaXYZ: {$lt: 1}}]}"), getExpCtx());
+
+    auto [metaOnlyMatch, remainingMatch] =
+        dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get())
+            ->splitMatchOnMetaAndRename(matchToSplit.get());
+
+    // Can split and rename when an independent part of the match is on the metaField. Even though
+    // 'myMetaXYZ' is prefixed by 'myMeta', it's not a subfield, so it should not be pushed down.
+    ASSERT_TRUE(metaOnlyMatch);
+    ASSERT_BSONOBJ_EQ(fromjson("{meta: {$gt: 1}}"), metaOnlyMatch->getQuery());
+    ASSERT_TRUE(remainingMatch);
+    ASSERT_BSONOBJ_EQ(fromjson("{myMetaXYZ: {$lt: 1}}"), remainingMatch->getQuery());
+}
+
+TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, SplitsAndRenamesWithExpr) {
+    auto unpack = DocumentSourceInternalUnpackBucket::createFromBson(
+        fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}")
+            .firstElement(),
+        getExpCtx());
+    auto matchToSplit =
+        DocumentSourceMatch::create(fromjson("{$expr: {$eq: ['$myMeta.a', 2]}}"), getExpCtx());
+
+    auto [metaOnlyMatch, remainingMatch] =
+        dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get())
+            ->splitMatchOnMetaAndRename(matchToSplit.get());
+
+    // Can split and rename when the $match includes a $expr.
+    ASSERT_TRUE(metaOnlyMatch);
+    ASSERT_BSONOBJ_EQ(fromjson("{$expr: {$eq: ['$meta.a', {$const: 2}]}}"),
+                      metaOnlyMatch->getQuery());
+    ASSERT_FALSE(remainingMatch);
+}
+
+TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, SplitsAndRenamesWithType) {
+    auto unpack = DocumentSourceInternalUnpackBucket::createFromBson(
+        fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}")
+            .firstElement(),
+        getExpCtx());
+    auto matchToSplit =
+        DocumentSourceMatch::create(fromjson("{myMeta: {$type: [4]}}"), getExpCtx());
+
+    auto [metaOnlyMatch, remainingMatch] =
+        dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get())
+            ->splitMatchOnMetaAndRename(matchToSplit.get());
+
+    // Can split and rename when the $match includes a $type.
+    ASSERT_TRUE(metaOnlyMatch);
+    ASSERT_BSONOBJ_EQ(fromjson("{meta: {$type: [4]}}"), metaOnlyMatch->getQuery());
+    ASSERT_FALSE(remainingMatch);
+}
+
+TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, SplitsAndRenamesWhenMultiplePredicates) {
+    auto unpack = DocumentSourceInternalUnpackBucket::createFromBson(
+        fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}")
+            .firstElement(),
+        getExpCtx());
+    auto matchToSplit = DocumentSourceMatch::create(
+        fromjson("{myMeta: {$gte: 0, $lte: 5}, l: {$type: [4]}}"), getExpCtx());
+
+    auto [metaOnlyMatch, remainingMatch] =
+        dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get())
+            ->splitMatchOnMetaAndRename(matchToSplit.get());
+
+    // Can split and rename when the $match includes multiple predicates.
+    ASSERT_TRUE(metaOnlyMatch);
+    ASSERT_BSONOBJ_EQ(fromjson("{$and: [{meta: {$gte: 0}}, {meta: {$lte: 5}}]}"),
+                      metaOnlyMatch->getQuery());
+    ASSERT_TRUE(remainingMatch);
+    ASSERT_BSONOBJ_EQ(fromjson("{l: {$type: [4]}}"), remainingMatch->getQuery());
+}
+
+TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, SplitsAndRenamesWhenSeveralFieldReferences) {
+    auto unpack = DocumentSourceInternalUnpackBucket::createFromBson(
+        fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}")
+            .firstElement(),
+        getExpCtx());
+    auto matchToSplit = DocumentSourceMatch::create(
+        fromjson("{$and: [{myMeta: {$type: [3]}}, {'myMeta.a': {$gte: "
+                 "0}}, {'myMeta.b': {$type: [4]}}, {a: {$in: ['$b', '$c']}}]}"),
+        getExpCtx());
+
+    auto [metaOnlyMatch, remainingMatch] =
+        dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get())
+            ->splitMatchOnMetaAndRename(matchToSplit.get());
+
+    // Can split and rename when the $match includes several field references.
+    ASSERT_TRUE(metaOnlyMatch);
+    ASSERT_BSONOBJ_EQ(fromjson("{$and: [{meta: {$type: [3]}}, {'meta.a': {$gte: 0}}, "
+                               "{'meta.b': {$type: [4]}}]}"),
+                      metaOnlyMatch->getQuery());
+    ASSERT_TRUE(remainingMatch);
+    ASSERT_BSONOBJ_EQ(fromjson("{a: {$in: ['$b', '$c']}}"), remainingMatch->getQuery());
+}
+
+TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, OptimizeSplitsMatchAndMapsControlPredicates) {
+    auto unpack =
+        fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}");
+    auto pipeline = Pipeline::parse(
+        makeVector(unpack, fromjson("{$match: {myMeta: {$gte: 0, $lte: 5}, a: {$lte: 4}}}")),
+        getExpCtx());
+    ASSERT_EQ(2u, pipeline->getSources().size());
+
+    pipeline->optimizePipeline();
+
+    // We should split and rename the $match. A separate optimization maps the predicate on 'a' to a
+    // predicate on 'control.min.a'. These two created $match stages should be added before
+    // $_internalUnpackBucket and merged.
+    auto serialized = pipeline->serializeToBson();
+    ASSERT_EQ(3u, serialized.size());
+    ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [{$and: [{meta: {$gte: 0}}, {meta: {$lte: 5}}]}, "
+                               "{'control.min.a': {$_internalExprLte: 4}}]}}"),
+                      serialized[0]);
+    ASSERT_BSONOBJ_EQ(unpack, serialized[1]);
+    ASSERT_BSONOBJ_EQ(fromjson("{$match: {a: {$lte: 4}}}"), serialized[2]);
+}
+
+TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, OptimizeMovesMetaMatchBeforeUnpack) {
+    auto unpack =
+        fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}");
+    auto pipeline =
+        Pipeline::parse(makeVector(unpack, fromjson("{$match: {myMeta: {$gte: 0}}}")), getExpCtx());
+    ASSERT_EQ(2u, pipeline->getSources().size());
+
+    pipeline->optimizePipeline();
+
+    // The $match on meta is moved before $_internalUnpackBucket and no other optimization is done.
+    auto serialized = pipeline->serializeToBson();
+    ASSERT_EQ(2u, serialized.size());
+    ASSERT_BSONOBJ_EQ(fromjson("{$match: {meta: {$gte: 0}}}"), serialized[0]);
+    ASSERT_BSONOBJ_EQ(unpack, serialized[1]);
+}
+
+TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename,
+       OptimizeDoesNotErrorOnFailedSplitOfMetaMatch) {
+    auto unpack =
+        fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}");
+    auto match = fromjson(
+        "{$match: {$and: [{x: {$lte: 1}}, {$or: [{'myMeta.a': "
+        "{$gt: 1}}, {y: {$lt: 1}}]}]}}");
+    auto pipeline = Pipeline::parse(makeVector(unpack, match), getExpCtx());
+    ASSERT_EQ(2u, pipeline->getSources().size());
+
+    pipeline->optimizePipeline();
+
+    // We should fail to split the match because of the $or clause. We should still be able to
+    // map the predicate on 'x' to a predicate on the control field.
+    auto serialized = pipeline->serializeToBson();
+    ASSERT_EQ(3u, serialized.size());
+    ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [{'control.min.x': {$_internalExprLte: 1}}]}}"),
+                      serialized[0]);
+    ASSERT_BSONOBJ_EQ(unpack, serialized[1]);
+    ASSERT_BSONOBJ_EQ(match, serialized[2]);
+}
+}  // namespace
+}  // namespace mongo
diff --git a/src/mongo/db/pipeline/document_source_match.cpp b/src/mongo/db/pipeline/document_source_match.cpp
index d292f7a64de..6686626438b 100644
--- a/src/mongo/db/pipeline/document_source_match.cpp
+++ b/src/mongo/db/pipeline/document_source_match.cpp
@@ -382,9 +382,22 @@ void DocumentSourceMatch::joinMatchWith(intrusive_ptr<DocumentSourceMatch> other
 
 pair<intrusive_ptr<DocumentSourceMatch>, intrusive_ptr<DocumentSourceMatch>>
 DocumentSourceMatch::splitSourceBy(const std::set<std::string>& fields,
-                                   const StringMap<std::string>& renames) {
+                                   const StringMap<std::string>& renames) && {
+    return std::move(*this).splitSourceByFunc(fields, renames, expression::isIndependentOf);
+}
+
+pair<intrusive_ptr<DocumentSourceMatch>, intrusive_ptr<DocumentSourceMatch>>
+DocumentSourceMatch::extractMatchOnFieldsAndRemainder(const std::set<std::string>& fields,
+                                                      const StringMap<std::string>& renames) && {
+    return std::move(*this).splitSourceByFunc(fields, renames, expression::isOnlyDependentOn);
+}
+
+pair<intrusive_ptr<DocumentSourceMatch>, intrusive_ptr<DocumentSourceMatch>>
+DocumentSourceMatch::splitSourceByFunc(const std::set<std::string>& fields,
+                                       const StringMap<std::string>& renames,
+                                       expression::ShouldSplitExprFunc func) && {
     pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> newExpr(
-        expression::splitMatchExpressionBy(std::move(_expression), fields, renames));
+        expression::splitMatchExpressionBy(std::move(_expression), fields, renames, func));
 
     invariant(newExpr.first || newExpr.second);
 
diff --git a/src/mongo/db/pipeline/document_source_match.h b/src/mongo/db/pipeline/document_source_match.h
index 7c4150ecdb3..7618136946d 100644
--- a/src/mongo/db/pipeline/document_source_match.h
+++ b/src/mongo/db/pipeline/document_source_match.h
@@ -34,6 +34,7 @@
 #include <utility>
 
 #include "mongo/client/connpool.h"
+#include "mongo/db/matcher/expression_algo.h"
 #include "mongo/db/matcher/matcher.h"
 #include "mongo/db/pipeline/document_source.h"
 #include "mongo/util/intrusive_counter.h"
@@ -187,7 +188,24 @@ public:
      * z: "baz"}} and {$match: {a: "foo"}}.
      */
     std::pair<boost::intrusive_ptr<DocumentSourceMatch>, boost::intrusive_ptr<DocumentSourceMatch>>
-    splitSourceBy(const std::set<std::string>& fields, const StringMap<std::string>& renames);
+    splitSourceBy(const std::set<std::string>& fields, const StringMap<std::string>& renames) &&;
+
+    /**
+     * Attempt to split this $match into two stages, where the first is ONLY dependent upon paths
+     * from 'fields', and where applying them in sequence is equivalent to applying this stage once.
+     *
+     * Will return two intrusive_ptrs to new $match stages, where the first pointer is dependent
+     * only on 'fields' and the second is the remainder. Either pointer may be null, so be sure to
+     * check the return value.
+     *
+     * The 'renames' structure maps from a field to an alias that should be used in the dependent
+     * portion of the match. For example, suppose that we split by fields "a" with the rename "a" =>
+     * "c". The match {$match: {a: "foo", b: "bar", z: "baz"}} will split into {$match: {c: "foo"}}
+     * and {$match: {b: "bar", z: "baz"}}.
+     */
+    std::pair<boost::intrusive_ptr<DocumentSourceMatch>, boost::intrusive_ptr<DocumentSourceMatch>>
+    extractMatchOnFieldsAndRemainder(const std::set<std::string>& fields,
+                                     const StringMap<std::string>& renames) &&;
 
     boost::optional<DistributedPlanLogic> distributedPlanLogic() final {
         return boost::none;
@@ -206,6 +224,11 @@ protected:
     BSONObj _predicate;
 
 private:
+    std::pair<boost::intrusive_ptr<DocumentSourceMatch>, boost::intrusive_ptr<DocumentSourceMatch>>
+    splitSourceByFunc(const std::set<std::string>& fields,
+                      const StringMap<std::string>& renames,
+                      expression::ShouldSplitExprFunc func) &&;
+
     std::unique_ptr<MatchExpression> _expression;
 
     bool _isTextQuery;
author	Hana Pearlman <hana.pearlman@mongodb.com>	2021-03-10 14:16:01 +0000
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2021-03-15 20:13:15 +0000
commit	fa66c2f9df10fbb8ef1083a286f45fbf0ec235fa (patch)
tree	f17748552cb02cf6b788c682dbeef7ce71b3125a /src/mongo/db/pipeline
parent	7d9059301512c911850804ec67bdd0fb45f4067e (diff)
download	mongo-fa66c2f9df10fbb8ef1083a286f45fbf0ec235fa.tar.gz