diff options
author | Hana Pearlman <hana.pearlman@mongodb.com> | 2021-03-10 14:16:01 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-03-15 20:13:15 +0000 |
commit | fa66c2f9df10fbb8ef1083a286f45fbf0ec235fa (patch) | |
tree | f17748552cb02cf6b788c682dbeef7ce71b3125a /src/mongo/db/pipeline | |
parent | 7d9059301512c911850804ec67bdd0fb45f4067e (diff) | |
download | mongo-fa66c2f9df10fbb8ef1083a286f45fbf0ec235fa.tar.gz |
SERVER-53484: Allow rewrite to push meta predicates past $unpackBucket
Diffstat (limited to 'src/mongo/db/pipeline')
8 files changed, 396 insertions, 21 deletions
diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript index 3f538a39b3b..6baca8e610b 100644 --- a/src/mongo/db/pipeline/SConscript +++ b/src/mongo/db/pipeline/SConscript @@ -397,6 +397,7 @@ env.CppUnitTest( 'document_source_internal_unpack_bucket_test/internalize_project_test.cpp', 'document_source_internal_unpack_bucket_test/sort_reorder_test.cpp', 'document_source_internal_unpack_bucket_test/unpack_bucket_exec_test.cpp', + 'document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp', 'document_source_unwind_test.cpp', 'expression_and_test.cpp', 'expression_compare_test.cpp', diff --git a/src/mongo/db/pipeline/document_source.cpp b/src/mongo/db/pipeline/document_source.cpp index aa0ca4885ea..1fd84f3f54f 100644 --- a/src/mongo/db/pipeline/document_source.cpp +++ b/src/mongo/db/pipeline/document_source.cpp @@ -169,7 +169,7 @@ splitMatchByModifiedFields(const boost::intrusive_ptr<DocumentSourceMatch>& matc semantic_analysis::extractModifiedDependencies(depsTracker.fields, preservedPaths); } } - return match->splitSourceBy(modifiedPaths, modifiedPathsRet.renames); + return std::move(*match).splitSourceBy(modifiedPaths, modifiedPathsRet.renames); } /** diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp index c9a77ab25fc..b20be4cdd68 100644 --- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp +++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp @@ -190,6 +190,13 @@ boost::intrusive_ptr<DocumentSourceSort> createMetadataSortForReorder( maxMemoryUsageBytes); } +// Optimize the section of the pipeline before the $_internalUnpackBucket stage. +void optimizePrefix(Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { + auto prefix = Pipeline::SourceContainer(container->begin(), itr); + Pipeline::optimizeContainer(&prefix); + container->erase(container->begin(), itr); + container->splice(itr, prefix); +} } // namespace void BucketUnpacker::reset(BSONObj&& bucket) { @@ -566,6 +573,16 @@ DocumentSourceInternalUnpackBucket::createPredicatesOnBucketLevelField( return nullptr; } +std::pair<boost::intrusive_ptr<DocumentSourceMatch>, boost::intrusive_ptr<DocumentSourceMatch>> +DocumentSourceInternalUnpackBucket::splitMatchOnMetaAndRename( + boost::intrusive_ptr<DocumentSourceMatch> match) { + if (auto&& metaField = _bucketUnpacker.bucketSpec().metaField) { + return std::move(*match).extractMatchOnFieldsAndRemainder( + {*metaField}, {{*metaField, BucketUnpacker::kBucketMetaFieldName.toString()}}); + } + return {nullptr, match}; +} + Pipeline::SourceContainer::iterator DocumentSourceInternalUnpackBucket::doOptimizeAt( Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { invariant(*itr == this); @@ -600,20 +617,27 @@ Pipeline::SourceContainer::iterator DocumentSourceInternalUnpackBucket::doOptimi // Optimize the pipeline after the $unpackBucket. optimizeEndOfPipeline(itr, container); - // Attempt to map predicates on bucketed fields to predicates on the control field. if (auto nextMatch = dynamic_cast<DocumentSourceMatch*>((*std::next(itr)).get())) { - if (auto match = createPredicatesOnBucketLevelField(nextMatch->getMatchExpression())) { - // Optimize the newly created MatchExpression. - auto optimized = MatchExpression::optimize(std::move(match)); - BSONObjBuilder bob; - optimized->serialize(&bob); - - // Because we insert any possible $match first before performing other - // $_internalUnpackBucket optimizations, it is not necessary to call - // optimizeContainer() here to allow for the newly inserted stage to engage in further - // optimizations with its neighbors, as this $match is already in the optimal place for - // predicate pushdown. - container->insert(itr, DocumentSourceMatch::create(bob.obj(), pExpCtx)); + // Attempt to push predicates on the metaField past $_internalUnpackBucket. + auto [metaMatch, remainingMatch] = splitMatchOnMetaAndRename(nextMatch); + + // 'metaMatch' is safe to move before $_internalUnpackBucket. + if (metaMatch) { + container->insert(itr, metaMatch); + } + + // The old $match can be removed and potentially replaced with 'remainingMatch'. + container->erase(std::next(itr)); + if (remainingMatch) { + container->insert(std::next(itr), remainingMatch); + + // Attempt to map predicates on bucketed fields to predicates on the control field. + if (auto match = + createPredicatesOnBucketLevelField(remainingMatch->getMatchExpression())) { + BSONObjBuilder bob; + match->serialize(&bob); + container->insert(itr, DocumentSourceMatch::create(bob.obj(), pExpCtx)); + } } } @@ -624,6 +648,9 @@ Pipeline::SourceContainer::iterator DocumentSourceInternalUnpackBucket::doOptimi internalizeProject(project, isInclusion); } + // Optimize the prefix of the pipeline, now that all optimizations have been completed. + optimizePrefix(itr, container); + return container->end(); } } // namespace mongo diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h index 50a1513cc51..18316a1a3e9 100644 --- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h +++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h @@ -32,6 +32,7 @@ #include <set> #include "mongo/db/pipeline/document_source.h" +#include "mongo/db/pipeline/document_source_match.h" namespace mongo { @@ -206,6 +207,16 @@ public: Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) const; /** + * Attempts to split 'match' into two stages, where the first is dependent only on the metaField + * and the second is the remainder, so that applying them in sequence is equivalent to applying + * 'match' once. Will return two intrusive_ptrs to new $match stages. Either pointer may be + * null. If the first is non-null, it will have the metaField renamed from the user defined name + * to 'kBucketMetaFieldName'. + */ + std::pair<boost::intrusive_ptr<DocumentSourceMatch>, boost::intrusive_ptr<DocumentSourceMatch>> + splitMatchOnMetaAndRename(boost::intrusive_ptr<DocumentSourceMatch> match); + + /** * Takes a predicate after $_internalUnpackBucket on a bucketed field as an argument and * attempts to map it to a new predicate on the 'control' field. For example, the predicate * {a: {$gt: 5}} will generate the predicate {control.max.a: {$_internalExprGt: 5}}, which will diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp index 88e5a2a2109..7e5f9511da0 100644 --- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp +++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/create_predicates_on_bucket_level_field_test.cpp @@ -216,12 +216,15 @@ TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, pipeline->optimizePipeline(); ASSERT_EQ(pipeline->getSources().size(), 3U); - auto stages = pipeline->serializeToBson(); + // To get the optimized $match from the pipeline, we have to serialize with explain. + auto stages = pipeline->writeExplainOps(ExplainOptions::Verbosity::kQueryPlanner); ASSERT_EQ(stages.size(), 3U); - ASSERT_BSONOBJ_EQ(stages[0], fromjson("{$match: {'control.max.b': {$_internalExprGt: 1}}}")); - ASSERT_BSONOBJ_EQ(stages[1], unpackBucketObj); - ASSERT_BSONOBJ_EQ(stages[2], matchObj); + ASSERT_BSONOBJ_EQ(stages[0].getDocument().toBson(), + fromjson("{$match: {'control.max.b': {$_internalExprGt: 1}}}")); + ASSERT_BSONOBJ_EQ(stages[1].getDocument().toBson(), unpackBucketObj); + ASSERT_BSONOBJ_EQ(stages[2].getDocument().toBson(), + fromjson("{$match: {$and: [{b: {$gt: 1}}, {a: {$not: {$eq: 5}}}]}}")); } TEST_F(InternalUnpackBucketPredicateMappingOptimizationTest, diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp new file mode 100644 index 00000000000..535e182e659 --- /dev/null +++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test/split_match_on_meta_and_rename_test.cpp @@ -0,0 +1,297 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/pipeline/aggregation_context_fixture.h" +#include "mongo/db/pipeline/document_source_internal_unpack_bucket.h" +#include "mongo/db/pipeline/document_source_match.h" +#include "mongo/db/pipeline/pipeline.h" +#include "mongo/db/query/util/make_data_structure.h" +#include "mongo/unittest/bson_test_util.h" + +namespace mongo { +namespace { + +using InternalUnpackBucketSplitMatchOnMetaAndRename = AggregationContextFixture; + +TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, DoesNotSplitWhenNoMetaFieldSpecified) { + auto unpack = DocumentSourceInternalUnpackBucket::createFromBson( + fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo'}}").firstElement(), + getExpCtx()); + auto matchToSplit = DocumentSourceMatch::create(fromjson("{meta: {$gt: 1}}"), getExpCtx()); + + auto [metaOnlyMatch, remainingMatch] = + dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get()) + ->splitMatchOnMetaAndRename(matchToSplit.get()); + + // Can't split when there is no metaField specified in the stage. + ASSERT_FALSE(metaOnlyMatch); + ASSERT_TRUE(remainingMatch); + ASSERT_BSONOBJ_EQ(matchToSplit->getQuery(), remainingMatch->getQuery()); +} + +TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, DoesNotSplitWhenNoMatchOnMetaField) { + auto unpack = DocumentSourceInternalUnpackBucket::createFromBson( + fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}") + .firstElement(), + getExpCtx()); + auto matchToSplit = DocumentSourceMatch::create(fromjson("{a: {$gt: 1}}"), getExpCtx()); + + auto [metaOnlyMatch, remainingMatch] = + dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get()) + ->splitMatchOnMetaAndRename(matchToSplit.get()); + + // Can't split when the match does not reference the metaField. + ASSERT_FALSE(metaOnlyMatch); + ASSERT_TRUE(remainingMatch); + ASSERT_BSONOBJ_EQ(matchToSplit->getQuery(), remainingMatch->getQuery()); +} + +TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, SplitsWhenEntireMatchIsOnMetaField) { + auto unpack = DocumentSourceInternalUnpackBucket::createFromBson( + fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}") + .firstElement(), + getExpCtx()); + auto matchToSplit = DocumentSourceMatch::create( + fromjson("{$or: [{myMeta: {$gt: 1}}, {'myMeta.a': {$lt: 1}}]}"), getExpCtx()); + + auto [metaOnlyMatch, remainingMatch] = + dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get()) + ->splitMatchOnMetaAndRename(matchToSplit.get()); + + // Can split and rename when the match is entirely on the metaField. + ASSERT_TRUE(metaOnlyMatch); + ASSERT_BSONOBJ_EQ(fromjson("{$or: [{meta: {$gt: 1}}, {'meta.a': {$lt: 1}}]}"), + metaOnlyMatch->getQuery()); + ASSERT_FALSE(remainingMatch); +} + +TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, + SplitsWhenIndependentPartOfMatchIsOnMetaField) { + auto unpack = DocumentSourceInternalUnpackBucket::createFromBson( + fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}") + .firstElement(), + getExpCtx()); + auto matchToSplit = DocumentSourceMatch::create( + fromjson("{$and: [{'myMeta.a': {$gt: 1}}, {b: {$lt: 1}}]}"), getExpCtx()); + + auto [metaOnlyMatch, remainingMatch] = + dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get()) + ->splitMatchOnMetaAndRename(matchToSplit.get()); + + // Can split and rename when an independent part of the match is on the metaField. + ASSERT_TRUE(metaOnlyMatch); + ASSERT_BSONOBJ_EQ(fromjson("{'meta.a': {$gt: 1}}"), metaOnlyMatch->getQuery()); + ASSERT_TRUE(remainingMatch); + ASSERT_BSONOBJ_EQ(fromjson("{b: {$lt: 1}}"), remainingMatch->getQuery()); +} + +TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, + DoesNotSplitsWhenDependentPartOfMatchIsOnMetaField) { + auto unpack = DocumentSourceInternalUnpackBucket::createFromBson( + fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'meta'}}") + .firstElement(), + getExpCtx()); + auto matchToSplit = DocumentSourceMatch::create( + fromjson("{$or: [{'meta.a': {$gt: 1}}, {metaXYZ: {$lt: 1}}]}"), getExpCtx()); + + auto [metaOnlyMatch, remainingMatch] = + dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get()) + ->splitMatchOnMetaAndRename(matchToSplit.get()); + + // Can't split when the part of the match that is on the metaField is dependent on the rest. + // Even though 'metaXYZ' is prefixed by 'meta', it's not a subfield. The presence of a top-level + // $or means this match cannot be correctly split into two matches. + ASSERT_FALSE(metaOnlyMatch); + ASSERT_TRUE(remainingMatch); + ASSERT_BSONOBJ_EQ(matchToSplit->getQuery(), remainingMatch->getQuery()); +} + +TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, SplitsWhenSharedPrefixOfMetaIsNotSubfield) { + auto unpack = DocumentSourceInternalUnpackBucket::createFromBson( + fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}") + .firstElement(), + getExpCtx()); + auto matchToSplit = DocumentSourceMatch::create( + fromjson("{$and: [{myMeta: {$gt: 1}}, {myMetaXYZ: {$lt: 1}}]}"), getExpCtx()); + + auto [metaOnlyMatch, remainingMatch] = + dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get()) + ->splitMatchOnMetaAndRename(matchToSplit.get()); + + // Can split and rename when an independent part of the match is on the metaField. Even though + // 'myMetaXYZ' is prefixed by 'myMeta', it's not a subfield, so it should not be pushed down. + ASSERT_TRUE(metaOnlyMatch); + ASSERT_BSONOBJ_EQ(fromjson("{meta: {$gt: 1}}"), metaOnlyMatch->getQuery()); + ASSERT_TRUE(remainingMatch); + ASSERT_BSONOBJ_EQ(fromjson("{myMetaXYZ: {$lt: 1}}"), remainingMatch->getQuery()); +} + +TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, SplitsAndRenamesWithExpr) { + auto unpack = DocumentSourceInternalUnpackBucket::createFromBson( + fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}") + .firstElement(), + getExpCtx()); + auto matchToSplit = + DocumentSourceMatch::create(fromjson("{$expr: {$eq: ['$myMeta.a', 2]}}"), getExpCtx()); + + auto [metaOnlyMatch, remainingMatch] = + dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get()) + ->splitMatchOnMetaAndRename(matchToSplit.get()); + + // Can split and rename when the $match includes a $expr. + ASSERT_TRUE(metaOnlyMatch); + ASSERT_BSONOBJ_EQ(fromjson("{$expr: {$eq: ['$meta.a', {$const: 2}]}}"), + metaOnlyMatch->getQuery()); + ASSERT_FALSE(remainingMatch); +} + +TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, SplitsAndRenamesWithType) { + auto unpack = DocumentSourceInternalUnpackBucket::createFromBson( + fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}") + .firstElement(), + getExpCtx()); + auto matchToSplit = + DocumentSourceMatch::create(fromjson("{myMeta: {$type: [4]}}"), getExpCtx()); + + auto [metaOnlyMatch, remainingMatch] = + dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get()) + ->splitMatchOnMetaAndRename(matchToSplit.get()); + + // Can split and rename when the $match includes a $type. + ASSERT_TRUE(metaOnlyMatch); + ASSERT_BSONOBJ_EQ(fromjson("{meta: {$type: [4]}}"), metaOnlyMatch->getQuery()); + ASSERT_FALSE(remainingMatch); +} + +TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, SplitsAndRenamesWhenMultiplePredicates) { + auto unpack = DocumentSourceInternalUnpackBucket::createFromBson( + fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}") + .firstElement(), + getExpCtx()); + auto matchToSplit = DocumentSourceMatch::create( + fromjson("{myMeta: {$gte: 0, $lte: 5}, l: {$type: [4]}}"), getExpCtx()); + + auto [metaOnlyMatch, remainingMatch] = + dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get()) + ->splitMatchOnMetaAndRename(matchToSplit.get()); + + // Can split and rename when the $match includes multiple predicates. + ASSERT_TRUE(metaOnlyMatch); + ASSERT_BSONOBJ_EQ(fromjson("{$and: [{meta: {$gte: 0}}, {meta: {$lte: 5}}]}"), + metaOnlyMatch->getQuery()); + ASSERT_TRUE(remainingMatch); + ASSERT_BSONOBJ_EQ(fromjson("{l: {$type: [4]}}"), remainingMatch->getQuery()); +} + +TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, SplitsAndRenamesWhenSeveralFieldReferences) { + auto unpack = DocumentSourceInternalUnpackBucket::createFromBson( + fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}") + .firstElement(), + getExpCtx()); + auto matchToSplit = DocumentSourceMatch::create( + fromjson("{$and: [{myMeta: {$type: [3]}}, {'myMeta.a': {$gte: " + "0}}, {'myMeta.b': {$type: [4]}}, {a: {$in: ['$b', '$c']}}]}"), + getExpCtx()); + + auto [metaOnlyMatch, remainingMatch] = + dynamic_cast<DocumentSourceInternalUnpackBucket*>(unpack.get()) + ->splitMatchOnMetaAndRename(matchToSplit.get()); + + // Can split and rename when the $match includes several field references. + ASSERT_TRUE(metaOnlyMatch); + ASSERT_BSONOBJ_EQ(fromjson("{$and: [{meta: {$type: [3]}}, {'meta.a': {$gte: 0}}, " + "{'meta.b': {$type: [4]}}]}"), + metaOnlyMatch->getQuery()); + ASSERT_TRUE(remainingMatch); + ASSERT_BSONOBJ_EQ(fromjson("{a: {$in: ['$b', '$c']}}"), remainingMatch->getQuery()); +} + +TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, OptimizeSplitsMatchAndMapsControlPredicates) { + auto unpack = + fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}"); + auto pipeline = Pipeline::parse( + makeVector(unpack, fromjson("{$match: {myMeta: {$gte: 0, $lte: 5}, a: {$lte: 4}}}")), + getExpCtx()); + ASSERT_EQ(2u, pipeline->getSources().size()); + + pipeline->optimizePipeline(); + + // We should split and rename the $match. A separate optimization maps the predicate on 'a' to a + // predicate on 'control.min.a'. These two created $match stages should be added before + // $_internalUnpackBucket and merged. + auto serialized = pipeline->serializeToBson(); + ASSERT_EQ(3u, serialized.size()); + ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [{$and: [{meta: {$gte: 0}}, {meta: {$lte: 5}}]}, " + "{'control.min.a': {$_internalExprLte: 4}}]}}"), + serialized[0]); + ASSERT_BSONOBJ_EQ(unpack, serialized[1]); + ASSERT_BSONOBJ_EQ(fromjson("{$match: {a: {$lte: 4}}}"), serialized[2]); +} + +TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, OptimizeMovesMetaMatchBeforeUnpack) { + auto unpack = + fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}"); + auto pipeline = + Pipeline::parse(makeVector(unpack, fromjson("{$match: {myMeta: {$gte: 0}}}")), getExpCtx()); + ASSERT_EQ(2u, pipeline->getSources().size()); + + pipeline->optimizePipeline(); + + // The $match on meta is moved before $_internalUnpackBucket and no other optimization is done. + auto serialized = pipeline->serializeToBson(); + ASSERT_EQ(2u, serialized.size()); + ASSERT_BSONOBJ_EQ(fromjson("{$match: {meta: {$gte: 0}}}"), serialized[0]); + ASSERT_BSONOBJ_EQ(unpack, serialized[1]); +} + +TEST_F(InternalUnpackBucketSplitMatchOnMetaAndRename, + OptimizeDoesNotErrorOnFailedSplitOfMetaMatch) { + auto unpack = + fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'myMeta'}}"); + auto match = fromjson( + "{$match: {$and: [{x: {$lte: 1}}, {$or: [{'myMeta.a': " + "{$gt: 1}}, {y: {$lt: 1}}]}]}}"); + auto pipeline = Pipeline::parse(makeVector(unpack, match), getExpCtx()); + ASSERT_EQ(2u, pipeline->getSources().size()); + + pipeline->optimizePipeline(); + + // We should fail to split the match because of the $or clause. We should still be able to + // map the predicate on 'x' to a predicate on the control field. + auto serialized = pipeline->serializeToBson(); + ASSERT_EQ(3u, serialized.size()); + ASSERT_BSONOBJ_EQ(fromjson("{$match: {$and: [{'control.min.x': {$_internalExprLte: 1}}]}}"), + serialized[0]); + ASSERT_BSONOBJ_EQ(unpack, serialized[1]); + ASSERT_BSONOBJ_EQ(match, serialized[2]); +} +} // namespace +} // namespace mongo diff --git a/src/mongo/db/pipeline/document_source_match.cpp b/src/mongo/db/pipeline/document_source_match.cpp index d292f7a64de..6686626438b 100644 --- a/src/mongo/db/pipeline/document_source_match.cpp +++ b/src/mongo/db/pipeline/document_source_match.cpp @@ -382,9 +382,22 @@ void DocumentSourceMatch::joinMatchWith(intrusive_ptr<DocumentSourceMatch> other pair<intrusive_ptr<DocumentSourceMatch>, intrusive_ptr<DocumentSourceMatch>> DocumentSourceMatch::splitSourceBy(const std::set<std::string>& fields, - const StringMap<std::string>& renames) { + const StringMap<std::string>& renames) && { + return std::move(*this).splitSourceByFunc(fields, renames, expression::isIndependentOf); +} + +pair<intrusive_ptr<DocumentSourceMatch>, intrusive_ptr<DocumentSourceMatch>> +DocumentSourceMatch::extractMatchOnFieldsAndRemainder(const std::set<std::string>& fields, + const StringMap<std::string>& renames) && { + return std::move(*this).splitSourceByFunc(fields, renames, expression::isOnlyDependentOn); +} + +pair<intrusive_ptr<DocumentSourceMatch>, intrusive_ptr<DocumentSourceMatch>> +DocumentSourceMatch::splitSourceByFunc(const std::set<std::string>& fields, + const StringMap<std::string>& renames, + expression::ShouldSplitExprFunc func) && { pair<unique_ptr<MatchExpression>, unique_ptr<MatchExpression>> newExpr( - expression::splitMatchExpressionBy(std::move(_expression), fields, renames)); + expression::splitMatchExpressionBy(std::move(_expression), fields, renames, func)); invariant(newExpr.first || newExpr.second); diff --git a/src/mongo/db/pipeline/document_source_match.h b/src/mongo/db/pipeline/document_source_match.h index 7c4150ecdb3..7618136946d 100644 --- a/src/mongo/db/pipeline/document_source_match.h +++ b/src/mongo/db/pipeline/document_source_match.h @@ -34,6 +34,7 @@ #include <utility> #include "mongo/client/connpool.h" +#include "mongo/db/matcher/expression_algo.h" #include "mongo/db/matcher/matcher.h" #include "mongo/db/pipeline/document_source.h" #include "mongo/util/intrusive_counter.h" @@ -187,7 +188,24 @@ public: * z: "baz"}} and {$match: {a: "foo"}}. */ std::pair<boost::intrusive_ptr<DocumentSourceMatch>, boost::intrusive_ptr<DocumentSourceMatch>> - splitSourceBy(const std::set<std::string>& fields, const StringMap<std::string>& renames); + splitSourceBy(const std::set<std::string>& fields, const StringMap<std::string>& renames) &&; + + /** + * Attempt to split this $match into two stages, where the first is ONLY dependent upon paths + * from 'fields', and where applying them in sequence is equivalent to applying this stage once. + * + * Will return two intrusive_ptrs to new $match stages, where the first pointer is dependent on + * 'fields' and the second is the remainder. Either pointer may be null, so be sure to check the + * return value. + * + * The 'renames' structure maps from a field to an alias that should be used in the dependent + * portion of the match. For example, suppose that we split by fields "a" with the rename "a" => + * "c". The match {$match: {a: "foo", b: "bar", z: "baz"}} will split into {$match: {c: "foo"}} + * and {$match: {b: "bar", z: "baz"}}. + */ + std::pair<boost::intrusive_ptr<DocumentSourceMatch>, boost::intrusive_ptr<DocumentSourceMatch>> + extractMatchOnFieldsAndRemainder(const std::set<std::string>& fields, + const StringMap<std::string>& renames) &&; boost::optional<DistributedPlanLogic> distributedPlanLogic() final { return boost::none; @@ -206,6 +224,11 @@ protected: BSONObj _predicate; private: + std::pair<boost::intrusive_ptr<DocumentSourceMatch>, boost::intrusive_ptr<DocumentSourceMatch>> + splitSourceByFunc(const std::set<std::string>& fields, + const StringMap<std::string>& renames, + expression::ShouldSplitExprFunc func) &&; + std::unique_ptr<MatchExpression> _expression; bool _isTextQuery; |