diff options
author | Hana Pearlman <hana.pearlman@mongodb.com> | 2021-01-22 22:18:16 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-01-25 14:42:49 +0000 |
commit | dc70bfe933ba9f4b01a028eab72e44e2681fa09a (patch) | |
tree | bccf317c8495dd3bd7edbc9364b0b02f8d1d56c8 | |
parent | 62c3f69e2b2d2e805bb609edce01736f8ffa389b (diff) | |
download | mongo-dc70bfe933ba9f4b01a028eab72e44e2681fa09a.tar.gz |
SERVER-53487: Add utilized fields in an inclusion projection based on dependency analysis of pipeline after $unpackBucket
-rw-r--r-- | src/mongo/db/pipeline/dependencies.cpp | 18 | ||||
-rw-r--r-- | src/mongo/db/pipeline/dependencies.h | 9 | ||||
-rw-r--r-- | src/mongo/db/pipeline/dependencies_test.cpp | 79 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp | 84 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_internal_unpack_bucket.h | 3 | ||||
-rw-r--r-- | src/mongo/db/pipeline/document_source_internal_unpack_bucket_test.cpp | 230 | ||||
-rw-r--r-- | src/mongo/db/pipeline/pipeline.cpp | 21 | ||||
-rw-r--r-- | src/mongo/db/pipeline/pipeline.h | 17 |
8 files changed, 445 insertions, 16 deletions
diff --git a/src/mongo/db/pipeline/dependencies.cpp b/src/mongo/db/pipeline/dependencies.cpp index c53fe60e2c1..8b60a31637c 100644 --- a/src/mongo/db/pipeline/dependencies.cpp +++ b/src/mongo/db/pipeline/dependencies.cpp @@ -37,7 +37,8 @@ namespace mongo { -BSONObj DepsTracker::toProjectionWithoutMetadata() const { +BSONObj DepsTracker::toProjectionWithoutMetadata( + TruncateToRootLevel truncationBehavior /*= TruncateToRootLevel::no*/) const { BSONObjBuilder bb; if (needWholeDocument) { @@ -65,14 +66,17 @@ BSONObj DepsTracker::toProjectionWithoutMetadata() const { continue; } - last = field + '.'; + // Check that the field requested is a valid field name in the agg language. This + // constructor will throw if it isn't. + FieldPath fp(field); - { - // Check that the field requested is a valid field name in the agg language. This - // constructor will throw if it isn't. - FieldPath fp(field); + if (truncationBehavior == TruncateToRootLevel::yes) { + last = fp.front().toString() + '.'; + bb.append(fp.front(), 1); + } else { + last = field + '.'; + bb.append(field, 1); } - bb.append(field, 1); } if (!idSpecified) { diff --git a/src/mongo/db/pipeline/dependencies.h b/src/mongo/db/pipeline/dependencies.h index 96d846cc216..bda3bf9b243 100644 --- a/src/mongo/db/pipeline/dependencies.h +++ b/src/mongo/db/pipeline/dependencies.h @@ -101,11 +101,16 @@ struct DepsTracker { DepsTracker(const QueryMetadataBitSet& unavailableMetadata = kNoMetadata) : _unavailableMetadata{unavailableMetadata} {} + enum class TruncateToRootLevel : bool { no, yes }; + /** * Returns a projection object covering the non-metadata dependencies tracked by this class, or - * empty BSONObj if the entire document is required. + * empty BSONObj if the entire document is required. By default, the resulting project will + * include the full, dotted field names of the dependencies. If 'truncationBehavior' is set to + * TruncateToRootLevel::yes, the project will contain only the root-level field names. */ - BSONObj toProjectionWithoutMetadata() const; + BSONObj toProjectionWithoutMetadata( + TruncateToRootLevel truncationBehavior = TruncateToRootLevel::no) const; /** * Returns 'true' if there is no dependency on the input documents or metadata. diff --git a/src/mongo/db/pipeline/dependencies_test.cpp b/src/mongo/db/pipeline/dependencies_test.cpp index 58e1066d158..f366ad3ce1d 100644 --- a/src/mongo/db/pipeline/dependencies_test.cpp +++ b/src/mongo/db/pipeline/dependencies_test.cpp @@ -61,6 +61,52 @@ TEST(DependenciesTest, CheckClassConstants) { ASSERT_TRUE(DepsTracker::kOnlyTextScore[DocumentMetadataFields::kTextScore]); } +TEST(DependenciesNeedsMetadataTest, ShouldSucceedIfMetadataAvailableAndNeeded) { + DepsTracker deps(~DepsTracker::kOnlyTextScore); + deps.setNeedsMetadata(DocumentMetadataFields::kTextScore, true); + ASSERT_TRUE(deps.getNeedsMetadata(DocumentMetadataFields::kTextScore)); + ASSERT_TRUE(deps.getNeedsAnyMetadata()); +} + +TEST(DependenciesNeedsMetadataTest, ShouldSucceedIfAllMetadataAvailableAndNeeded) { + DepsTracker deps(DepsTracker::kNoMetadata); + deps.setNeedsMetadata(DocumentMetadataFields::kTextScore, true); + ASSERT_TRUE(deps.getNeedsMetadata(DocumentMetadataFields::kTextScore)); + ASSERT_TRUE(deps.getNeedsAnyMetadata()); + + deps.setNeedsMetadata(DocumentMetadataFields::kGeoNearPoint, true); + ASSERT_TRUE(deps.getNeedsMetadata(DocumentMetadataFields::kGeoNearPoint)); + ASSERT_TRUE(deps.getNeedsAnyMetadata()); +} + +TEST(DependenciesNeedsMetadataTest, ShouldSucceedIfMetadataUnavailableAndNotNeeded) { + DepsTracker deps(DepsTracker::kOnlyTextScore); + deps.setNeedsMetadata(DocumentMetadataFields::kTextScore, false); + ASSERT_FALSE(deps.getNeedsMetadata(DocumentMetadataFields::kTextScore)); + ASSERT_FALSE(deps.getNeedsAnyMetadata()); +} + +TEST(DependenciesNeedsMetadataTest, ShouldSucceedIfMetadataAvailableAndNotNeeded) { + DepsTracker deps(~DepsTracker::kOnlyTextScore); + deps.setNeedsMetadata(DocumentMetadataFields::kTextScore, false); + ASSERT_FALSE(deps.getNeedsMetadata(DocumentMetadataFields::kTextScore)); + ASSERT_FALSE(deps.getNeedsAnyMetadata()); +} + +TEST(DependenciesNeedsMetadataTest, ShouldThrowIfMetadataUnavailableButNeeded) { + DepsTracker deps(DepsTracker::kOnlyTextScore); + ASSERT_THROWS(deps.setNeedsMetadata(DocumentMetadataFields::kTextScore, true), + AssertionException); +} + +TEST(DependenciesNeedsMetadataTest, ShouldThrowIfNoMetadataAvailableButNeeded) { + DepsTracker deps(DepsTracker::kAllMetadata); + ASSERT_THROWS(deps.setNeedsMetadata(DocumentMetadataFields::kTextScore, true), + AssertionException); + ASSERT_THROWS(deps.setNeedsMetadata(DocumentMetadataFields::kGeoNearPoint, true), + AssertionException); +} + TEST(DependenciesToProjectionTest, ShouldIncludeAllFieldsAndExcludeIdIfNotSpecified) { const char* array[] = {"a", "b"}; DepsTracker deps; @@ -83,6 +129,39 @@ TEST(DependenciesToProjectionTest, ShouldNotIncludeSubFieldIfTopLevelAlreadyIncl ASSERT_BSONOBJ_EQ(deps.toProjectionWithoutMetadata(), BSON("a" << 1 << "b" << 1 << "_id" << 0)); } +TEST(DependenciesToProjectionTest, ShouldOnlyIncludeRootLevelPrefixesWithTruncate) { + const char* array[] = {"a.b", "a.c", "a.c.d", "_id.a"}; + DepsTracker deps; + deps.fields = arrayToSet(array); + ASSERT_BSONOBJ_EQ(deps.toProjectionWithoutMetadata(DepsTracker::TruncateToRootLevel::yes), + BSON("_id" << 1 << "a" << 1)); +} + +TEST(DependenciesToProjectionTest, ShouldIncludeDottedPrefixesWithoutTruncate) { + const char* array[] = {"a.b", "a.c", "a.c.d", "_id.a"}; + DepsTracker deps; + deps.fields = arrayToSet(array); + ASSERT_BSONOBJ_EQ(deps.toProjectionWithoutMetadata(DepsTracker::TruncateToRootLevel::no), + BSON("_id.a" << 1 << "a.b" << 1 << "a.c" << 1)); +} + +TEST(DependenciesToProjectionTest, + ShouldIncludeAllRootFieldsAndExcludeIdIfNotSpecifiedWithTruncate) { + const char* array[] = {"a", "b"}; + DepsTracker deps; + deps.fields = arrayToSet(array); + ASSERT_BSONOBJ_EQ(deps.toProjectionWithoutMetadata(DepsTracker::TruncateToRootLevel::yes), + BSON("a" << 1 << "b" << 1 << "_id" << 0)); +} + +TEST(DependenciesToProjectionTest, ShouldIncludeFieldEvenIfSuffixOfAnotherFieldWithTruncate) { + const char* array[] = {"a", "ab"}; + DepsTracker deps; + deps.fields = arrayToSet(array); + ASSERT_BSONOBJ_EQ(deps.toProjectionWithoutMetadata(DepsTracker::TruncateToRootLevel::yes), + BSON("a" << 1 << "ab" << 1 << "_id" << 0)); +} + TEST(DependenciesToProjectionTest, ShouldIncludeIdIfNeeded) { const char* array[] = {"a", "_id"}; DepsTracker deps; diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp index c6ae7ee55c9..cfeb5ce0380 100644 --- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp +++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp @@ -31,7 +31,10 @@ #include "mongo/db/pipeline/document_source_internal_unpack_bucket.h" +#include "mongo/bson/bsonobj.h" #include "mongo/db/exec/document_value/document.h" +#include "mongo/db/pipeline/document_source_project.h" +#include "mongo/db/pipeline/document_source_single_document_transformation.h" #include "mongo/db/pipeline/expression_context.h" #include "mongo/db/pipeline/lite_parsed_document_source.h" @@ -233,4 +236,85 @@ DocumentSource::GetNextResult DocumentSourceInternalUnpackBucket::doGetNext() { return nextResult; } + +namespace { +/** + * A projection can be internalized if it does not include any dotted field names and if every field + * name corresponds to a boolean value. + */ +bool canInternalizeProjectObj(const BSONObj& projObj) { + const auto names = projObj.getFieldNames<std::set<std::string>>(); + return std::all_of(names.begin(), names.end(), [&projObj](auto&& name) { + return name.find('.') == std::string::npos && projObj.getField(name).isBoolean(); + }); +} + +/** + * If 'src' represents an inclusion or exclusion $project, return a BSONObj representing it, else + * return an empty BSONObj. If 'inclusionOnly' is true, 'src' must be an inclusion $project. + */ +auto getProjectObj(DocumentSource* src, bool inclusionOnly) { + if (const auto projStage = dynamic_cast<DocumentSourceSingleDocumentTransformation*>(src); + projStage && + (projStage->getType() == TransformerInterface::TransformerType::kInclusionProjection || + (!inclusionOnly && + projStage->getType() == TransformerInterface::TransformerType::kExclusionProjection))) { + return projStage->getTransformer().serializeTransformation(boost::none).toBson(); + } + + return BSONObj{}; +} + +/** + * Given a source container and an iterator pointing to the $unpackBucket stage, builds a projection + * BSONObj that can be entirely moved into the $unpackBucket stage, following these rules: + * 1. If there is an inclusion projection immediately after the $unpackBucket which can be + * internalized, an empty BSONObj will be returned. + * 2. Otherwise, if there is a finite dependency set for the rest of the pipeline, an inclusion + * $project representing it and containing only root-level fields will be returned. An + * inclusion $project will be returned here even if there is a viable exclusion $project + * next in the pipeline. + * 3. Otherwise, an empty BSONObj will be returned. + */ +auto buildProjectToInternalize(const boost::intrusive_ptr<ExpressionContext>& expCtx, + Pipeline::SourceContainer::iterator itr, + Pipeline::SourceContainer* container) { + // Check for a viable inclusion $project after the $unpackBucket. This handles case 1. + if (auto projObj = getProjectObj(std::next(itr)->get(), true); + !projObj.isEmpty() && canInternalizeProjectObj(projObj)) { + return BSONObj{}; + } + + // If there is a finite dependency set for the pipeline after the $unpackBucket, obtain an + // inclusion $project representing its root-level fields. Otherwise, we get an empty BSONObj. + Pipeline::SourceContainer restOfPipeline(std::next(itr), container->end()); + auto deps = Pipeline::getDependenciesForContainer(expCtx, restOfPipeline, boost::none); + auto dependencyProj = deps.toProjectionWithoutMetadata(DepsTracker::TruncateToRootLevel::yes); + + // If 'dependencyProj' is not empty, we're in case 2. If it is empty, we're in case 3. There may + // be a viable exclusion $project in the pipeline, but we don't need to check for it here. + return dependencyProj; +} +} // namespace + +Pipeline::SourceContainer::iterator DocumentSourceInternalUnpackBucket::doOptimizeAt( + Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) { + invariant(*itr == this); + + if (std::next(itr) == container->end()) { + return container->end(); + } + + // Attempt to build an internalizable $project based on dependency analysis. + if (auto projObj = buildProjectToInternalize(getContext(), itr, container); + !projObj.isEmpty()) { + // Give the new $project a chance to be optimized before internalizing. + container->insert(std::next(itr), + DocumentSourceProject::createFromBson( + BSON("$project" << projObj).firstElement(), pExpCtx)); + return std::next(itr); + } + + return std::next(itr); +} } // namespace mongo diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h index 9e4c18d273a..bb389310fe0 100644 --- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h +++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h @@ -158,6 +158,9 @@ public: return boost::none; }; + Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr, + Pipeline::SourceContainer* container) final; + private: GetNextResult doGetNext() final; diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test.cpp index b4bbc9319dd..1a30cf8230b 100644 --- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test.cpp +++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test.cpp @@ -29,10 +29,17 @@ #include "mongo/platform/basic.h" +#include "mongo/bson/unordered_fields_bsonobj_comparator.h" #include "mongo/db/exec/document_value/document_value_test_util.h" #include "mongo/db/pipeline/aggregation_context_fixture.h" +#include "mongo/db/pipeline/document_source_group.h" #include "mongo/db/pipeline/document_source_internal_unpack_bucket.h" +#include "mongo/db/pipeline/document_source_match.h" #include "mongo/db/pipeline/document_source_mock.h" +#include "mongo/db/pipeline/document_source_project.h" +#include "mongo/db/pipeline/document_source_sort.h" +#include "mongo/db/pipeline/pipeline.h" +#include "mongo/db/query/util/make_data_structure.h" namespace mongo { namespace { @@ -723,5 +730,228 @@ TEST_F(InternalUnpackBucketStageTest, ParserRejectsMissingTimeField) { getExpCtx()), AssertionException); } + +TEST_F(InternalUnpackBucketStageTest, OptimizeAddsIncludeProjectForGroupDependencies) { + auto unpackSpecObj = fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo'}}"); + auto groupSpecObj = fromjson("{$group: {_id: '$x', f: {$first: '$y'}}}"); + + auto pipeline = Pipeline::parse(makeVector(unpackSpecObj, groupSpecObj), getExpCtx()); + ASSERT_EQ(2u, pipeline->getSources().size()); + + pipeline->optimizePipeline(); + ASSERT_EQ(3u, pipeline->getSources().size()); + + auto serialized = pipeline->serializeToBson(); + ASSERT_EQ(3u, serialized.size()); + + const UnorderedFieldsBSONObjComparator kComparator; + ASSERT_BSONOBJ_EQ(unpackSpecObj, serialized[0]); + ASSERT_EQ( + kComparator.compare(fromjson("{$project: {_id: false, x: true, y: true}}"), serialized[1]), + 0); + ASSERT_BSONOBJ_EQ(groupSpecObj, serialized[2]); +} + +TEST_F(InternalUnpackBucketStageTest, OptimizeAddsIncludeProjectForProjectDependencies) { + auto unpackSpecObj = fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo'}}"); + auto projectSpecObj = fromjson("{$project: {_id: true, x: {f: '$y'}}}"); + + auto pipeline = Pipeline::parse(makeVector(unpackSpecObj, projectSpecObj), getExpCtx()); + ASSERT_EQ(2u, pipeline->getSources().size()); + + pipeline->optimizePipeline(); + ASSERT_EQ(3u, pipeline->getSources().size()); + + auto serialized = pipeline->serializeToBson(); + ASSERT_EQ(3u, serialized.size()); + + const UnorderedFieldsBSONObjComparator kComparator; + ASSERT_BSONOBJ_EQ(unpackSpecObj, serialized[0]); + ASSERT_EQ( + kComparator.compare(fromjson("{$project: {_id: true, x: true, y: true}}"), serialized[1]), + 0); + ASSERT_BSONOBJ_EQ(projectSpecObj, serialized[2]); +} + +TEST_F(InternalUnpackBucketStageTest, OptimizeAddsIncludeProjectWhenInMiddleOfPipeline) { + auto matchSpecObj = fromjson("{$match: {'meta.source': 'primary'}}"); + auto unpackSpecObj = + fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'meta'}}"); + auto groupSpecObj = fromjson("{$group: {_id: '$x', f: {$first: '$y'}}}"); + + auto pipeline = + Pipeline::parse(makeVector(matchSpecObj, unpackSpecObj, groupSpecObj), getExpCtx()); + ASSERT_EQ(3u, pipeline->getSources().size()); + + pipeline->optimizePipeline(); + ASSERT_EQ(4u, pipeline->getSources().size()); + + auto serialized = pipeline->serializeToBson(); + ASSERT_EQ(4u, serialized.size()); + + const UnorderedFieldsBSONObjComparator kComparator; + ASSERT_BSONOBJ_EQ(matchSpecObj, serialized[0]); + ASSERT_BSONOBJ_EQ(unpackSpecObj, serialized[1]); + ASSERT_EQ( + kComparator.compare(fromjson("{$project: {_id: false, x: true, y: true}}"), serialized[2]), + 0); + ASSERT_BSONOBJ_EQ(groupSpecObj, serialized[3]); +} + +TEST_F(InternalUnpackBucketStageTest, OptimizeAddsIncludeProjectWhenDependenciesAreDotted) { + auto unpackSpecObj = fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo'}}"); + auto groupSpecObj = fromjson("{$group: {_id: '$x.y', f: {$first: '$a.b'}}}"); + + auto pipeline = Pipeline::parse(makeVector(unpackSpecObj, groupSpecObj), getExpCtx()); + ASSERT_EQ(2u, pipeline->getSources().size()); + + pipeline->optimizePipeline(); + ASSERT_EQ(3u, pipeline->getSources().size()); + + auto serialized = pipeline->serializeToBson(); + ASSERT_EQ(3u, serialized.size()); + + const UnorderedFieldsBSONObjComparator kComparator; + ASSERT_BSONOBJ_EQ(unpackSpecObj, serialized[0]); + ASSERT_EQ( + kComparator.compare(fromjson("{$project: {_id: false, x: true, a: true}}"), serialized[1]), + 0); + ASSERT_BSONOBJ_EQ(groupSpecObj, serialized[2]); +} + +TEST_F(InternalUnpackBucketStageTest, OptimizeDoesNotAddProjectWhenThereAreNoDependencies) { + auto unpackSpecObj = fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo'}}"); + auto groupSpecObj = fromjson("{$group: {_id: {$const: null}, count: { $sum: {$const: 1 }}}}"); + + auto pipeline = Pipeline::parse(makeVector(unpackSpecObj, groupSpecObj), getExpCtx()); + ASSERT_EQ(2u, pipeline->getSources().size()); + + pipeline->optimizePipeline(); + ASSERT_EQ(2u, pipeline->getSources().size()); + + auto serialized = pipeline->serializeToBson(); + ASSERT_EQ(2u, serialized.size()); + + ASSERT_BSONOBJ_EQ(unpackSpecObj, serialized[0]); + ASSERT_BSONOBJ_EQ(groupSpecObj, serialized[1]); +} + +TEST_F(InternalUnpackBucketStageTest, OptimizeDoesNotAddProjectWhenSortDependenciesAreNotFinite) { + auto unpackSpecObj = fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo'}}"); + auto sortSpecObj = fromjson("{$sort: {x: 1}}"); + + auto pipeline = Pipeline::parse(makeVector(unpackSpecObj, sortSpecObj), getExpCtx()); + ASSERT_EQ(2u, pipeline->getSources().size()); + + pipeline->optimizePipeline(); + ASSERT_EQ(2u, pipeline->getSources().size()); + + auto serialized = pipeline->serializeToBson(); + ASSERT_EQ(2u, serialized.size()); + + ASSERT_BSONOBJ_EQ(unpackSpecObj, serialized[0]); + ASSERT_BSONOBJ_EQ(sortSpecObj, serialized[1]); +} + +TEST_F(InternalUnpackBucketStageTest, + OptimizeDoesNotAddProjectWhenProjectDependenciesAreNotFinite) { + auto unpackSpecObj = fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo'}}"); + auto sortSpecObj = fromjson("{$sort: {x: 1}}"); + auto projectSpecObj = fromjson("{$project: {_id: false, x: false}}"); + + auto pipeline = + Pipeline::parse(makeVector(unpackSpecObj, sortSpecObj, projectSpecObj), getExpCtx()); + ASSERT_EQ(3u, pipeline->getSources().size()); + + pipeline->optimizePipeline(); + ASSERT_EQ(3u, pipeline->getSources().size()); + + auto serialized = pipeline->serializeToBson(); + ASSERT_EQ(3u, serialized.size()); + + ASSERT_BSONOBJ_EQ(unpackSpecObj, serialized[0]); + ASSERT_BSONOBJ_EQ(sortSpecObj, serialized[1]); + ASSERT_BSONOBJ_EQ(projectSpecObj, serialized[2]); +} + +TEST_F(InternalUnpackBucketStageTest, OptimizeDoesNotAddProjectWhenViableInclusionProjectExists) { + auto unpackSpecObj = fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo'}}"); + auto projectSpecObj = fromjson("{$project: {_id: true, x: true}}"); + + auto pipeline = Pipeline::parse(makeVector(unpackSpecObj, projectSpecObj), getExpCtx()); + ASSERT_EQ(2u, pipeline->getSources().size()); + + pipeline->optimizePipeline(); + ASSERT_EQ(2u, pipeline->getSources().size()); + + auto serialized = pipeline->serializeToBson(); + ASSERT_EQ(2u, serialized.size()); + + ASSERT_BSONOBJ_EQ(unpackSpecObj, serialized[0]); + ASSERT_BSONOBJ_EQ(projectSpecObj, serialized[1]); +} + +TEST_F(InternalUnpackBucketStageTest, + OptimizeDoesNotAddProjectWhenViableNonBoolInclusionProjectExists) { + auto unpackSpecObj = fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo'}}"); + auto pipeline = Pipeline::parse( + makeVector(unpackSpecObj, fromjson("{$project: {_id: 1, x: 1.0, y: 1.5}}")), getExpCtx()); + ASSERT_EQ(2u, pipeline->getSources().size()); + + pipeline->optimizePipeline(); + ASSERT_EQ(2u, pipeline->getSources().size()); + + auto serialized = pipeline->serializeToBson(); + ASSERT_EQ(2u, serialized.size()); + + const UnorderedFieldsBSONObjComparator kComparator; + ASSERT_BSONOBJ_EQ(unpackSpecObj, serialized[0]); + ASSERT_EQ( + kComparator.compare(fromjson("{$project: {_id: true, x: true, y: true}}"), serialized[1]), + 0); +} + +TEST_F(InternalUnpackBucketStageTest, OptimizeDoesNotAddProjectWhenViableExclusionProjectExists) { + auto unpackSpecObj = fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo'}}"); + auto projectSpecObj = fromjson("{$project: {_id: false, x: false}}"); + + auto pipeline = Pipeline::parse(makeVector(unpackSpecObj, projectSpecObj), getExpCtx()); + ASSERT_EQ(2u, pipeline->getSources().size()); + + pipeline->optimizePipeline(); + ASSERT_EQ(2u, pipeline->getSources().size()); + + auto serialized = pipeline->serializeToBson(); + ASSERT_EQ(2u, serialized.size()); + + ASSERT_BSONOBJ_EQ(unpackSpecObj, serialized[0]); + ASSERT_BSONOBJ_EQ(projectSpecObj, serialized[1]); +} + +TEST_F(InternalUnpackBucketStageTest, OptimizeAddsInclusionProjectInsteadOfViableExclusionProject) { + auto unpackSpecObj = fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo'}}"); + auto projectSpecObj = fromjson("{$project: {_id: false, x: false}}"); + auto sortSpecObj = fromjson("{$sort: {y: 1}}"); + auto groupSpecObj = fromjson("{$group: {_id: '$y', f: {$first: '$z'}}}"); + + auto pipeline = Pipeline::parse( + makeVector(unpackSpecObj, projectSpecObj, sortSpecObj, groupSpecObj), getExpCtx()); + ASSERT_EQ(4u, pipeline->getSources().size()); + + pipeline->optimizePipeline(); + ASSERT_EQ(5u, pipeline->getSources().size()); + + auto serialized = pipeline->serializeToBson(); + ASSERT_EQ(5u, serialized.size()); + + const UnorderedFieldsBSONObjComparator kComparator; + ASSERT_BSONOBJ_EQ(unpackSpecObj, serialized[0]); + ASSERT_EQ( + kComparator.compare(fromjson("{$project: {_id: false, y: true, z: true}}"), serialized[1]), + 0); + ASSERT_BSONOBJ_EQ(projectSpecObj, serialized[2]); + ASSERT_BSONOBJ_EQ(sortSpecObj, serialized[3]); + ASSERT_BSONOBJ_EQ(groupSpecObj, serialized[4]); +} } // namespace } // namespace mongo diff --git a/src/mongo/db/pipeline/pipeline.cpp b/src/mongo/db/pipeline/pipeline.cpp index 33f4cfc7dac..b54b76ab0a8 100644 --- a/src/mongo/db/pipeline/pipeline.cpp +++ b/src/mongo/db/pipeline/pipeline.cpp @@ -484,12 +484,23 @@ void Pipeline::addFinalSource(intrusive_ptr<DocumentSource> source) { _sources.push_back(source); } -DepsTracker Pipeline::getDependencies(QueryMetadataBitSet unavailableMetadata) const { - DepsTracker deps(unavailableMetadata); +DepsTracker Pipeline::getDependencies( + boost::optional<QueryMetadataBitSet> unavailableMetadata) const { + return getDependenciesForContainer(getContext(), _sources, unavailableMetadata); +} + +DepsTracker Pipeline::getDependenciesForContainer( + const boost::intrusive_ptr<ExpressionContext>& expCtx, + const SourceContainer& container, + boost::optional<QueryMetadataBitSet> unavailableMetadata) { + // If 'unavailableMetadata' was not specified, we assume all metadata is available. This allows + // us to call 'deps.setNeedsMetadata()' without throwing. + DepsTracker deps(unavailableMetadata.get_value_or(DepsTracker::kNoMetadata)); + bool hasUnsupportedStage = false; bool knowAllFields = false; bool knowAllMeta = false; - for (auto&& source : _sources) { + for (auto&& source : container) { DepsTracker localDeps(deps.getUnavailableMetadata()); DepsTracker::State status = source->getDependencies(&localDeps); @@ -521,11 +532,11 @@ DepsTracker Pipeline::getDependencies(QueryMetadataBitSet unavailableMetadata) c if (!knowAllFields) deps.needWholeDocument = true; // don't know all fields we need - if (!unavailableMetadata[DocumentMetadataFields::kTextScore]) { + if (!deps.getUnavailableMetadata()[DocumentMetadataFields::kTextScore]) { // There is a text score available. If we are the first half of a split pipeline, then we // have to assume future stages might depend on the textScore (unless we've encountered a // stage that doesn't preserve metadata). - if (getContext()->needsMerge && !knowAllMeta) { + if (expCtx->needsMerge && !knowAllMeta) { deps.setNeedsMetadata(DocumentMetadataFields::kTextScore, true); } } else { diff --git a/src/mongo/db/pipeline/pipeline.h b/src/mongo/db/pipeline/pipeline.h index 93663a68abf..fa3b3d6167a 100644 --- a/src/mongo/db/pipeline/pipeline.h +++ b/src/mongo/db/pipeline/pipeline.h @@ -268,9 +268,22 @@ public: /** * Returns the dependencies needed by this pipeline. 'unavailableMetadata' should reflect what - * metadata is not present on documents that are input to the front of the pipeline. + * metadata is not present on documents that are input to the front of the pipeline. If + * 'unavailableMetadata' is specified, this method will throw if any of the dependencies + * reference unavailable metadata. */ - DepsTracker getDependencies(QueryMetadataBitSet unavailableMetadata) const; + DepsTracker getDependencies(boost::optional<QueryMetadataBitSet> unavailableMetadata) const; + + /** + * Returns the dependencies needed by the SourceContainer. 'unavailableMetadata' should reflect + * what metadata is not present on documents that are input to the front of the pipeline. If + * 'unavailableMetadata' is specified, this method will throw if any of the dependencies + * reference unavailable metadata. + */ + static DepsTracker getDependenciesForContainer( + const boost::intrusive_ptr<ExpressionContext>& expCtx, + const SourceContainer& container, + boost::optional<QueryMetadataBitSet> unavailableMetadata); const SourceContainer& getSources() const { return _sources; |