author     Hana Pearlman <hana.pearlman@mongodb.com>          2021-01-22 22:18:16 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>   2021-01-25 14:42:49 +0000
commit     dc70bfe933ba9f4b01a028eab72e44e2681fa09a (patch)
tree       bccf317c8495dd3bd7edbc9364b0b02f8d1d56c8
parent     62c3f69e2b2d2e805bb609edce01736f8ffa389b (diff)
download   mongo-dc70bfe933ba9f4b01a028eab72e44e2681fa09a.tar.gz
SERVER-53487: Add an inclusion projection of utilized fields based on dependency analysis of the pipeline after $unpackBucket
-rw-r--r--  src/mongo/db/pipeline/dependencies.cpp                                  18
-rw-r--r--  src/mongo/db/pipeline/dependencies.h                                      9
-rw-r--r--  src/mongo/db/pipeline/dependencies_test.cpp                              79
-rw-r--r--  src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp         84
-rw-r--r--  src/mongo/db/pipeline/document_source_internal_unpack_bucket.h            3
-rw-r--r--  src/mongo/db/pipeline/document_source_internal_unpack_bucket_test.cpp   230
-rw-r--r--  src/mongo/db/pipeline/pipeline.cpp                                       21
-rw-r--r--  src/mongo/db/pipeline/pipeline.h                                         17
8 files changed, 445 insertions, 16 deletions
diff --git a/src/mongo/db/pipeline/dependencies.cpp b/src/mongo/db/pipeline/dependencies.cpp
index c53fe60e2c1..8b60a31637c 100644
--- a/src/mongo/db/pipeline/dependencies.cpp
+++ b/src/mongo/db/pipeline/dependencies.cpp
@@ -37,7 +37,8 @@
namespace mongo {
-BSONObj DepsTracker::toProjectionWithoutMetadata() const {
+BSONObj DepsTracker::toProjectionWithoutMetadata(
+ TruncateToRootLevel truncationBehavior /*= TruncateToRootLevel::no*/) const {
BSONObjBuilder bb;
if (needWholeDocument) {
@@ -65,14 +66,17 @@ BSONObj DepsTracker::toProjectionWithoutMetadata() const {
continue;
}
- last = field + '.';
+ // Check that the field requested is a valid field name in the agg language. This
+ // constructor will throw if it isn't.
+ FieldPath fp(field);
- {
- // Check that the field requested is a valid field name in the agg language. This
- // constructor will throw if it isn't.
- FieldPath fp(field);
+ if (truncationBehavior == TruncateToRootLevel::yes) {
+ last = fp.front().toString() + '.';
+ bb.append(fp.front(), 1);
+ } else {
+ last = field + '.';
+ bb.append(field, 1);
}
- bb.append(field, 1);
}
if (!idSpecified) {
diff --git a/src/mongo/db/pipeline/dependencies.h b/src/mongo/db/pipeline/dependencies.h
index 96d846cc216..bda3bf9b243 100644
--- a/src/mongo/db/pipeline/dependencies.h
+++ b/src/mongo/db/pipeline/dependencies.h
@@ -101,11 +101,16 @@ struct DepsTracker {
DepsTracker(const QueryMetadataBitSet& unavailableMetadata = kNoMetadata)
: _unavailableMetadata{unavailableMetadata} {}
+ enum class TruncateToRootLevel : bool { no, yes };
+
/**
* Returns a projection object covering the non-metadata dependencies tracked by this class, or
- * empty BSONObj if the entire document is required.
+ * empty BSONObj if the entire document is required. By default, the resulting project will
+ * include the full, dotted field names of the dependencies. If 'truncationBehavior' is set to
+ * TruncateToRootLevel::yes, the project will contain only the root-level field names.
*/
- BSONObj toProjectionWithoutMetadata() const;
+ BSONObj toProjectionWithoutMetadata(
+ TruncateToRootLevel truncationBehavior = TruncateToRootLevel::no) const;
/**
* Returns 'true' if there is no dependency on the input documents or metadata.
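
As a rough standalone sketch of the two projection modes documented above (illustrative only; the helper and types below are hypothetical and not part of DepsTracker), the prefix-skipping and root-level truncation behave like this:

// Standalone sketch of toProjectionWithoutMetadata()'s field handling, both modes.
// Illustrative only: plain std::map stands in for BSONObjBuilder.
#include <iostream>
#include <map>
#include <set>
#include <string>

std::map<std::string, int> toProjection(const std::set<std::string>& fields,
                                        bool truncateToRootLevel) {
    std::map<std::string, int> proj;
    std::string last;
    for (const auto& field : fields) {  // std::set iterates in sorted order
        if (!last.empty() && field.compare(0, last.size(), last) == 0)
            continue;  // e.g. "a.c.d" is already covered by "a.c" (or by root "a")
        const std::string root = field.substr(0, field.find('.'));
        if (truncateToRootLevel) {
            last = root + '.';
            proj[root] = 1;
        } else {
            last = field + '.';
            proj[field] = 1;
        }
    }
    return proj;
}

int main() {
    const std::set<std::string> deps{"_id.a", "a.b", "a.c", "a.c.d"};
    for (const auto& [f, v] : toProjection(deps, /*truncateToRootLevel=*/true))
        std::cout << f << ": " << v << "\n";  // _id: 1, a: 1
    for (const auto& [f, v] : toProjection(deps, /*truncateToRootLevel=*/false))
        std::cout << f << ": " << v << "\n";  // _id.a: 1, a.b: 1, a.c: 1
}
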
diff --git a/src/mongo/db/pipeline/dependencies_test.cpp b/src/mongo/db/pipeline/dependencies_test.cpp
index 58e1066d158..f366ad3ce1d 100644
--- a/src/mongo/db/pipeline/dependencies_test.cpp
+++ b/src/mongo/db/pipeline/dependencies_test.cpp
@@ -61,6 +61,52 @@ TEST(DependenciesTest, CheckClassConstants) {
ASSERT_TRUE(DepsTracker::kOnlyTextScore[DocumentMetadataFields::kTextScore]);
}
+TEST(DependenciesNeedsMetadataTest, ShouldSucceedIfMetadataAvailableAndNeeded) {
+ DepsTracker deps(~DepsTracker::kOnlyTextScore);
+ deps.setNeedsMetadata(DocumentMetadataFields::kTextScore, true);
+ ASSERT_TRUE(deps.getNeedsMetadata(DocumentMetadataFields::kTextScore));
+ ASSERT_TRUE(deps.getNeedsAnyMetadata());
+}
+
+TEST(DependenciesNeedsMetadataTest, ShouldSucceedIfAllMetadataAvailableAndNeeded) {
+ DepsTracker deps(DepsTracker::kNoMetadata);
+ deps.setNeedsMetadata(DocumentMetadataFields::kTextScore, true);
+ ASSERT_TRUE(deps.getNeedsMetadata(DocumentMetadataFields::kTextScore));
+ ASSERT_TRUE(deps.getNeedsAnyMetadata());
+
+ deps.setNeedsMetadata(DocumentMetadataFields::kGeoNearPoint, true);
+ ASSERT_TRUE(deps.getNeedsMetadata(DocumentMetadataFields::kGeoNearPoint));
+ ASSERT_TRUE(deps.getNeedsAnyMetadata());
+}
+
+TEST(DependenciesNeedsMetadataTest, ShouldSucceedIfMetadataUnavailableAndNotNeeded) {
+ DepsTracker deps(DepsTracker::kOnlyTextScore);
+ deps.setNeedsMetadata(DocumentMetadataFields::kTextScore, false);
+ ASSERT_FALSE(deps.getNeedsMetadata(DocumentMetadataFields::kTextScore));
+ ASSERT_FALSE(deps.getNeedsAnyMetadata());
+}
+
+TEST(DependenciesNeedsMetadataTest, ShouldSucceedIfMetadataAvailableAndNotNeeded) {
+ DepsTracker deps(~DepsTracker::kOnlyTextScore);
+ deps.setNeedsMetadata(DocumentMetadataFields::kTextScore, false);
+ ASSERT_FALSE(deps.getNeedsMetadata(DocumentMetadataFields::kTextScore));
+ ASSERT_FALSE(deps.getNeedsAnyMetadata());
+}
+
+TEST(DependenciesNeedsMetadataTest, ShouldThrowIfMetadataUnavailableButNeeded) {
+ DepsTracker deps(DepsTracker::kOnlyTextScore);
+ ASSERT_THROWS(deps.setNeedsMetadata(DocumentMetadataFields::kTextScore, true),
+ AssertionException);
+}
+
+TEST(DependenciesNeedsMetadataTest, ShouldThrowIfNoMetadataAvailableButNeeded) {
+ DepsTracker deps(DepsTracker::kAllMetadata);
+ ASSERT_THROWS(deps.setNeedsMetadata(DocumentMetadataFields::kTextScore, true),
+ AssertionException);
+ ASSERT_THROWS(deps.setNeedsMetadata(DocumentMetadataFields::kGeoNearPoint, true),
+ AssertionException);
+}
+
TEST(DependenciesToProjectionTest, ShouldIncludeAllFieldsAndExcludeIdIfNotSpecified) {
const char* array[] = {"a", "b"};
DepsTracker deps;
@@ -83,6 +129,39 @@ TEST(DependenciesToProjectionTest, ShouldNotIncludeSubFieldIfTopLevelAlreadyIncl
ASSERT_BSONOBJ_EQ(deps.toProjectionWithoutMetadata(), BSON("a" << 1 << "b" << 1 << "_id" << 0));
}
+TEST(DependenciesToProjectionTest, ShouldOnlyIncludeRootLevelPrefixesWithTruncate) {
+ const char* array[] = {"a.b", "a.c", "a.c.d", "_id.a"};
+ DepsTracker deps;
+ deps.fields = arrayToSet(array);
+ ASSERT_BSONOBJ_EQ(deps.toProjectionWithoutMetadata(DepsTracker::TruncateToRootLevel::yes),
+ BSON("_id" << 1 << "a" << 1));
+}
+
+TEST(DependenciesToProjectionTest, ShouldIncludeDottedPrefixesWithoutTruncate) {
+ const char* array[] = {"a.b", "a.c", "a.c.d", "_id.a"};
+ DepsTracker deps;
+ deps.fields = arrayToSet(array);
+ ASSERT_BSONOBJ_EQ(deps.toProjectionWithoutMetadata(DepsTracker::TruncateToRootLevel::no),
+ BSON("_id.a" << 1 << "a.b" << 1 << "a.c" << 1));
+}
+
+TEST(DependenciesToProjectionTest,
+ ShouldIncludeAllRootFieldsAndExcludeIdIfNotSpecifiedWithTruncate) {
+ const char* array[] = {"a", "b"};
+ DepsTracker deps;
+ deps.fields = arrayToSet(array);
+ ASSERT_BSONOBJ_EQ(deps.toProjectionWithoutMetadata(DepsTracker::TruncateToRootLevel::yes),
+ BSON("a" << 1 << "b" << 1 << "_id" << 0));
+}
+
+TEST(DependenciesToProjectionTest, ShouldIncludeFieldEvenIfSuffixOfAnotherFieldWithTruncate) {
+ const char* array[] = {"a", "ab"};
+ DepsTracker deps;
+ deps.fields = arrayToSet(array);
+ ASSERT_BSONOBJ_EQ(deps.toProjectionWithoutMetadata(DepsTracker::TruncateToRootLevel::yes),
+ BSON("a" << 1 << "ab" << 1 << "_id" << 0));
+}
+
TEST(DependenciesToProjectionTest, ShouldIncludeIdIfNeeded) {
const char* array[] = {"a", "_id"};
DepsTracker deps;
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
index c6ae7ee55c9..cfeb5ce0380 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.cpp
@@ -31,7 +31,10 @@
#include "mongo/db/pipeline/document_source_internal_unpack_bucket.h"
+#include "mongo/bson/bsonobj.h"
#include "mongo/db/exec/document_value/document.h"
+#include "mongo/db/pipeline/document_source_project.h"
+#include "mongo/db/pipeline/document_source_single_document_transformation.h"
#include "mongo/db/pipeline/expression_context.h"
#include "mongo/db/pipeline/lite_parsed_document_source.h"
@@ -233,4 +236,85 @@ DocumentSource::GetNextResult DocumentSourceInternalUnpackBucket::doGetNext() {
return nextResult;
}
+
+namespace {
+/**
+ * A projection can be internalized if it does not include any dotted field names and if every field
+ * name corresponds to a boolean value.
+ */
+bool canInternalizeProjectObj(const BSONObj& projObj) {
+ const auto names = projObj.getFieldNames<std::set<std::string>>();
+ return std::all_of(names.begin(), names.end(), [&projObj](auto&& name) {
+ return name.find('.') == std::string::npos && projObj.getField(name).isBoolean();
+ });
+}
+
+/**
+ * If 'src' represents an inclusion or exclusion $project, return a BSONObj representing it, else
+ * return an empty BSONObj. If 'inclusionOnly' is true, 'src' must be an inclusion $project.
+ */
+auto getProjectObj(DocumentSource* src, bool inclusionOnly) {
+ if (const auto projStage = dynamic_cast<DocumentSourceSingleDocumentTransformation*>(src);
+ projStage &&
+ (projStage->getType() == TransformerInterface::TransformerType::kInclusionProjection ||
+ (!inclusionOnly &&
+ projStage->getType() == TransformerInterface::TransformerType::kExclusionProjection))) {
+ return projStage->getTransformer().serializeTransformation(boost::none).toBson();
+ }
+
+ return BSONObj{};
+}
+
+/**
+ * Given a source container and an iterator pointing to the $unpackBucket stage, builds a projection
+ * BSONObj that can be entirely moved into the $unpackBucket stage, following these rules:
+ * 1. If there is an inclusion projection immediately after the $unpackBucket which can be
+ * internalized, an empty BSONObj will be returned.
+ * 2. Otherwise, if there is a finite dependency set for the rest of the pipeline, an inclusion
+ * $project representing it and containing only root-level fields will be returned. An
+ * inclusion $project will be returned here even if there is a viable exclusion $project
+ * next in the pipeline.
+ * 3. Otherwise, an empty BSONObj will be returned.
+ */
+auto buildProjectToInternalize(const boost::intrusive_ptr<ExpressionContext>& expCtx,
+ Pipeline::SourceContainer::iterator itr,
+ Pipeline::SourceContainer* container) {
+ // Check for a viable inclusion $project after the $unpackBucket. This handles case 1.
+ if (auto projObj = getProjectObj(std::next(itr)->get(), true);
+ !projObj.isEmpty() && canInternalizeProjectObj(projObj)) {
+ return BSONObj{};
+ }
+
+ // If there is a finite dependency set for the pipeline after the $unpackBucket, obtain an
+ // inclusion $project representing its root-level fields. Otherwise, we get an empty BSONObj.
+ Pipeline::SourceContainer restOfPipeline(std::next(itr), container->end());
+ auto deps = Pipeline::getDependenciesForContainer(expCtx, restOfPipeline, boost::none);
+ auto dependencyProj = deps.toProjectionWithoutMetadata(DepsTracker::TruncateToRootLevel::yes);
+
+ // If 'dependencyProj' is not empty, we're in case 2. If it is empty, we're in case 3. There may
+ // be a viable exclusion $project in the pipeline, but we don't need to check for it here.
+ return dependencyProj;
+}
+} // namespace
+
+Pipeline::SourceContainer::iterator DocumentSourceInternalUnpackBucket::doOptimizeAt(
+ Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
+ invariant(*itr == this);
+
+ if (std::next(itr) == container->end()) {
+ return container->end();
+ }
+
+ // Attempt to build an internalizable $project based on dependency analysis.
+ if (auto projObj = buildProjectToInternalize(getContext(), itr, container);
+ !projObj.isEmpty()) {
+ // Give the new $project a chance to be optimized before internalizing.
+ container->insert(std::next(itr),
+ DocumentSourceProject::createFromBson(
+ BSON("$project" << projObj).firstElement(), pExpCtx));
+ return std::next(itr);
+ }
+
+ return std::next(itr);
+}
} // namespace mongo
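
The three cases handled by buildProjectToInternalize() above can be summarized with a small standalone model. This is a hedged sketch of the decision logic only (std::set stands in for BSONObj, and the parameter names are hypothetical inputs rather than MongoDB APIs):

// Standalone model of the buildProjectToInternalize() decision logic.
#include <iostream>
#include <optional>
#include <set>
#include <string>

using FieldSet = std::set<std::string>;

// Returns the root-level inclusion fields to add, or an empty set when no
// $project should be generated (cases 1 and 3 in the comment above).
FieldSet projectToInternalize(bool nextStageIsInternalizableInclusion,
                              const std::optional<FieldSet>& finiteDeps) {
    if (nextStageIsInternalizableInclusion)
        return {};  // Case 1: the existing inclusion $project will be internalized instead.
    if (!finiteDeps)
        return {};  // Case 3: the rest of the pipeline may need the whole document.
    FieldSet roots;  // Case 2: truncate each dependency to its root-level field.
    for (const auto& dep : *finiteDeps)
        roots.insert(dep.substr(0, dep.find('.')));
    return roots;
}

int main() {
    // e.g. a trailing {$group: {_id: '$x.y', f: {$first: '$a.b'}}} depends on x.y and a.b,
    // so the generated inclusion $project covers the roots {a, x}.
    for (const auto& f : projectToInternalize(false, FieldSet{"x.y", "a.b"}))
        std::cout << f << ": true\n";
}
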
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h
index 9e4c18d273a..bb389310fe0 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket.h
@@ -158,6 +158,9 @@ public:
return boost::none;
};
+ Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr,
+ Pipeline::SourceContainer* container) final;
+
private:
GetNextResult doGetNext() final;
diff --git a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test.cpp b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test.cpp
index b4bbc9319dd..1a30cf8230b 100644
--- a/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test.cpp
+++ b/src/mongo/db/pipeline/document_source_internal_unpack_bucket_test.cpp
@@ -29,10 +29,17 @@
#include "mongo/platform/basic.h"
+#include "mongo/bson/unordered_fields_bsonobj_comparator.h"
#include "mongo/db/exec/document_value/document_value_test_util.h"
#include "mongo/db/pipeline/aggregation_context_fixture.h"
+#include "mongo/db/pipeline/document_source_group.h"
#include "mongo/db/pipeline/document_source_internal_unpack_bucket.h"
+#include "mongo/db/pipeline/document_source_match.h"
#include "mongo/db/pipeline/document_source_mock.h"
+#include "mongo/db/pipeline/document_source_project.h"
+#include "mongo/db/pipeline/document_source_sort.h"
+#include "mongo/db/pipeline/pipeline.h"
+#include "mongo/db/query/util/make_data_structure.h"
namespace mongo {
namespace {
@@ -723,5 +730,228 @@ TEST_F(InternalUnpackBucketStageTest, ParserRejectsMissingTimeField) {
getExpCtx()),
AssertionException);
}
+
+TEST_F(InternalUnpackBucketStageTest, OptimizeAddsIncludeProjectForGroupDependencies) {
+ auto unpackSpecObj = fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo'}}");
+ auto groupSpecObj = fromjson("{$group: {_id: '$x', f: {$first: '$y'}}}");
+
+ auto pipeline = Pipeline::parse(makeVector(unpackSpecObj, groupSpecObj), getExpCtx());
+ ASSERT_EQ(2u, pipeline->getSources().size());
+
+ pipeline->optimizePipeline();
+ ASSERT_EQ(3u, pipeline->getSources().size());
+
+ auto serialized = pipeline->serializeToBson();
+ ASSERT_EQ(3u, serialized.size());
+
+ const UnorderedFieldsBSONObjComparator kComparator;
+ ASSERT_BSONOBJ_EQ(unpackSpecObj, serialized[0]);
+ ASSERT_EQ(
+ kComparator.compare(fromjson("{$project: {_id: false, x: true, y: true}}"), serialized[1]),
+ 0);
+ ASSERT_BSONOBJ_EQ(groupSpecObj, serialized[2]);
+}
+
+TEST_F(InternalUnpackBucketStageTest, OptimizeAddsIncludeProjectForProjectDependencies) {
+ auto unpackSpecObj = fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo'}}");
+ auto projectSpecObj = fromjson("{$project: {_id: true, x: {f: '$y'}}}");
+
+ auto pipeline = Pipeline::parse(makeVector(unpackSpecObj, projectSpecObj), getExpCtx());
+ ASSERT_EQ(2u, pipeline->getSources().size());
+
+ pipeline->optimizePipeline();
+ ASSERT_EQ(3u, pipeline->getSources().size());
+
+ auto serialized = pipeline->serializeToBson();
+ ASSERT_EQ(3u, serialized.size());
+
+ const UnorderedFieldsBSONObjComparator kComparator;
+ ASSERT_BSONOBJ_EQ(unpackSpecObj, serialized[0]);
+ ASSERT_EQ(
+ kComparator.compare(fromjson("{$project: {_id: true, x: true, y: true}}"), serialized[1]),
+ 0);
+ ASSERT_BSONOBJ_EQ(projectSpecObj, serialized[2]);
+}
+
+TEST_F(InternalUnpackBucketStageTest, OptimizeAddsIncludeProjectWhenInMiddleOfPipeline) {
+ auto matchSpecObj = fromjson("{$match: {'meta.source': 'primary'}}");
+ auto unpackSpecObj =
+ fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo', metaField: 'meta'}}");
+ auto groupSpecObj = fromjson("{$group: {_id: '$x', f: {$first: '$y'}}}");
+
+ auto pipeline =
+ Pipeline::parse(makeVector(matchSpecObj, unpackSpecObj, groupSpecObj), getExpCtx());
+ ASSERT_EQ(3u, pipeline->getSources().size());
+
+ pipeline->optimizePipeline();
+ ASSERT_EQ(4u, pipeline->getSources().size());
+
+ auto serialized = pipeline->serializeToBson();
+ ASSERT_EQ(4u, serialized.size());
+
+ const UnorderedFieldsBSONObjComparator kComparator;
+ ASSERT_BSONOBJ_EQ(matchSpecObj, serialized[0]);
+ ASSERT_BSONOBJ_EQ(unpackSpecObj, serialized[1]);
+ ASSERT_EQ(
+ kComparator.compare(fromjson("{$project: {_id: false, x: true, y: true}}"), serialized[2]),
+ 0);
+ ASSERT_BSONOBJ_EQ(groupSpecObj, serialized[3]);
+}
+
+TEST_F(InternalUnpackBucketStageTest, OptimizeAddsIncludeProjectWhenDependenciesAreDotted) {
+ auto unpackSpecObj = fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo'}}");
+ auto groupSpecObj = fromjson("{$group: {_id: '$x.y', f: {$first: '$a.b'}}}");
+
+ auto pipeline = Pipeline::parse(makeVector(unpackSpecObj, groupSpecObj), getExpCtx());
+ ASSERT_EQ(2u, pipeline->getSources().size());
+
+ pipeline->optimizePipeline();
+ ASSERT_EQ(3u, pipeline->getSources().size());
+
+ auto serialized = pipeline->serializeToBson();
+ ASSERT_EQ(3u, serialized.size());
+
+ const UnorderedFieldsBSONObjComparator kComparator;
+ ASSERT_BSONOBJ_EQ(unpackSpecObj, serialized[0]);
+ ASSERT_EQ(
+ kComparator.compare(fromjson("{$project: {_id: false, x: true, a: true}}"), serialized[1]),
+ 0);
+ ASSERT_BSONOBJ_EQ(groupSpecObj, serialized[2]);
+}
+
+TEST_F(InternalUnpackBucketStageTest, OptimizeDoesNotAddProjectWhenThereAreNoDependencies) {
+ auto unpackSpecObj = fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo'}}");
+ auto groupSpecObj = fromjson("{$group: {_id: {$const: null}, count: { $sum: {$const: 1 }}}}");
+
+ auto pipeline = Pipeline::parse(makeVector(unpackSpecObj, groupSpecObj), getExpCtx());
+ ASSERT_EQ(2u, pipeline->getSources().size());
+
+ pipeline->optimizePipeline();
+ ASSERT_EQ(2u, pipeline->getSources().size());
+
+ auto serialized = pipeline->serializeToBson();
+ ASSERT_EQ(2u, serialized.size());
+
+ ASSERT_BSONOBJ_EQ(unpackSpecObj, serialized[0]);
+ ASSERT_BSONOBJ_EQ(groupSpecObj, serialized[1]);
+}
+
+TEST_F(InternalUnpackBucketStageTest, OptimizeDoesNotAddProjectWhenSortDependenciesAreNotFinite) {
+ auto unpackSpecObj = fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo'}}");
+ auto sortSpecObj = fromjson("{$sort: {x: 1}}");
+
+ auto pipeline = Pipeline::parse(makeVector(unpackSpecObj, sortSpecObj), getExpCtx());
+ ASSERT_EQ(2u, pipeline->getSources().size());
+
+ pipeline->optimizePipeline();
+ ASSERT_EQ(2u, pipeline->getSources().size());
+
+ auto serialized = pipeline->serializeToBson();
+ ASSERT_EQ(2u, serialized.size());
+
+ ASSERT_BSONOBJ_EQ(unpackSpecObj, serialized[0]);
+ ASSERT_BSONOBJ_EQ(sortSpecObj, serialized[1]);
+}
+
+TEST_F(InternalUnpackBucketStageTest,
+ OptimizeDoesNotAddProjectWhenProjectDependenciesAreNotFinite) {
+ auto unpackSpecObj = fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo'}}");
+ auto sortSpecObj = fromjson("{$sort: {x: 1}}");
+ auto projectSpecObj = fromjson("{$project: {_id: false, x: false}}");
+
+ auto pipeline =
+ Pipeline::parse(makeVector(unpackSpecObj, sortSpecObj, projectSpecObj), getExpCtx());
+ ASSERT_EQ(3u, pipeline->getSources().size());
+
+ pipeline->optimizePipeline();
+ ASSERT_EQ(3u, pipeline->getSources().size());
+
+ auto serialized = pipeline->serializeToBson();
+ ASSERT_EQ(3u, serialized.size());
+
+ ASSERT_BSONOBJ_EQ(unpackSpecObj, serialized[0]);
+ ASSERT_BSONOBJ_EQ(sortSpecObj, serialized[1]);
+ ASSERT_BSONOBJ_EQ(projectSpecObj, serialized[2]);
+}
+
+TEST_F(InternalUnpackBucketStageTest, OptimizeDoesNotAddProjectWhenViableInclusionProjectExists) {
+ auto unpackSpecObj = fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo'}}");
+ auto projectSpecObj = fromjson("{$project: {_id: true, x: true}}");
+
+ auto pipeline = Pipeline::parse(makeVector(unpackSpecObj, projectSpecObj), getExpCtx());
+ ASSERT_EQ(2u, pipeline->getSources().size());
+
+ pipeline->optimizePipeline();
+ ASSERT_EQ(2u, pipeline->getSources().size());
+
+ auto serialized = pipeline->serializeToBson();
+ ASSERT_EQ(2u, serialized.size());
+
+ ASSERT_BSONOBJ_EQ(unpackSpecObj, serialized[0]);
+ ASSERT_BSONOBJ_EQ(projectSpecObj, serialized[1]);
+}
+
+TEST_F(InternalUnpackBucketStageTest,
+ OptimizeDoesNotAddProjectWhenViableNonBoolInclusionProjectExists) {
+ auto unpackSpecObj = fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo'}}");
+ auto pipeline = Pipeline::parse(
+ makeVector(unpackSpecObj, fromjson("{$project: {_id: 1, x: 1.0, y: 1.5}}")), getExpCtx());
+ ASSERT_EQ(2u, pipeline->getSources().size());
+
+ pipeline->optimizePipeline();
+ ASSERT_EQ(2u, pipeline->getSources().size());
+
+ auto serialized = pipeline->serializeToBson();
+ ASSERT_EQ(2u, serialized.size());
+
+ const UnorderedFieldsBSONObjComparator kComparator;
+ ASSERT_BSONOBJ_EQ(unpackSpecObj, serialized[0]);
+ ASSERT_EQ(
+ kComparator.compare(fromjson("{$project: {_id: true, x: true, y: true}}"), serialized[1]),
+ 0);
+}
+
+TEST_F(InternalUnpackBucketStageTest, OptimizeDoesNotAddProjectWhenViableExclusionProjectExists) {
+ auto unpackSpecObj = fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo'}}");
+ auto projectSpecObj = fromjson("{$project: {_id: false, x: false}}");
+
+ auto pipeline = Pipeline::parse(makeVector(unpackSpecObj, projectSpecObj), getExpCtx());
+ ASSERT_EQ(2u, pipeline->getSources().size());
+
+ pipeline->optimizePipeline();
+ ASSERT_EQ(2u, pipeline->getSources().size());
+
+ auto serialized = pipeline->serializeToBson();
+ ASSERT_EQ(2u, serialized.size());
+
+ ASSERT_BSONOBJ_EQ(unpackSpecObj, serialized[0]);
+ ASSERT_BSONOBJ_EQ(projectSpecObj, serialized[1]);
+}
+
+TEST_F(InternalUnpackBucketStageTest, OptimizeAddsInclusionProjectInsteadOfViableExclusionProject) {
+ auto unpackSpecObj = fromjson("{$_internalUnpackBucket: { exclude: [], timeField: 'foo'}}");
+ auto projectSpecObj = fromjson("{$project: {_id: false, x: false}}");
+ auto sortSpecObj = fromjson("{$sort: {y: 1}}");
+ auto groupSpecObj = fromjson("{$group: {_id: '$y', f: {$first: '$z'}}}");
+
+ auto pipeline = Pipeline::parse(
+ makeVector(unpackSpecObj, projectSpecObj, sortSpecObj, groupSpecObj), getExpCtx());
+ ASSERT_EQ(4u, pipeline->getSources().size());
+
+ pipeline->optimizePipeline();
+ ASSERT_EQ(5u, pipeline->getSources().size());
+
+ auto serialized = pipeline->serializeToBson();
+ ASSERT_EQ(5u, serialized.size());
+
+ const UnorderedFieldsBSONObjComparator kComparator;
+ ASSERT_BSONOBJ_EQ(unpackSpecObj, serialized[0]);
+ ASSERT_EQ(
+ kComparator.compare(fromjson("{$project: {_id: false, y: true, z: true}}"), serialized[1]),
+ 0);
+ ASSERT_BSONOBJ_EQ(projectSpecObj, serialized[2]);
+ ASSERT_BSONOBJ_EQ(sortSpecObj, serialized[3]);
+ ASSERT_BSONOBJ_EQ(groupSpecObj, serialized[4]);
+}
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/pipeline/pipeline.cpp b/src/mongo/db/pipeline/pipeline.cpp
index 33f4cfc7dac..b54b76ab0a8 100644
--- a/src/mongo/db/pipeline/pipeline.cpp
+++ b/src/mongo/db/pipeline/pipeline.cpp
@@ -484,12 +484,23 @@ void Pipeline::addFinalSource(intrusive_ptr<DocumentSource> source) {
_sources.push_back(source);
}
-DepsTracker Pipeline::getDependencies(QueryMetadataBitSet unavailableMetadata) const {
- DepsTracker deps(unavailableMetadata);
+DepsTracker Pipeline::getDependencies(
+ boost::optional<QueryMetadataBitSet> unavailableMetadata) const {
+ return getDependenciesForContainer(getContext(), _sources, unavailableMetadata);
+}
+
+DepsTracker Pipeline::getDependenciesForContainer(
+ const boost::intrusive_ptr<ExpressionContext>& expCtx,
+ const SourceContainer& container,
+ boost::optional<QueryMetadataBitSet> unavailableMetadata) {
+ // If 'unavailableMetadata' was not specified, we assume all metadata is available. This allows
+ // us to call 'deps.setNeedsMetadata()' without throwing.
+ DepsTracker deps(unavailableMetadata.get_value_or(DepsTracker::kNoMetadata));
+
bool hasUnsupportedStage = false;
bool knowAllFields = false;
bool knowAllMeta = false;
- for (auto&& source : _sources) {
+ for (auto&& source : container) {
DepsTracker localDeps(deps.getUnavailableMetadata());
DepsTracker::State status = source->getDependencies(&localDeps);
@@ -521,11 +532,11 @@ DepsTracker Pipeline::getDependencies(QueryMetadataBitSet unavailableMetadata) c
if (!knowAllFields)
deps.needWholeDocument = true; // don't know all fields we need
- if (!unavailableMetadata[DocumentMetadataFields::kTextScore]) {
+ if (!deps.getUnavailableMetadata()[DocumentMetadataFields::kTextScore]) {
// There is a text score available. If we are the first half of a split pipeline, then we
// have to assume future stages might depend on the textScore (unless we've encountered a
// stage that doesn't preserve metadata).
- if (getContext()->needsMerge && !knowAllMeta) {
+ if (expCtx->needsMerge && !knowAllMeta) {
deps.setNeedsMetadata(DocumentMetadataFields::kTextScore, true);
}
} else {
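
The new getDependenciesForContainer() walks the container, unioning each stage's reported field dependencies until some stage reports an exhaustive field set; if none does, the whole document is needed. A hedged, simplified model of that walk (the Stage and Deps structs here are hypothetical stand-ins, not MongoDB types):

// Simplified standalone model of the dependency walk in getDependenciesForContainer().
#include <iostream>
#include <set>
#include <string>
#include <vector>

struct Stage {
    std::set<std::string> fields;   // fields this stage depends on
    bool exhaustiveFields = false;  // true for e.g. an inclusion $project or $group
};

struct Deps {
    std::set<std::string> fields;
    bool needWholeDocument = false;
};

Deps getDependenciesForContainer(const std::vector<Stage>& container) {
    Deps deps;
    bool knowAllFields = false;
    for (const auto& stage : container) {
        if (!knowAllFields) {
            deps.fields.insert(stage.fields.begin(), stage.fields.end());
            knowAllFields = stage.exhaustiveFields;
        }
    }
    if (!knowAllFields)
        deps.needWholeDocument = true;  // some later stage may read anything
    return deps;
}

int main() {
    // A $sort-like stage reads x but is not exhaustive, so the whole document is needed.
    Deps d = getDependenciesForContainer({{{"x"}, false}});
    std::cout << std::boolalpha << d.needWholeDocument << "\n";  // true

    // A $group-like stage reads x and y and is exhaustive: finite dependency set {x, y}.
    d = getDependenciesForContainer({{{"x", "y"}, true}});
    std::cout << d.needWholeDocument << " " << d.fields.size() << "\n";  // false 2
}
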
diff --git a/src/mongo/db/pipeline/pipeline.h b/src/mongo/db/pipeline/pipeline.h
index 93663a68abf..fa3b3d6167a 100644
--- a/src/mongo/db/pipeline/pipeline.h
+++ b/src/mongo/db/pipeline/pipeline.h
@@ -268,9 +268,22 @@ public:
/**
* Returns the dependencies needed by this pipeline. 'unavailableMetadata' should reflect what
- * metadata is not present on documents that are input to the front of the pipeline.
+ * metadata is not present on documents that are input to the front of the pipeline. If
+ * 'unavailableMetadata' is specified, this method will throw if any of the dependencies
+ * reference unavailable metadata.
*/
- DepsTracker getDependencies(QueryMetadataBitSet unavailableMetadata) const;
+ DepsTracker getDependencies(boost::optional<QueryMetadataBitSet> unavailableMetadata) const;
+
+ /**
+ * Returns the dependencies needed by the SourceContainer. 'unavailableMetadata' should reflect
+ * what metadata is not present on documents that are input to the front of the pipeline. If
+ * 'unavailableMetadata' is specified, this method will throw if any of the dependencies
+ * reference unavailable metadata.
+ */
+ static DepsTracker getDependenciesForContainer(
+ const boost::intrusive_ptr<ExpressionContext>& expCtx,
+ const SourceContainer& container,
+ boost::optional<QueryMetadataBitSet> unavailableMetadata);
const SourceContainer& getSources() const {
return _sources;
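
For callers, the practical effect of the now-optional 'unavailableMetadata' parameter is whether metadata validation can throw during the walk. A hedged standalone model of that semantics (hypothetical types; a std::bitset stands in for QueryMetadataBitSet):

// Standalone model of the optional 'unavailableMetadata' semantics documented above.
#include <bitset>
#include <iostream>
#include <optional>
#include <stdexcept>

constexpr std::size_t kTextScore = 0;
using MetadataBitSet = std::bitset<8>;  // a set bit means that metadata is UNAVAILABLE

struct DepsModel {
    MetadataBitSet unavailable;
    void setNeedsMetadata(std::size_t kind) {
        if (unavailable.test(kind))
            throw std::runtime_error("metadata requested but unavailable");
    }
};

DepsModel makeTracker(const std::optional<MetadataBitSet>& unavailableMetadata) {
    // Mirrors unavailableMetadata.get_value_or(DepsTracker::kNoMetadata): when the caller
    // passes nothing, all metadata is assumed available, so setNeedsMetadata never throws.
    return DepsModel{unavailableMetadata.value_or(MetadataBitSet{})};
}

int main() {
    auto permissive = makeTracker(std::nullopt);
    permissive.setNeedsMetadata(kTextScore);  // fine: all metadata assumed available

    MetadataBitSet noTextScore;
    noTextScore.set(kTextScore);
    auto strict = makeTracker(noTextScore);
    try {
        strict.setNeedsMetadata(kTextScore);  // throws: text score marked unavailable
    } catch (const std::runtime_error& e) {
        std::cout << e.what() << "\n";
    }
}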