diff options
author | Ivan Fefer <ivan.fefer@mongodb.com> | 2022-10-20 07:42:58 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-10-20 08:38:46 +0000 |
commit | 9c348fbfafacc93b06d8d4c5521925c97150089f (patch) | |
tree | b2aa2b1bc00d6d24d8a136b3af307e1789f26b4d | |
parent | 28450cf43e1c28c2bf6b75c9fdfec6ad96ad584b (diff) | |
download | mongo-9c348fbfafacc93b06d8d4c5521925c97150089f.tar.gz |

SERVER-61284: Support simple projection optimization for simple exclusion projections

-rw-r--r-- | jstests/aggregation/bugs/exclusion_projection_does_not_affect_field_order.js | 13 | ||||
-rw-r--r-- | jstests/aggregation/optimize_away_pipeline.js | 8 | ||||
-rw-r--r-- | jstests/core/cover_null_queries.js | 4 | ||||
-rw-r--r-- | src/mongo/db/exec/projection.cpp | 43 | ||||
-rw-r--r-- | src/mongo/db/exec/projection.h | 9 | ||||
-rw-r--r-- | src/mongo/db/pipeline/pipeline_d.cpp | 175 | ||||
-rw-r--r-- | src/mongo/db/pipeline/pipeline_d.h | 1 | ||||
-rw-r--r-- | src/mongo/db/query/planner_analysis.cpp | 24 | ||||
-rw-r--r-- | src/mongo/db/query/projection.cpp | 14 | ||||
-rw-r--r-- | src/mongo/db/query/projection.h | 21 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder.cpp | 14 |
11 files changed, 203 insertions, 123 deletions
diff --git a/jstests/aggregation/bugs/exclusion_projection_does_not_affect_field_order.js b/jstests/aggregation/bugs/exclusion_projection_does_not_affect_field_order.js index 4bc45bccef5..09d2239389a 100644 --- a/jstests/aggregation/bugs/exclusion_projection_does_not_affect_field_order.js +++ b/jstests/aggregation/bugs/exclusion_projection_does_not_affect_field_order.js @@ -2,6 +2,10 @@ // changes. // // This is designed as a regression test for SERVER-37791. +// @tags: [ +// do_not_wrap_aggregations_in_facets, +// requires_fcv_62, +// ] (function() { "use strict"; @@ -13,13 +17,14 @@ assert.commandWorked(coll.insert({_id: 2, c: 1})); assert.commandWorked(coll.insert({_id: 3, y: 1, z: 1})); // We expect $addFields to retain the position of pre-existing fields, and then append new fields in -// the order that they are specified in the query. This rule should not be impacted by the presence -// of a preceding exclusion projection. +// the order that they are specified in the query. This rule should not be impacted by exclusion +// projection on non-existent fields. However, depending on projection implementation, excluding a +// field that already exists can affect $addFields behaviour: an excluded field can be put in +// the original place or appended to the end of the document. assert.eq( [ {_id: 1, x: 3, y: 4, b: 5, c: 6, a: 7}, - // Here "c" retains the position that it had prior to being excluded. 
- {_id: 2, c: 6, x: 3, y: 4, b: 5, a: 7}, + {_id: 2, x: 3, y: 4, b: 5, c: 6, a: 7}, {_id: 3, y: 4, z: 1, x: 3, b: 5, c: 6, a: 7} ], coll.aggregate([ diff --git a/jstests/aggregation/optimize_away_pipeline.js b/jstests/aggregation/optimize_away_pipeline.js index 36921f42c34..d08bde70f5d 100644 --- a/jstests/aggregation/optimize_away_pipeline.js +++ b/jstests/aggregation/optimize_away_pipeline.js @@ -690,13 +690,11 @@ projStage = getAggPlanStage(explain, "PROJECTION_SIMPLE"); assert.neq(null, projStage, explain); assertTransformByShape({a: 1, b: 1, _id: 0}, projStage.transformBy, explain); -// Test that an exclusion projection at the front of the pipeline is not pushed down, if there no +// Test that an exclusion projection at the front of the pipeline is pushed down if there is no // finite dependency set. pipeline = [{$project: {x: 0}}]; -assertPipelineUsesAggregation({pipeline: pipeline, expectedStages: ["COLLSCAN"]}); -explain = coll.explain().aggregate(pipeline); -assert(!planHasStage(db, explain, "PROJECTION_SIMPLE"), explain); -assert(!planHasStage(db, explain, "PROJECTION_DEFAULT"), explain); +assertPipelineDoesNotUseAggregation( + {pipeline: pipeline, expectedStages: ["PROJECTION_SIMPLE", "COLLSCAN"]}); // Test that a computed projection at the front of the pipeline is pushed down, even if there's no // finite dependency set. 
diff --git a/jstests/core/cover_null_queries.js b/jstests/core/cover_null_queries.js index 50bf0b58b07..c17b9e71929 100644 --- a/jstests/core/cover_null_queries.js +++ b/jstests/core/cover_null_queries.js @@ -3,7 +3,7 @@ * @tags: [ * assumes_unsharded_collection, * requires_non_retryable_writes, - * requires_fcv_52 + * requires_fcv_62, * ] */ (function() { @@ -201,7 +201,7 @@ validateFindCmdOutputAndPlan({ filter: {a: null}, projection: {a: 0, b: 0}, expectedOutput: [{_id: 3}, {_id: 4}, {_id: 6}, {_id: 7}], - expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_DEFAULT": 1}, + expectedStages: {"IXSCAN": 1, "FETCH": 1, "PROJECTION_SIMPLE": 1}, }); // Verify find({a: null}, {_id: 1, b: 1}) is not covered by an index so we still have a FETCH stage. diff --git a/src/mongo/db/exec/projection.cpp b/src/mongo/db/exec/projection.cpp index 5f8a1bee2b0..1fb41002e96 100644 --- a/src/mongo/db/exec/projection.cpp +++ b/src/mongo/db/exec/projection.cpp @@ -220,7 +220,7 @@ ProjectionStageCovered::ProjectionStageCovered(ExpressionContext* expCtx, const BSONObj& coveredKeyObj) : ProjectionStage{expCtx, projObj, ws, std::move(child), "PROJECTION_COVERED"}, _coveredKeyObj{coveredKeyObj} { - invariant(projection->isSimple()); + invariant(projection->isSimple() && projection->isInclusionOnly()); // If we're pulling data out of one index we can pre-compute the indices of the fields // in the key that we pull data from and avoid looking up the field name each time. 
@@ -273,10 +273,14 @@ ProjectionStageSimple::ProjectionStageSimple(ExpressionContext* expCtx, const projection_ast::Projection* projection, WorkingSet* ws, std::unique_ptr<PlanStage> child) - : ProjectionStage{expCtx, projObj, ws, std::move(child), "PROJECTION_SIMPLE"} { + : ProjectionStage{expCtx, projObj, ws, std::move(child), "PROJECTION_SIMPLE"}, + _projectType(projection->type()) { invariant(projection->isSimple()); - _includedFields = {projection->getRequiredFields().begin(), - projection->getRequiredFields().end()}; + if (_projectType == projection_ast::ProjectType::kInclusion) { + _fields = {projection->getRequiredFields().begin(), projection->getRequiredFields().end()}; + } else { + _fields = {projection->getExcludedPaths().begin(), projection->getExcludedPaths().end()}; + } } void ProjectionStageSimple::transform(WorkingSetMember* member) const { @@ -285,17 +289,28 @@ void ProjectionStageSimple::transform(WorkingSetMember* member) const { // If we got here because of SIMPLE_DOC the planner shouldn't have messed up. invariant(member->hasObj()); - // Apply the SIMPLE_DOC projection. - // Look at every field in the source document and see if we're including it. + // Apply the SIMPLE_DOC projection: look at every top level field in the source document and + // see if we should keep it. 
auto objToProject = member->doc.value().toBson(); - auto nFieldsNeeded = _includedFields.size(); - for (auto&& elt : objToProject) { - auto fieldName{elt.fieldNameStringData()}; - absl::string_view fieldNameKey{fieldName.rawData(), fieldName.size()}; - if (auto fieldIt = _includedFields.find(fieldNameKey); _includedFields.end() != fieldIt) { - bob.append(elt); - if (--nFieldsNeeded == 0) { - break; + auto nFieldsLeft = _fields.size(); + + if (_projectType == projection_ast::ProjectType::kInclusion) { + for (auto&& elt : objToProject) { + auto fieldName{elt.fieldNameStringData()}; + if (_fields.count(fieldName) > 0) { + bob.append(elt); + if (--nFieldsLeft == 0) { + break; + } + } + } + } else { + for (auto&& elt : objToProject) { + auto fieldName{elt.fieldNameStringData()}; + if (nFieldsLeft == 0 || _fields.count(fieldName) == 0) { + bob.append(elt); + } else { + --nFieldsLeft; } } } diff --git a/src/mongo/db/exec/projection.h b/src/mongo/db/exec/projection.h index 00e7fb33dbc..3c86983bb85 100644 --- a/src/mongo/db/exec/projection.h +++ b/src/mongo/db/exec/projection.h @@ -147,9 +147,8 @@ private: }; /** - * This class is used when we expect an object and the following rules are met: the projection - * consists only of inclusions e.g. '{field: 1}', it has no $meta projections, it is not a returnKey - * projection and it has no dotted fields. + * This class is used when we expect an object and the following rules are met: it has no $meta + * projections, it is not a returnKey projection and it has no dotted fields. */ class ProjectionStageSimple final : public ProjectionStage { public: @@ -169,8 +168,8 @@ public: private: void transform(WorkingSetMember* member) const final; - // Has the field names present in the simple projection. 
- stdx::unordered_set<std::string> _includedFields; + const projection_ast::ProjectType _projectType; + FieldSet _fields; }; } // namespace mongo diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp index 106b2d2d0b6..04826cbaa78 100644 --- a/src/mongo/db/pipeline/pipeline_d.cpp +++ b/src/mongo/db/pipeline/pipeline_d.cpp @@ -47,6 +47,7 @@ #include "mongo/db/exec/fetch.h" #include "mongo/db/exec/multi_iterator.h" #include "mongo/db/exec/multi_plan.h" +#include "mongo/db/exec/projection.h" #include "mongo/db/exec/queued_data_stage.h" #include "mongo/db/exec/sample_from_timeseries_bucket.h" #include "mongo/db/exec/shard_filter.h" @@ -410,6 +411,48 @@ std::pair<DocumentSourceSample*, DocumentSourceInternalUnpackBucket*> extractSam return std::pair{sampleStage, unpackStage}; } +bool areSortFieldsModifiedByEventProjection(const SortPattern& sortPattern, + const DocumentSource::GetModPathsReturn& modPaths) { + return std::any_of(sortPattern.begin(), sortPattern.end(), [&](const auto& sortPatternPart) { + const auto& fieldPath = sortPatternPart.fieldPath; + return !fieldPath || modPaths.canModify(*fieldPath); + }); +} + +bool areSortFieldsModifiedByBucketProjection(const SortPattern& sortPattern, + const DocumentSource::GetModPathsReturn& modPaths) { + // The time field maps to control.min.[time], control.max.[time], or + // _id, and $_internalUnpackBucket assumes that all of those fields are + // preserved. (We never push down a stage that would overwrite them.) + + // Each field [meta].a.b.c maps to 'meta.a.b.c'. 
+ auto rename = [&](const FieldPath& eventField) -> FieldPath { + if (eventField.getPathLength() == 1) + return timeseries::kBucketMetaFieldName; + return FieldPath{timeseries::kBucketMetaFieldName}.concat(eventField.tail()); + }; + + return std::any_of(sortPattern.begin(), + // Skip the last field, which is time: only check the meta fields + std::prev(sortPattern.end()), + [&](const auto& sortPatternPart) { + auto bucketFieldPath = rename(*sortPatternPart.fieldPath); + return modPaths.canModify(bucketFieldPath); + }); +} + +bool areSortFieldsModifiedByProjection(bool seenUnpack, + const SortPattern& sortPattern, + const DocumentSource::GetModPathsReturn& modPaths) { + if (seenUnpack) { + // This stage operates on events: check the event-level field names. + return areSortFieldsModifiedByEventProjection(sortPattern, modPaths); + } else { + // This stage operates on buckets: check the bucket-level field names. + return areSortFieldsModifiedByBucketProjection(sortPattern, modPaths); + } +} + std::tuple<DocumentSourceInternalUnpackBucket*, DocumentSourceSort*> findUnpackThenSort( const Pipeline::SourceContainer& sources) { DocumentSourceSort* sortStage = nullptr; @@ -862,16 +905,21 @@ SkipThenLimit extractSkipAndLimitForPushdown(Pipeline* pipeline) { * as is. * 2. If there is no inclusion projection at the front of the pipeline, but there is a finite * dependency set, a projection representing this dependency set will be pushed down. - * 3. Otherwise, an empty projection is returned and no projection push down will happen. + * 3. If there is an exclusion projection at the front of the pipeline, it will be pushed down. + * 4. Otherwise, an empty projection is returned and no projection push down will happen. * * If 'allowExpressions' is true, the returned projection may include expressions (which can only * happen in case 1). 
If 'allowExpressions' is false and the projection we find has expressions, * then we fall through to case 2 and attempt to push down a pure-inclusion projection based on its * dependencies. + * + * If 'timeseriesBoundedSortOptimization' is true, an exclusion projection won't be pushed down, + * because it breaks PlanExecutorImpl analysis required to enable this optimization. */ auto buildProjectionForPushdown(const DepsTracker& deps, Pipeline* pipeline, - bool allowExpressions) { + bool allowExpressions, + bool timeseriesBoundedSortOptimization) { auto&& sources = pipeline->getSources(); // Short-circuit if the pipeline is empty: there is no projection and nothing to push down. @@ -879,30 +927,49 @@ auto buildProjectionForPushdown(const DepsTracker& deps, return BSONObj(); } - if (const auto projStage = - exact_pointer_cast<DocumentSourceSingleDocumentTransformation*>(sources.front().get()); - projStage) { - if (projStage->getType() == TransformerInterface::TransformerType::kInclusionProjection) { - auto projObj = - projStage->getTransformer().serializeTransformation(boost::none).toBson(); - auto projAst = - projection_ast::parseAndAnalyze(projStage->getContext(), - projObj, - ProjectionPolicies::aggregateProjectionPolicies()); - if (!projAst.hasExpressions() || allowExpressions) { - // If there is an inclusion projection at the front of the pipeline, we have case 1. - sources.pop_front(); - return projObj; - } + const auto projStage = + exact_pointer_cast<DocumentSourceSingleDocumentTransformation*>(sources.front().get()); + const auto getProjectionObj = [&]() { + return projStage->getTransformer().serializeTransformation(boost::none).toBson(); + }; + const auto parseProjection = [&](const BSONObj& projObj) { + return projection_ast::parseAndAnalyze( + projStage->getContext(), projObj, ProjectionPolicies::aggregateProjectionPolicies()); + }; + + // If there is an inclusion projection at the front of the pipeline, we have case 1. 
+ if (projStage && + projStage->getType() == TransformerInterface::TransformerType::kInclusionProjection) { + auto projObj = getProjectionObj(); + if (allowExpressions || !parseProjection(projObj).hasExpressions()) { + sources.pop_front(); + return projObj; } } - // Depending of whether there is a finite dependency set, either return a projection - // representing this dependency set, or an empty BSON, meaning no projection push down will - // happen. This covers cases 2 and 3. - if (deps.getNeedsAnyMetadata()) - return BSONObj(); - return deps.toProjectionWithoutMetadata(); + // If there is a finite dependency set, return a projection representing this dependency set. + // This is case 2. + if (!deps.getNeedsAnyMetadata()) { + BSONObj depsProjObj = deps.toProjectionWithoutMetadata(); + if (!depsProjObj.isEmpty()) { + return depsProjObj; + } + } + + // If there is an exclusion projection at the front of the pipeline, we have case 3. + if (projStage && + projStage->getType() == TransformerInterface::TransformerType::kExclusionProjection && + // TODO SERVER-70655: Remove this check and argument when it is no longer needed. + !timeseriesBoundedSortOptimization) { + auto projObj = getProjectionObj(); + if (allowExpressions || !parseProjection(projObj).hasExpressions()) { + sources.pop_front(); + return projObj; + } + } + + // Case 4: no projection to push down + return BSONObj(); } } // namespace @@ -1209,11 +1276,13 @@ PipelineD::buildInnerQueryExecutorGeneric(const MultipleCollectionAccessor& coll // If this is a query on a time-series collection then it may be eligible for a post-planning // sort optimization. We check eligibility and perform the rewrite here. 
auto [unpack, sort] = findUnpackThenSort(pipeline->_sources); - QueryPlannerParams plannerOpts; - if (serverGlobalParams.featureCompatibility.isVersionInitialized() && + const bool timeseriesBoundedSortOptimization = + serverGlobalParams.featureCompatibility.isVersionInitialized() && feature_flags::gFeatureFlagBucketUnpackWithSort.isEnabled( serverGlobalParams.featureCompatibility) && - unpack && sort) { + unpack && sort; + QueryPlannerParams plannerOpts; + if (timeseriesBoundedSortOptimization) { plannerOpts.traversalPreference = createTimeSeriesTraversalPreference(unpack, sort); } @@ -1231,14 +1300,12 @@ PipelineD::buildInnerQueryExecutorGeneric(const MultipleCollectionAccessor& coll aggRequest, Pipeline::kAllowedMatcherFeatures, &shouldProduceEmptyDocs, + timeseriesBoundedSortOptimization, std::move(plannerOpts))); // If this is a query on a time-series collection then it may be eligible for a post-planning // sort optimization. We check eligibility and perform the rewrite here. - if (serverGlobalParams.featureCompatibility.isVersionInitialized() && - feature_flags::gFeatureFlagBucketUnpackWithSort.isEnabled( - serverGlobalParams.featureCompatibility) && - unpack && sort) { + if (timeseriesBoundedSortOptimization) { auto execImpl = dynamic_cast<PlanExecutorImpl*>(exec.get()); if (execImpl) { // Get source stage @@ -1330,45 +1397,8 @@ PipelineD::buildInnerQueryExecutorGeneric(const MultipleCollectionAccessor& coll dynamic_cast<const DocumentSourceSingleDocumentTransformation*>( iter->get())) { auto modPaths = projection->getModifiedPaths(); - - // Check to see if the sort paths are modified. - if (seenUnpack) { - // This stage operates on events: check the event-level field names. - for (auto sortIter = sortPattern.begin(); - !badStage && sortIter != sortPattern.end(); - ++sortIter) { - - auto fieldPath = sortIter->fieldPath; - // If they are then escape the loop & don't optimize. 
- if (!fieldPath || modPaths.canModify(*fieldPath)) { - badStage = true; - } - } - } else { - // This stage operates on buckets: check the bucket-level field names. - - // The time field maps to control.min.[time], control.max.[time], or - // _id, and $_internalUnpackBucket assumes that all of those fields are - // preserved. (We never push down a stage that would overwrite them.) - - // Each field [meta].a.b.c maps to 'meta.a.b.c'. - auto rename = [&](const FieldPath& eventField) -> FieldPath { - if (eventField.getPathLength() == 1) - return timeseries::kBucketMetaFieldName; - return FieldPath{timeseries::kBucketMetaFieldName}.concat( - eventField.tail()); - }; - - for (auto sortIter = sortPattern.begin(), - // Skip the last field, which is time: only check the meta - // fields. - end = std::prev(sortPattern.end()); - !badStage && sortIter != end; - ++sortIter) { - auto bucketFieldPath = rename(*sortIter->fieldPath); - if (modPaths.canModify(bucketFieldPath)) - badStage = true; - } + if (areSortFieldsModifiedByProjection(seenUnpack, sortPattern, modPaths)) { + badStage = true; } } else { badStage = true; @@ -1529,7 +1559,8 @@ PipelineD::buildInnerQueryExecutorGeoNear(const MultipleCollectionAccessor& coll SkipThenLimit{boost::none, boost::none}, aggRequest, Pipeline::kGeoNearMatcherFeatures, - &shouldProduceEmptyDocs)); + &shouldProduceEmptyDocs, + false /* timeseriesBoundedSortOptimization */)); auto attachExecutorCallback = [distanceField = geoNearStage->getDistanceField(), locationField = geoNearStage->getLocationField(), @@ -1564,6 +1595,7 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> PipelineD::prep const AggregateCommandRequest* aggRequest, const MatchExpressionParser::AllowedFeatureSet& matcherFeatures, bool* hasNoRequirements, + bool timeseriesBoundedSortOptimization, QueryPlannerParams plannerOpts) { invariant(hasNoRequirements); @@ -1635,7 +1667,8 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> PipelineD::prep // 
documents that the sort/skip/limit would have filtered out. (The sort stage can be a // top-k sort, which both sorts and limits.) bool allowExpressions = !sortStage && !skipThenLimit.getSkip() && !skipThenLimit.getLimit(); - projObj = buildProjectionForPushdown(deps, pipeline, allowExpressions); + projObj = buildProjectionForPushdown( + deps, pipeline, allowExpressions, timeseriesBoundedSortOptimization); plannerOpts.options |= QueryPlannerParams::RETURN_OWNED_DATA; } diff --git a/src/mongo/db/pipeline/pipeline_d.h b/src/mongo/db/pipeline/pipeline_d.h index c109e75b1b8..c9171748b1c 100644 --- a/src/mongo/db/pipeline/pipeline_d.h +++ b/src/mongo/db/pipeline/pipeline_d.h @@ -205,6 +205,7 @@ private: const AggregateCommandRequest* aggRequest, const MatchExpressionParser::AllowedFeatureSet& matcherFeatures, bool* hasNoRequirements, + bool timeseriesBoundedSortOptimization, QueryPlannerParams plannerOpts = QueryPlannerParams{}); /** diff --git a/src/mongo/db/query/planner_analysis.cpp b/src/mongo/db/query/planner_analysis.cpp index 7cbc7116a92..8c9a4f6ff84 100644 --- a/src/mongo/db/query/planner_analysis.cpp +++ b/src/mongo/db/query/planner_analysis.cpp @@ -421,6 +421,7 @@ std::unique_ptr<QuerySolutionNode> analyzeProjection(const CanonicalQuery& query // its generic nature. We will attempt to avoid that for some "fast paths" first. // All fast paths can only apply to "simple" projections - see the implementation for details. if (projection.isSimple()) { + const bool isInclusionOnly = projection.isInclusionOnly(); // First fast path: We have a COLUMN_SCAN providing the data, there are no computed // expressions, and the requested fields are provided exactly. For 'simple' projections // which must have only top-level fields, A COLUMN_SCAN can provide data in a format safe to @@ -428,7 +429,7 @@ std::unique_ptr<QuerySolutionNode> analyzeProjection(const CanonicalQuery& query // outputting exactly the set of fields that the user required. 
This may not be the case all // the time if say we needed an extra field for a sort or for shard filtering. const auto* columnScan = treeSourceIsColumnScan(solnRoot.get()); - if (columnScan && + if (columnScan && isInclusionOnly && columnScan->outputFields.size() == projection.getRequiredFields().size() && // TODO SERVER-64258 once filtering is supported we should be able to have meaningful // support for matched but not output fields. Until then, any match fields are treated @@ -449,15 +450,18 @@ std::unique_ptr<QuerySolutionNode> analyzeProjection(const CanonicalQuery& query addSortKeyGeneratorStageIfNeeded(query, hasSortStage, std::move(solnRoot)), *query.root(), projection); - } else if (auto coveredKeyObj = produceCoveredKeyObj(solnRoot.get()); - !coveredKeyObj.isEmpty()) { - // Final fast path: ProjectionNodeCovered for plans with an index scan that the - // projection can cover. - return std::make_unique<ProjectionNodeCovered>( - addSortKeyGeneratorStageIfNeeded(query, hasSortStage, std::move(solnRoot)), - *query.root(), - projection, - std::move(coveredKeyObj)); + } + if (isInclusionOnly) { + auto coveredKeyObj = produceCoveredKeyObj(solnRoot.get()); + if (!coveredKeyObj.isEmpty()) { + // Final fast path: ProjectionNodeCovered for plans with an index scan that the + // projection can cover. 
+ return std::make_unique<ProjectionNodeCovered>( + addSortKeyGeneratorStageIfNeeded(query, hasSortStage, std::move(solnRoot)), + *query.root(), + projection, + std::move(coveredKeyObj)); + } } } diff --git a/src/mongo/db/query/projection.cpp b/src/mongo/db/query/projection.cpp index f5195a15f5f..6ccabc3174c 100644 --- a/src/mongo/db/query/projection.cpp +++ b/src/mongo/db/query/projection.cpp @@ -46,9 +46,14 @@ namespace { */ struct DepsAnalysisData { DepsTracker fieldDependencyTracker; + OrderedPathSet excludedPaths; - void addRequiredField(const std::string& fieldName) { - fieldDependencyTracker.fields.insert(fieldName); + void addRequiredField(std::string fieldName) { + fieldDependencyTracker.fields.insert(std::move(fieldName)); + } + + void addExcludedPath(std::string path) { + excludedPaths.insert(std::move(path)); } OrderedPathSet requiredFields() const { @@ -172,6 +177,8 @@ public: // For inclusions, we depend on the field. if (node->value()) { addFullPathAsDependency(); + } else { + _context->data().addExcludedPath(_context->fullPath().fullPath()); } } @@ -206,10 +213,11 @@ auto analyzeProjection(const ProjectionPathASTNode* root, ProjectType type) { const auto& tracker = userData.fieldDependencyTracker; if (type == ProjectType::kInclusion) { - deps.requiredFields = userData.requiredFields(); + deps.paths = userData.requiredFields(); } else { invariant(type == ProjectType::kExclusion); deps.requiresDocument = true; + deps.paths = std::move(userData.excludedPaths); } deps.metadataRequested = tracker.metadataDeps(); diff --git a/src/mongo/db/query/projection.h b/src/mongo/db/query/projection.h index d1336db9754..b0d539325ba 100644 --- a/src/mongo/db/query/projection.h +++ b/src/mongo/db/query/projection.h @@ -48,8 +48,10 @@ struct ProjectionDependencies { bool hasExpressions = false; bool containsElemMatch = false; - // Which fields are necessary to perform the projection, or boost::none if all are required. 
- boost::optional<OrderedPathSet> requiredFields; + // If inclusion projection, contains field paths that are necessary to perform the projection, + // or boost::none if all are required. If exclusion projection, contains field paths that are + // explicitly excluded. + boost::optional<OrderedPathSet> paths; bool hasDottedPath = false; @@ -97,7 +99,12 @@ public: */ const OrderedPathSet& getRequiredFields() const { invariant(_type == ProjectType::kInclusion); - return *_deps.requiredFields; + return *_deps.paths; + } + + const OrderedPathSet& getExcludedPaths() const { + invariant(_type == ProjectType::kExclusion); + return *_deps.paths; } const QueryMetadataBitSet& metadataDeps() const { @@ -112,13 +119,13 @@ public: bool isFieldRetainedExactly(StringData path) const; /** - * A projection is considered "simple" if it doesn't require the full document, operates only - * on top-level fields, has no positional projection or expressions, and doesn't require - * metadata. + * A projection is considered "simple" if it operates only on top-level fields, + * has no positional projection or expressions, and doesn't require metadata. + * Both exclusion and inclusion projections can be simple. 
*/ bool isSimple() const { return !_deps.hasDottedPath && !_deps.requiresMatchDetails && - !_deps.metadataRequested.any() && !_deps.requiresDocument && !_deps.hasExpressions; + !_deps.metadataRequested.any() && !_deps.hasExpressions; } /** diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp index 3b38d9564dd..79922a1376d 100644 --- a/src/mongo/db/query/sbe_stage_builder.cpp +++ b/src/mongo/db/query/sbe_stage_builder.cpp @@ -1739,12 +1739,22 @@ SlotBasedStageBuilder::buildProjectionSimple(const QuerySolutionNode* root, const auto childResult = outputs.get(kResult); + sbe::MakeBsonObjStage::FieldBehavior behaviour; + const OrderedPathSet* fields; + if (pn->proj.type() == projection_ast::ProjectType::kInclusion) { + behaviour = sbe::MakeBsonObjStage::FieldBehavior::keep; + fields = &pn->proj.getRequiredFields(); + } else { + behaviour = sbe::MakeBsonObjStage::FieldBehavior::drop; + fields = &pn->proj.getExcludedPaths(); + } + outputs.set(kResult, _slotIdGenerator.generate()); inputStage = sbe::makeS<sbe::MakeBsonObjStage>(std::move(inputStage), outputs.get(kResult), childResult, - sbe::MakeBsonObjStage::FieldBehavior::keep, - pn->proj.getRequiredFields(), + behaviour, + *fields, OrderedPathSet{}, sbe::value::SlotVector{}, true, |