diff options
author | Steve Tarzia <steve.tarzia@mongodb.com> | 2022-09-23 14:12:53 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-09-23 15:07:34 +0000 |
commit | 538c5d15bf6f84fcfab9328fdf9857e120321e00 (patch) | |
tree | 302af8dd18aeda20fb24911b1f170a5b384aac7c | |
parent | 125607bc02dfe1815bcef4eac4cdd6246b6445a7 (diff) | |
download | mongo-538c5d15bf6f84fcfab9328fdf9857e120321e00.tar.gz |
SERVER-68677 Skip row store projection in column scan plans when possible
-rw-r--r-- | jstests/core/column_scan_skip_row_store_projection.js | 227 | ||||
-rw-r--r-- | jstests/core/null_query_semantics.js | 7 | ||||
-rw-r--r-- | jstests/libs/parallelTester.js | 1 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/stages/column_scan.cpp | 46 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/stages/column_scan.h | 5 | ||||
-rw-r--r-- | src/mongo/db/query/planner_analysis.cpp | 66 | ||||
-rw-r--r-- | src/mongo/db/query/planner_analysis.h | 9 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner.cpp | 9 | ||||
-rw-r--r-- | src/mongo/db/query/query_solution.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/query/query_solution.h | 10 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder.cpp | 25 |
11 files changed, 356 insertions, 55 deletions
diff --git a/jstests/core/column_scan_skip_row_store_projection.js b/jstests/core/column_scan_skip_row_store_projection.js new file mode 100644 index 00000000000..23dcb6b2ae9 --- /dev/null +++ b/jstests/core/column_scan_skip_row_store_projection.js @@ -0,0 +1,227 @@ +/** + * Tests that the row store expression is skipped when there is an appropriate group or projection + * above a COLUMN_SCAN stage. + * + * @tags: [ + * # explain is not supported in transactions + * does_not_support_transactions, + * requires_pipeline_optimization, + * # Runs explain on an aggregate command which is only compatible with readConcern local. + * assumes_read_concern_unchanged, + * # explain will be different in a sharded collection + * assumes_unsharded_collection, + * # column store row store expression skipping is new in 6.2. + * requires_fcv_62, + * uses_column_store_index, + * ] + */ +(function() { +"use strict"; + +load('jstests/aggregation/extras/utils.js'); // For assertArrayEq. +load("jstests/libs/sbe_util.js"); // For checkSBEEnabled. +// For areAllCollectionsClustered. +load("jstests/libs/clustered_collections/clustered_collection_util.js"); + +const columnstoreEnabled = checkSBEEnabled( + db, ["featureFlagColumnstoreIndexes", "featureFlagSbeFull"], true /* checkAllNodes */); +if (!columnstoreEnabled) { + jsTestLog("Skipping columnstore index test since the feature flag is not enabled."); + return; +} + +const indexedColl = db.column_scan_skip_row_store_projection_indexed; +const unindexedColl = db.column_scan_skip_row_store_projection_unindexed; + +function setupCollections() { + indexedColl.drop(); + unindexedColl.drop(); + assert.commandWorked(indexedColl.createIndex({"$**": "columnstore"})); + + const docs = [ + {_id: "a_number", a: 4}, + {_id: "a_subobject_c_not_null", a: {c: "hi"}}, + {_id: "a_subobject_c_null", a: {c: null}}, + {_id: "a_subobject_c_undefined", a: {c: undefined}}, + {_id: "no_a", b: 1}, + {_id: "a_and_b_nested", a: 2, b: {d: 1}}, + {_id: "a_nested_and_b_nested", a: {c: 5}, b: {d: {f: 2}}, e: 1}, + ]; + assert.commandWorked(indexedColl.insertMany(docs)); + assert.commandWorked(unindexedColl.insertMany(docs)); +} + +function test({agg, requiresRowStoreExpr, rowstoreFetches}) { + // Check that columnstore index is used, and we skip the row store expression appropriately. + const explainPlan = indexedColl.explain("queryPlanner").aggregate(agg); + let sbeStages = ('queryPlanner' in explainPlan) + // entirely SBE plan + ? explainPlan.queryPlanner.winningPlan.slotBasedPlan.stages + // SBE + classic plan + : explainPlan.stages[0]["$cursor"].queryPlanner.winningPlan.slotBasedPlan.stages; + assert(sbeStages.includes('COLUMN_SCAN'), `No COLUMN_SCAN in SBE stages: ${sbeStages}`); + const nullRegex = + /COLUMN_SCAN s.* ((s.*)|(none)) paths\[.*\] pathFilters\[.*\] rowStoreExpr\[\] @.* @.*/; + const notNullRegex = + /COLUMN_SCAN s.* ((s.*)|(none)) paths\[.*\] pathFilters\[.*\] rowStoreExpr\[.*, .*\] @.* @.*/; + if (requiresRowStoreExpr) { + assert(!nullRegex.test(sbeStages), `Don't expect null rowstoreExpr in ${sbeStages}`); + assert(notNullRegex.test(sbeStages), `Expected non-null rowstoreExpr in ${sbeStages}`); + } else { + assert(nullRegex.test(sbeStages), `Expected null rowStoreExpr in ${sbeStages}`); + assert(!notNullRegex.test(sbeStages), `Don't expect non-null rowStoreExpr in ${sbeStages}`); + } + // Check the expected number of row store fetches. + const explainExec = indexedColl.explain("executionStats").aggregate(agg); + const actualRowstoreFetches = + parseInt(JSON.stringify(explainExec).split('"numRowStoreFetches":')[1].split(",")[0]); + assert.eq( + actualRowstoreFetches, + rowstoreFetches, + `Unexpected nubmer of row store fetches in ${JSON.stringify(explainExec, null, '\t')}`); + + // Check that results are identical with and without columnstore index. + assertArrayEq({ + actual: indexedColl.aggregate(agg).toArray(), + expected: unindexedColl.aggregate(agg).toArray() + }); +} + +function runAllAggregations() { + // $project only. Requires row store expression regardless of nesting under the projected path. + test({agg: [{$project: {_id: 0, a: 1}}], requiresRowStoreExpr: true, rowstoreFetches: 4}); + test({agg: [{$project: {_id: 0, b: 1}}], requiresRowStoreExpr: true, rowstoreFetches: 2}); + + // $group only. + // The 4 cases below provide the same coverage but illustrate when row store fetches are needed. + test({ + agg: [{$group: {_id: null, a: {$push: "$a"}}}], + requiresRowStoreExpr: false, + rowstoreFetches: 4 + }); + test({ + agg: [{$group: {_id: null, b: {$push: "$b"}}}], + requiresRowStoreExpr: false, + rowstoreFetches: 2 + }); + test({ + agg: [{$group: {_id: null, e: {$push: "$e"}}}], + requiresRowStoreExpr: false, + rowstoreFetches: 0 + }); + test({ + agg: [{$group: {_id: "$_id", a: {$push: "$a"}, b: {$push: "$b"}}}], + requiresRowStoreExpr: false, + rowstoreFetches: 5 + }); + + // $group and $project, including _id. + test({ + agg: [{$project: {_id: 1, a: 1}}, {$group: {_id: "$_id", a: {$push: "$a"}}}], + requiresRowStoreExpr: false, + rowstoreFetches: 4 + }); + + // The rowStoreExpr is needed to prevent the $group from seeing b. + test({ + agg: [ + {$project: {_id: 1, a: 1}}, + {$group: {_id: "$_id", a: {$push: "$a"}, b: {$push: "$b"}}} + ], + requiresRowStoreExpr: true, + rowstoreFetches: 4 + }); + + // Same as above, but add another $group later that would be eligible for skipping the row store + // expression. + test({ + agg: [ + {$project: {_id: 1, a: 1}}, + {$group: {_id: "$_id", a: {$push: "$a"}, b: {$push: "$b"}}}, + {$project: {_id: 1, a: 1}}, + {$group: {_id: "$_id", a: {$push: "$a"}}} + ], + requiresRowStoreExpr: true, + rowstoreFetches: 4 + }); + + // $group and $project, excluding _id. + // Because _id is projected out, the $group will aggregate all docs together. The rowStoreExpr + // must not be skipped or else $group will behave incorrectly. + test({ + agg: [{$project: {_id: 0, a: 1}}, {$group: {_id: "$_id", a: {$push: "$a"}}}], + requiresRowStoreExpr: true, + rowstoreFetches: 4 + }); + + // $match with a filter that can be pushed down. + test({ + agg: [{$match: {a: 2}}, {$group: {_id: "$_id", b: {$push: "$b"}, a: {$push: "$a"}}}], + requiresRowStoreExpr: false, + rowstoreFetches: 1 + }); + + // Nested paths. + // The BrowserUsageByDistinctUserQuery that motivated this ticket is an example of this. + test({ + agg: [{$match: {"a.c": 5}}, {$group: {_id: "$_id", b_d: {$push: "$b.d"}}}], + requiresRowStoreExpr: false, + rowstoreFetches: 1 + }); + + // BrowserUsageByDistinctUserQuery from ColumnStoreIndex.yml in the genny repo. + // $addFields is not implemented in SBE, so this will have an SBE plan + an agg pipeline. + // This query does not match our documents, but the test checks for row store expression + // elimination. + test({ + agg: [ + {"$match": {"metadata.browser": {"$exists": true}}}, + { + "$addFields": + {"browserName": {"$arrayElemAt": [{"$split": ["$metadata.browser", " "]}, 0]}} + }, + { + "$match": { + "browserName": {"$nin": [null, "", "null"]}, + "created_at": {"$gte": ISODate("2020-03-10T01:17:41Z")} + } + }, + { + "$group": + {"_id": {"__alias_0": "$browserName"}, "__alias_1": {"$addToSet": "$user_id"}} + }, + { + "$project": + {"_id": 0, "__alias_0": "$_id.__alias_0", "__alias_1": {"$size": "$__alias_1"}} + }, + {"$project": {"label": "$__alias_0", "value": "$__alias_1", "_id": 0}}, + {"$limit": 5000} + ], + requiresRowStoreExpr: false, + rowstoreFetches: 0 + }); + + // Cases that may be improved by future work: + + // The limit below creates a Query Solution Node between the column scan and the group. + // Our optimization is not clever enough to see that the limit QSN is irrelevant. + test({ + agg: [{$limit: 100}, {$group: {_id: null, a: {$push: "$a"}}}], + requiresRowStoreExpr: true, // ideally this would be false + rowstoreFetches: 4 + }); + + // $match with a nested path filter than can be pushed down. + // This fails to even use the column store index. It should be able to in the future. + assert.throws(() => { + test({ + agg: [{$match: {"a.e": 1}}, {$group: {_id: "$_id", a: {$push: "$a"}}}], + requiresRowStoreExpr: false, + rowstoreFetches: 0 + }); + }); +} + +setupCollections(); +runAllAggregations(); +}()); diff --git a/jstests/core/null_query_semantics.js b/jstests/core/null_query_semantics.js index 1ff14cc710c..34e605db283 100644 --- a/jstests/core/null_query_semantics.js +++ b/jstests/core/null_query_semantics.js @@ -4,6 +4,8 @@ "use strict"; load("jstests/aggregation/extras/utils.js"); // For 'resultsEq'. +// For areAllCollectionsClustered. +load("jstests/libs/clustered_collections/clustered_collection_util.js"); function extractAValues(results) { return results.map(function(res) { @@ -699,6 +701,11 @@ const keyPatterns = [ {keyPattern: {"$**": 1}}, {keyPattern: {"a.$**": 1}} ]; +// Include Columnstore Index only if FF is enabled and collection is not clustered. +if (TestData.setParameters.hasOwnProperty("featureFlagColumnstoreIndexes") && + !ClusteredCollectionUtil.areAllCollectionsClustered(db.getMongo())) { + keyPatterns.push({keyPattern: {"$**": "columnstore"}}); +} // Test with a variety of other indexes. for (let indexSpec of keyPatterns) { diff --git a/jstests/libs/parallelTester.js b/jstests/libs/parallelTester.js index 92de2fa0878..9e18b0ad69c 100644 --- a/jstests/libs/parallelTester.js +++ b/jstests/libs/parallelTester.js @@ -260,6 +260,7 @@ if (typeof _threadInject != "undefined") { "columnstore_index_per_path_filters.js", "columnstore_large_array_index_correctness.js", "columnstore_validindex.js", + "column_scan_skip_row_store_projection.js", ]); // Get files, including files in subdirectories. diff --git a/src/mongo/db/exec/sbe/stages/column_scan.cpp b/src/mongo/db/exec/sbe/stages/column_scan.cpp index edc2f2e66cd..640894aa33b 100644 --- a/src/mongo/db/exec/sbe/stages/column_scan.cpp +++ b/src/mongo/db/exec/sbe/stages/column_scan.cpp @@ -659,11 +659,15 @@ PlanState ColumnScanStage::getNext() { value::bitcastFrom<const char*>(record->data.data())); if (_reconstructedRecordAccessor) { - // TODO: in absence of record expression set the reconstructed record to be the same - // as the record, retrieved from the row store. - invariant(_rowStoreExpr); - auto [owned, tag, val] = _bytecode.run(_rowStoreExprCode.get()); - _reconstructedRecordAccessor->reset(owned, tag, val); + if (_rowStoreExpr) { + auto [owned, tag, val] = _bytecode.run(_rowStoreExprCode.get()); + _reconstructedRecordAccessor->reset(owned, tag, val); + } else { + _reconstructedRecordAccessor->reset( + false, + value::TypeTags::bsonObject, + value::bitcastFrom<const char*>(record->data.data())); + } } } else { if (_reconstructedRecordAccessor) { @@ -762,7 +766,7 @@ std::vector<DebugPrinter::Block> ColumnScanStage::debugPrint() const { } // Print out paths. - ret.emplace_back(DebugPrinter::Block("[`")); + ret.emplace_back(DebugPrinter::Block("paths[`")); for (size_t idx = 0; idx < _paths.size(); ++idx) { if (idx) { ret.emplace_back(DebugPrinter::Block("`,")); @@ -772,30 +776,28 @@ std::vector<DebugPrinter::Block> ColumnScanStage::debugPrint() const { } ret.emplace_back(DebugPrinter::Block("`]")); - // Print out per-path filters (if any). - if (!_filteredPaths.empty()) { - ret.emplace_back(DebugPrinter::Block("[`")); - for (size_t idx = 0; idx < _filteredPaths.size(); ++idx) { - if (idx) { - ret.emplace_back(DebugPrinter::Block("`;")); - } - - ret.emplace_back(str::stream() - << "\"" << _paths[_filteredPaths[idx].pathIndex] << "\": "); - DebugPrinter::addIdentifier(ret, _filteredPaths[idx].inputSlotId); - ret.emplace_back(DebugPrinter::Block("`,")); - DebugPrinter::addBlocks(ret, _filteredPaths[idx].filterExpr->debugPrint()); + // Print out per-path filters. + ret.emplace_back(DebugPrinter::Block("pathFilters[`")); + for (size_t idx = 0; idx < _filteredPaths.size(); ++idx) { + if (idx) { + ret.emplace_back(DebugPrinter::Block("`;")); } - ret.emplace_back(DebugPrinter::Block("`]")); + + ret.emplace_back(str::stream() << "\"" << _paths[_filteredPaths[idx].pathIndex] << "\": "); + DebugPrinter::addIdentifier(ret, _filteredPaths[idx].inputSlotId); + ret.emplace_back(DebugPrinter::Block("`,")); + DebugPrinter::addBlocks(ret, _filteredPaths[idx].filterExpr->debugPrint()); } + ret.emplace_back(DebugPrinter::Block("`]")); + // Print out rowStoreExpression as [rowStoreSlot, rowStoreExpression] + ret.emplace_back(DebugPrinter::Block("rowStoreExpr[`")); if (_rowStoreExpr) { - ret.emplace_back(DebugPrinter::Block("[`")); DebugPrinter::addIdentifier(ret, _rowStoreSlot); ret.emplace_back(DebugPrinter::Block("`,")); DebugPrinter::addBlocks(ret, _rowStoreExpr->debugPrint()); - ret.emplace_back(DebugPrinter::Block("`]")); } + ret.emplace_back(DebugPrinter::Block("`]")); ret.emplace_back("@\"`"); DebugPrinter::addIdentifier(ret, _collUuid.toString()); diff --git a/src/mongo/db/exec/sbe/stages/column_scan.h b/src/mongo/db/exec/sbe/stages/column_scan.h index 8e954b63f9a..1eda315d94b 100644 --- a/src/mongo/db/exec/sbe/stages/column_scan.h +++ b/src/mongo/db/exec/sbe/stages/column_scan.h @@ -52,8 +52,9 @@ namespace sbe { * * Debug string representation: * - * COLUMN_SCAN reconstructedRecordSlot|none recordIdSlot|none [path_1, ..., path_n] - * [filter_path_1: filterSlot_1, filterExpr_1; ...]? [roStoreSlot, rowStoreExpr]? + * COLUMN_SCAN reconstructedRecordSlot|none recordIdSlot|none paths[path_1, ..., path_n] + * pathFilters[filter_path_1: filterSlot_1, filterExpr_1; ...] + * rowStoreExpr[slot, expr]|rowStoreExpr[] * collectionUuid indexName */ class ColumnScanStage final : public PlanStage { diff --git a/src/mongo/db/query/planner_analysis.cpp b/src/mongo/db/query/planner_analysis.cpp index 62afaa03e98..4b19c1fac59 100644 --- a/src/mongo/db/query/planner_analysis.cpp +++ b/src/mongo/db/query/planner_analysis.cpp @@ -626,26 +626,20 @@ void removeInclusionProjectionBelowGroupRecursive(QuerySolutionNode* solnRoot) { auto groupNode = static_cast<GroupNode*>(solnRoot); QuerySolutionNode* projectNodeCandidate = groupNode->children[0].get(); - if (projectNodeCandidate->getType() == StageType::STAGE_GROUP) { - // Multiple $group stages may be pushed down. So, if the child is a GROUP, then recurse. - return removeInclusionProjectionBelowGroupRecursive(projectNodeCandidate); - } else if (auto projection = attemptToGetProjectionFromQuerySolution(*projectNodeCandidate); - projection && projection.value()->isInclusionOnly()) { - // Check to see if the projectNode's field set is a super set of the groupNodes. - if (!isSubset(groupNode->requiredFields, projection.value()->getRequiredFields())) { - // The dependency set of the GROUP stage is wider than the projectNode field set. - return; - } - + if (auto projection = attemptToGetProjectionFromQuerySolution(*projectNodeCandidate); + // only eliminate inclusion projections + projection && projection.value()->isInclusionOnly() && + // only eliminate projections which preserve all fields used by the group + isSubset(groupNode->requiredFields, projection.value()->getRequiredFields())) { // Attach the projectNode's child directly as the groupNode's child, eliminating the // project node. groupNode->children[0] = std::move(projectNodeCandidate->children[0]); } - } else { - // Keep traversing the tree in search of a GROUP stage. - for (size_t i = 0; i < solnRoot->children.size(); ++i) { - removeInclusionProjectionBelowGroupRecursive(solnRoot->children[i].get()); - } + } + + // Keep traversing the tree in search of GROUP stages. + for (size_t i = 0; i < solnRoot->children.size(); ++i) { + removeInclusionProjectionBelowGroupRecursive(solnRoot->children[i].get()); } } @@ -682,6 +676,44 @@ std::unique_ptr<QuerySolution> QueryPlannerAnalysis::removeInclusionProjectionBe } // static +void QueryPlannerAnalysis::removeUselessColumnScanRowStoreExpression(QuerySolutionNode& root) { + // If a group or projection's child is a COLUMN_SCAN node, try to eliminate the + // expression that projects documents retrieved from row store fallback. In other words, the + // COLUMN_SCAN's rowStoreExpression can be removed if it does not affect the group or + // project above. + for (auto& child : root.children) { + if (child->getType() == STAGE_COLUMN_SCAN) { + auto childColumnScan = static_cast<ColumnIndexScanNode*>(child.get()); + + // Look for group above column scan. + if (root.getType() == STAGE_GROUP) { + auto& parentGroup = static_cast<GroupNode&>(root); + // A row store expression that preserves all fields used by the parent $group is + // redundant and can be removed. + if (!childColumnScan->extraFieldsPermitted && + isSubset(parentGroup.requiredFields, childColumnScan->outputFields)) { + childColumnScan->extraFieldsPermitted = true; + } + } + // Look for projection above column scan. + else if (root.getType() == STAGE_PROJECTION_SIMPLE || + root.getType() == STAGE_PROJECTION_DEFAULT) { + auto& parentProjection = static_cast<ProjectionNode&>(root); + // A row store expression that preserves all fields used by the parent projection is + // redundant and can be removed. + if (!childColumnScan->extraFieldsPermitted && + isSubset(parentProjection.proj.getRequiredFields(), + childColumnScan->outputFields)) { + childColumnScan->extraFieldsPermitted = true; + } + } + } + // Recur on child. + removeUselessColumnScanRowStoreExpression(*child); + } +} + +// static std::pair<EqLookupNode::LookupStrategy, boost::optional<IndexEntry>> QueryPlannerAnalysis::determineLookupStrategy( const std::string& foreignCollName, @@ -1191,6 +1223,8 @@ std::unique_ptr<QuerySolution> QueryPlannerAnalysis::analyzeDataAccess( solnRoot = tryPushdownProjectBeneathSort(std::move(solnRoot)); + QueryPlannerAnalysis::removeUselessColumnScanRowStoreExpression(*solnRoot); + soln->setRoot(std::move(solnRoot)); return soln; } diff --git a/src/mongo/db/query/planner_analysis.h b/src/mongo/db/query/planner_analysis.h index 3233ef82d7b..8f4c6ae3818 100644 --- a/src/mongo/db/query/planner_analysis.h +++ b/src/mongo/db/query/planner_analysis.h @@ -130,6 +130,15 @@ public: std::unique_ptr<QuerySolution> soln); /** + * Walks the QuerySolutionNode tree rooted in 'soln', and looks for a ColumnScan that + * is a child of either a Group or Projection. If the ColumnScan's parent will ignore + * extra fields, then eliminate its row store expression, allowing it to return extra fields + * in cases when it falls back to pulling the full document from the row store. + * If these conditions are not met this is a noop. + */ + static void removeUselessColumnScanRowStoreExpression(QuerySolutionNode& root); + + /** * For the provided 'foreignCollName' and 'foreignFieldName' corresponding to an EqLookupNode, * returns what join algorithm should be used to execute it. In particular: * - An empty array is produced for each document if the foreign collection does not exist. diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp index ba398f446cb..c0e787c6d99 100644 --- a/src/mongo/db/query/query_planner.cpp +++ b/src/mongo/db/query/query_planner.cpp @@ -196,7 +196,7 @@ bool hintMatchesColumnStoreIndex(const BSONObj& hintObj, const ColumnIndexEntry& } /** - * Returns the dependencies for the CanoncialQuery, split by those needed to answer the filter, + * Returns the dependencies for the CanonicalQuery, split by those needed to answer the filter, * and those needed for "everything else" which is the project and sort. */ std::pair<DepsTracker, DepsTracker> computeDeps(const QueryPlannerParams& params, @@ -1601,7 +1601,12 @@ std::unique_ptr<QuerySolution> QueryPlanner::extendWithAggPipeline( } solution->extendWith(std::move(solnForAgg)); - return QueryPlannerAnalysis::removeInclusionProjectionBelowGroup(std::move(solution)); + + solution = QueryPlannerAnalysis::removeInclusionProjectionBelowGroup(std::move(solution)); + + QueryPlannerAnalysis::removeUselessColumnScanRowStoreExpression(*solution->root()); + + return std::move(solution); } StatusWith<std::unique_ptr<QuerySolution>> QueryPlanner::choosePlanForSubqueries( diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp index 5dd0b545497..2e040bd710b 100644 --- a/src/mongo/db/query/query_solution.cpp +++ b/src/mongo/db/query/query_solution.cpp @@ -1102,13 +1102,15 @@ ColumnIndexScanNode::ColumnIndexScanNode(ColumnIndexEntry indexEntry, OrderedPathSet matchFieldsIn, OrderedPathSet allFieldsIn, StringMap<std::unique_ptr<MatchExpression>> filtersByPath, - std::unique_ptr<MatchExpression> postAssemblyFilter) + std::unique_ptr<MatchExpression> postAssemblyFilter, + bool extraFieldsPermitted) : indexEntry(std::move(indexEntry)), outputFields(std::move(outputFieldsIn)), matchFields(std::move(matchFieldsIn)), allFields(std::move(allFieldsIn)), filtersByPath(std::move(filtersByPath)), - postAssemblyFilter(std::move(postAssemblyFilter)) {} + postAssemblyFilter(std::move(postAssemblyFilter)), + extraFieldsPermitted(extraFieldsPermitted) {} void ColumnIndexScanNode::appendToString(str::stream* ss, int indent) const { addIndent(ss, indent); diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h index 96af62b4a90..a7182a4251d 100644 --- a/src/mongo/db/query/query_solution.h +++ b/src/mongo/db/query/query_solution.h @@ -516,7 +516,8 @@ struct ColumnIndexScanNode : public QuerySolutionNode { OrderedPathSet matchFields, OrderedPathSet allFields, StringMap<std::unique_ptr<MatchExpression>> filtersByPath, - std::unique_ptr<MatchExpression> postAssemblyFilter); + std::unique_ptr<MatchExpression> postAssemblyFilter, + bool extraFieldsPermitted = false); virtual StageType getType() const { return STAGE_COLUMN_SCAN; @@ -549,7 +550,8 @@ struct ColumnIndexScanNode : public QuerySolutionNode { matchFields, allFields, std::move(clonedFiltersByPath), - postAssemblyFilter->shallowClone()); + postAssemblyFilter->shallowClone(), + extraFieldsPermitted); } ColumnIndexEntry indexEntry; @@ -575,6 +577,10 @@ struct ColumnIndexScanNode : public QuerySolutionNode { // An optional filter to apply after assembling a document from all scanned columns. For // example: {$or: [{a: 2}, {b: 2}]}. std::unique_ptr<MatchExpression> postAssemblyFilter; + + // If set to true, we can include extra fields rather than project them out because projection + // happens anyway in a later stage (such a group stage). + bool extraFieldsPermitted; }; /** diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp index 80bb4ea4519..ab20b24f502 100644 --- a/src/mongo/db/query/sbe_stage_builder.cpp +++ b/src/mongo/db/query/sbe_stage_builder.cpp @@ -883,16 +883,23 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder }); } - // Generate the expression that is applied to the row store record(in the case when the result + // Generate the expression that is applied to the row store record (in the case when the result // cannot be reconstructed from the index). - optimizer::SlotVarMap slotMap{}; - slotMap[rootStr] = rowStoreSlot; - auto abt = builder.generateABT(); - // We might get null abt if no paths were added to the builder. It means we should be projecting - // an empty object. - tassert(6935000, "ABT must be valid if have fields to project", fieldsToProject.empty() || abt); - auto rowStoreExpr = abt ? abtToExpr(*abt, slotMap) - : sbe::makeE<sbe::EFunction>("newObj", sbe::EExpression::Vector{}); + std::unique_ptr<sbe::EExpression> rowStoreExpr = nullptr; + + // Avoid generating the row store expression if the projection is not necessary, as indicated by + // the extraFieldsPermitted flag of the column store node. + if (boost::optional<optimizer::ABT> abt; + !csn->extraFieldsPermitted && (abt = builder.generateABT())) { + // We might get null abt if no paths were added to the builder. It means we should be + // projecting an empty object. + tassert( + 6935000, "ABT must be valid if have fields to project", fieldsToProject.empty() || abt); + optimizer::SlotVarMap slotMap{}; + slotMap[rootStr] = rowStoreSlot; + rowStoreExpr = abt ? abtToExpr(*abt, slotMap) + : sbe::makeE<sbe::EFunction>("newObj", sbe::EExpression::Vector{}); + } std::unique_ptr<sbe::PlanStage> stage = std::make_unique<sbe::ColumnScanStage>(getCurrentCollection(reqs)->uuid(), |