diff options
author | Steve Tarzia <steve.tarzia@mongodb.com> | 2022-09-23 14:12:53 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-09-23 15:07:34 +0000 |
commit | 538c5d15bf6f84fcfab9328fdf9857e120321e00 (patch) | |
tree | 302af8dd18aeda20fb24911b1f170a5b384aac7c /src/mongo | |
parent | 125607bc02dfe1815bcef4eac4cdd6246b6445a7 (diff) | |
download | mongo-538c5d15bf6f84fcfab9328fdf9857e120321e00.tar.gz |
SERVER-68677 Skip row store projection in column scan plans when possible
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/db/exec/sbe/stages/column_scan.cpp | 46 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/stages/column_scan.h | 5 | ||||
-rw-r--r-- | src/mongo/db/query/planner_analysis.cpp | 66 | ||||
-rw-r--r-- | src/mongo/db/query/planner_analysis.h | 9 | ||||
-rw-r--r-- | src/mongo/db/query/query_planner.cpp | 9 | ||||
-rw-r--r-- | src/mongo/db/query/query_solution.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/query/query_solution.h | 10 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder.cpp | 25 |
8 files changed, 121 insertions, 55 deletions
diff --git a/src/mongo/db/exec/sbe/stages/column_scan.cpp b/src/mongo/db/exec/sbe/stages/column_scan.cpp index edc2f2e66cd..640894aa33b 100644 --- a/src/mongo/db/exec/sbe/stages/column_scan.cpp +++ b/src/mongo/db/exec/sbe/stages/column_scan.cpp @@ -659,11 +659,15 @@ PlanState ColumnScanStage::getNext() { value::bitcastFrom<const char*>(record->data.data())); if (_reconstructedRecordAccessor) { - // TODO: in absence of record expression set the reconstructed record to be the same - // as the record, retrieved from the row store. - invariant(_rowStoreExpr); - auto [owned, tag, val] = _bytecode.run(_rowStoreExprCode.get()); - _reconstructedRecordAccessor->reset(owned, tag, val); + if (_rowStoreExpr) { + auto [owned, tag, val] = _bytecode.run(_rowStoreExprCode.get()); + _reconstructedRecordAccessor->reset(owned, tag, val); + } else { + _reconstructedRecordAccessor->reset( + false, + value::TypeTags::bsonObject, + value::bitcastFrom<const char*>(record->data.data())); + } } } else { if (_reconstructedRecordAccessor) { @@ -762,7 +766,7 @@ std::vector<DebugPrinter::Block> ColumnScanStage::debugPrint() const { } // Print out paths. - ret.emplace_back(DebugPrinter::Block("[`")); + ret.emplace_back(DebugPrinter::Block("paths[`")); for (size_t idx = 0; idx < _paths.size(); ++idx) { if (idx) { ret.emplace_back(DebugPrinter::Block("`,")); @@ -772,30 +776,28 @@ std::vector<DebugPrinter::Block> ColumnScanStage::debugPrint() const { } ret.emplace_back(DebugPrinter::Block("`]")); - // Print out per-path filters (if any). - if (!_filteredPaths.empty()) { - ret.emplace_back(DebugPrinter::Block("[`")); - for (size_t idx = 0; idx < _filteredPaths.size(); ++idx) { - if (idx) { - ret.emplace_back(DebugPrinter::Block("`;")); - } - - ret.emplace_back(str::stream() - << "\"" << _paths[_filteredPaths[idx].pathIndex] << "\": "); - DebugPrinter::addIdentifier(ret, _filteredPaths[idx].inputSlotId); - ret.emplace_back(DebugPrinter::Block("`,")); - DebugPrinter::addBlocks(ret, _filteredPaths[idx].filterExpr->debugPrint()); + // Print out per-path filters. + ret.emplace_back(DebugPrinter::Block("pathFilters[`")); + for (size_t idx = 0; idx < _filteredPaths.size(); ++idx) { + if (idx) { + ret.emplace_back(DebugPrinter::Block("`;")); } - ret.emplace_back(DebugPrinter::Block("`]")); + + ret.emplace_back(str::stream() << "\"" << _paths[_filteredPaths[idx].pathIndex] << "\": "); + DebugPrinter::addIdentifier(ret, _filteredPaths[idx].inputSlotId); + ret.emplace_back(DebugPrinter::Block("`,")); + DebugPrinter::addBlocks(ret, _filteredPaths[idx].filterExpr->debugPrint()); } + ret.emplace_back(DebugPrinter::Block("`]")); + // Print out rowStoreExpression as [rowStoreSlot, rowStoreExpression] + ret.emplace_back(DebugPrinter::Block("rowStoreExpr[`")); if (_rowStoreExpr) { - ret.emplace_back(DebugPrinter::Block("[`")); DebugPrinter::addIdentifier(ret, _rowStoreSlot); ret.emplace_back(DebugPrinter::Block("`,")); DebugPrinter::addBlocks(ret, _rowStoreExpr->debugPrint()); - ret.emplace_back(DebugPrinter::Block("`]")); } + ret.emplace_back(DebugPrinter::Block("`]")); ret.emplace_back("@\"`"); DebugPrinter::addIdentifier(ret, _collUuid.toString()); diff --git a/src/mongo/db/exec/sbe/stages/column_scan.h b/src/mongo/db/exec/sbe/stages/column_scan.h index 8e954b63f9a..1eda315d94b 100644 --- a/src/mongo/db/exec/sbe/stages/column_scan.h +++ b/src/mongo/db/exec/sbe/stages/column_scan.h @@ -52,8 +52,9 @@ namespace sbe { * * Debug string representation: * - * COLUMN_SCAN reconstructedRecordSlot|none recordIdSlot|none [path_1, ..., path_n] - * [filter_path_1: filterSlot_1, filterExpr_1; ...]? [roStoreSlot, rowStoreExpr]? + * COLUMN_SCAN reconstructedRecordSlot|none recordIdSlot|none paths[path_1, ..., path_n] + * pathFilters[filter_path_1: filterSlot_1, filterExpr_1; ...] + * rowStoreExpr[slot, expr]|rowStoreExpr[] * collectionUuid indexName */ class ColumnScanStage final : public PlanStage { diff --git a/src/mongo/db/query/planner_analysis.cpp b/src/mongo/db/query/planner_analysis.cpp index 62afaa03e98..4b19c1fac59 100644 --- a/src/mongo/db/query/planner_analysis.cpp +++ b/src/mongo/db/query/planner_analysis.cpp @@ -626,26 +626,20 @@ void removeInclusionProjectionBelowGroupRecursive(QuerySolutionNode* solnRoot) { auto groupNode = static_cast<GroupNode*>(solnRoot); QuerySolutionNode* projectNodeCandidate = groupNode->children[0].get(); - if (projectNodeCandidate->getType() == StageType::STAGE_GROUP) { - // Multiple $group stages may be pushed down. So, if the child is a GROUP, then recurse. - return removeInclusionProjectionBelowGroupRecursive(projectNodeCandidate); - } else if (auto projection = attemptToGetProjectionFromQuerySolution(*projectNodeCandidate); - projection && projection.value()->isInclusionOnly()) { - // Check to see if the projectNode's field set is a super set of the groupNodes. - if (!isSubset(groupNode->requiredFields, projection.value()->getRequiredFields())) { - // The dependency set of the GROUP stage is wider than the projectNode field set. - return; - } - + if (auto projection = attemptToGetProjectionFromQuerySolution(*projectNodeCandidate); + // only eliminate inclusion projections + projection && projection.value()->isInclusionOnly() && + // only eliminate projections which preserve all fields used by the group + isSubset(groupNode->requiredFields, projection.value()->getRequiredFields())) { // Attach the projectNode's child directly as the groupNode's child, eliminating the // project node. groupNode->children[0] = std::move(projectNodeCandidate->children[0]); } - } else { - // Keep traversing the tree in search of a GROUP stage. - for (size_t i = 0; i < solnRoot->children.size(); ++i) { - removeInclusionProjectionBelowGroupRecursive(solnRoot->children[i].get()); - } + } + + // Keep traversing the tree in search of GROUP stages. + for (size_t i = 0; i < solnRoot->children.size(); ++i) { + removeInclusionProjectionBelowGroupRecursive(solnRoot->children[i].get()); } } @@ -682,6 +676,44 @@ std::unique_ptr<QuerySolution> QueryPlannerAnalysis::removeInclusionProjectionBe } // static +void QueryPlannerAnalysis::removeUselessColumnScanRowStoreExpression(QuerySolutionNode& root) { + // If a group or projection's child is a COLUMN_SCAN node, try to eliminate the + // expression that projects documents retrieved from row store fallback. In other words, the + // COLUMN_SCAN's rowStoreExpression can be removed if it does not affect the group or + // project above. + for (auto& child : root.children) { + if (child->getType() == STAGE_COLUMN_SCAN) { + auto childColumnScan = static_cast<ColumnIndexScanNode*>(child.get()); + + // Look for group above column scan. + if (root.getType() == STAGE_GROUP) { + auto& parentGroup = static_cast<GroupNode&>(root); + // A row store expression that preserves all fields used by the parent $group is + // redundant and can be removed. + if (!childColumnScan->extraFieldsPermitted && + isSubset(parentGroup.requiredFields, childColumnScan->outputFields)) { + childColumnScan->extraFieldsPermitted = true; + } + } + // Look for projection above column scan. + else if (root.getType() == STAGE_PROJECTION_SIMPLE || + root.getType() == STAGE_PROJECTION_DEFAULT) { + auto& parentProjection = static_cast<ProjectionNode&>(root); + // A row store expression that preserves all fields used by the parent projection is + // redundant and can be removed. + if (!childColumnScan->extraFieldsPermitted && + isSubset(parentProjection.proj.getRequiredFields(), + childColumnScan->outputFields)) { + childColumnScan->extraFieldsPermitted = true; + } + } + } + // Recur on child. + removeUselessColumnScanRowStoreExpression(*child); + } +} + +// static std::pair<EqLookupNode::LookupStrategy, boost::optional<IndexEntry>> QueryPlannerAnalysis::determineLookupStrategy( const std::string& foreignCollName, @@ -1191,6 +1223,8 @@ std::unique_ptr<QuerySolution> QueryPlannerAnalysis::analyzeDataAccess( solnRoot = tryPushdownProjectBeneathSort(std::move(solnRoot)); + QueryPlannerAnalysis::removeUselessColumnScanRowStoreExpression(*solnRoot); + soln->setRoot(std::move(solnRoot)); return soln; } diff --git a/src/mongo/db/query/planner_analysis.h b/src/mongo/db/query/planner_analysis.h index 3233ef82d7b..8f4c6ae3818 100644 --- a/src/mongo/db/query/planner_analysis.h +++ b/src/mongo/db/query/planner_analysis.h @@ -130,6 +130,15 @@ public: std::unique_ptr<QuerySolution> soln); /** + * Walks the QuerySolutionNode tree rooted in 'soln', and looks for a ColumnScan that + * is a child of either a Group or Projection. If the ColumnScan's parent will ignore + * extra fields, then eliminate its row store expression, allowing it to return extra fields + * in cases when it falls back to pulling the full document from the row store. + * If these conditions are not met this is a noop. + */ + static void removeUselessColumnScanRowStoreExpression(QuerySolutionNode& root); + + /** * For the provided 'foreignCollName' and 'foreignFieldName' corresponding to an EqLookupNode, * returns what join algorithm should be used to execute it. In particular: * - An empty array is produced for each document if the foreign collection does not exist. diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp index ba398f446cb..c0e787c6d99 100644 --- a/src/mongo/db/query/query_planner.cpp +++ b/src/mongo/db/query/query_planner.cpp @@ -196,7 +196,7 @@ bool hintMatchesColumnStoreIndex(const BSONObj& hintObj, const ColumnIndexEntry& } /** - * Returns the dependencies for the CanoncialQuery, split by those needed to answer the filter, + * Returns the dependencies for the CanonicalQuery, split by those needed to answer the filter, * and those needed for "everything else" which is the project and sort. */ std::pair<DepsTracker, DepsTracker> computeDeps(const QueryPlannerParams& params, @@ -1601,7 +1601,12 @@ std::unique_ptr<QuerySolution> QueryPlanner::extendWithAggPipeline( } solution->extendWith(std::move(solnForAgg)); - return QueryPlannerAnalysis::removeInclusionProjectionBelowGroup(std::move(solution)); + + solution = QueryPlannerAnalysis::removeInclusionProjectionBelowGroup(std::move(solution)); + + QueryPlannerAnalysis::removeUselessColumnScanRowStoreExpression(*solution->root()); + + return std::move(solution); } StatusWith<std::unique_ptr<QuerySolution>> QueryPlanner::choosePlanForSubqueries( diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp index 5dd0b545497..2e040bd710b 100644 --- a/src/mongo/db/query/query_solution.cpp +++ b/src/mongo/db/query/query_solution.cpp @@ -1102,13 +1102,15 @@ ColumnIndexScanNode::ColumnIndexScanNode(ColumnIndexEntry indexEntry, OrderedPathSet matchFieldsIn, OrderedPathSet allFieldsIn, StringMap<std::unique_ptr<MatchExpression>> filtersByPath, - std::unique_ptr<MatchExpression> postAssemblyFilter) + std::unique_ptr<MatchExpression> postAssemblyFilter, + bool extraFieldsPermitted) : indexEntry(std::move(indexEntry)), outputFields(std::move(outputFieldsIn)), matchFields(std::move(matchFieldsIn)), allFields(std::move(allFieldsIn)), filtersByPath(std::move(filtersByPath)), - postAssemblyFilter(std::move(postAssemblyFilter)) {} + postAssemblyFilter(std::move(postAssemblyFilter)), + extraFieldsPermitted(extraFieldsPermitted) {} void ColumnIndexScanNode::appendToString(str::stream* ss, int indent) const { addIndent(ss, indent); diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h index 96af62b4a90..a7182a4251d 100644 --- a/src/mongo/db/query/query_solution.h +++ b/src/mongo/db/query/query_solution.h @@ -516,7 +516,8 @@ struct ColumnIndexScanNode : public QuerySolutionNode { OrderedPathSet matchFields, OrderedPathSet allFields, StringMap<std::unique_ptr<MatchExpression>> filtersByPath, - std::unique_ptr<MatchExpression> postAssemblyFilter); + std::unique_ptr<MatchExpression> postAssemblyFilter, + bool extraFieldsPermitted = false); virtual StageType getType() const { return STAGE_COLUMN_SCAN; @@ -549,7 +550,8 @@ struct ColumnIndexScanNode : public QuerySolutionNode { matchFields, allFields, std::move(clonedFiltersByPath), - postAssemblyFilter->shallowClone()); + postAssemblyFilter->shallowClone(), + extraFieldsPermitted); } ColumnIndexEntry indexEntry; @@ -575,6 +577,10 @@ struct ColumnIndexScanNode : public QuerySolutionNode { // An optional filter to apply after assembling a document from all scanned columns. For // example: {$or: [{a: 2}, {b: 2}]}. std::unique_ptr<MatchExpression> postAssemblyFilter; + + // If set to true, we can include extra fields rather than project them out because projection + // happens anyway in a later stage (such a group stage). + bool extraFieldsPermitted; }; /** diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp index 80bb4ea4519..ab20b24f502 100644 --- a/src/mongo/db/query/sbe_stage_builder.cpp +++ b/src/mongo/db/query/sbe_stage_builder.cpp @@ -883,16 +883,23 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder }); } - // Generate the expression that is applied to the row store record(in the case when the result + // Generate the expression that is applied to the row store record (in the case when the result // cannot be reconstructed from the index). - optimizer::SlotVarMap slotMap{}; - slotMap[rootStr] = rowStoreSlot; - auto abt = builder.generateABT(); - // We might get null abt if no paths were added to the builder. It means we should be projecting - // an empty object. - tassert(6935000, "ABT must be valid if have fields to project", fieldsToProject.empty() || abt); - auto rowStoreExpr = abt ? abtToExpr(*abt, slotMap) - : sbe::makeE<sbe::EFunction>("newObj", sbe::EExpression::Vector{}); + std::unique_ptr<sbe::EExpression> rowStoreExpr = nullptr; + + // Avoid generating the row store expression if the projection is not necessary, as indicated by + // the extraFieldsPermitted flag of the column store node. + if (boost::optional<optimizer::ABT> abt; + !csn->extraFieldsPermitted && (abt = builder.generateABT())) { + // We might get null abt if no paths were added to the builder. It means we should be + // projecting an empty object. + tassert( + 6935000, "ABT must be valid if have fields to project", fieldsToProject.empty() || abt); + optimizer::SlotVarMap slotMap{}; + slotMap[rootStr] = rowStoreSlot; + rowStoreExpr = abt ? abtToExpr(*abt, slotMap) + : sbe::makeE<sbe::EFunction>("newObj", sbe::EExpression::Vector{}); + } std::unique_ptr<sbe::PlanStage> stage = std::make_unique<sbe::ColumnScanStage>(getCurrentCollection(reqs)->uuid(), |