summaryrefslogtreecommitdiff
path: root/src/mongo
diff options
context:
space:
mode:
authorSteve Tarzia <steve.tarzia@mongodb.com>2022-09-23 14:12:53 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-09-23 15:07:34 +0000
commit538c5d15bf6f84fcfab9328fdf9857e120321e00 (patch)
tree302af8dd18aeda20fb24911b1f170a5b384aac7c /src/mongo
parent125607bc02dfe1815bcef4eac4cdd6246b6445a7 (diff)
downloadmongo-538c5d15bf6f84fcfab9328fdf9857e120321e00.tar.gz
SERVER-68677 Skip row store projection in column scan plans when possible
Diffstat (limited to 'src/mongo')
-rw-r--r--src/mongo/db/exec/sbe/stages/column_scan.cpp46
-rw-r--r--src/mongo/db/exec/sbe/stages/column_scan.h5
-rw-r--r--src/mongo/db/query/planner_analysis.cpp66
-rw-r--r--src/mongo/db/query/planner_analysis.h9
-rw-r--r--src/mongo/db/query/query_planner.cpp9
-rw-r--r--src/mongo/db/query/query_solution.cpp6
-rw-r--r--src/mongo/db/query/query_solution.h10
-rw-r--r--src/mongo/db/query/sbe_stage_builder.cpp25
8 files changed, 121 insertions, 55 deletions
diff --git a/src/mongo/db/exec/sbe/stages/column_scan.cpp b/src/mongo/db/exec/sbe/stages/column_scan.cpp
index edc2f2e66cd..640894aa33b 100644
--- a/src/mongo/db/exec/sbe/stages/column_scan.cpp
+++ b/src/mongo/db/exec/sbe/stages/column_scan.cpp
@@ -659,11 +659,15 @@ PlanState ColumnScanStage::getNext() {
value::bitcastFrom<const char*>(record->data.data()));
if (_reconstructedRecordAccessor) {
- // TODO: in absence of record expression set the reconstructed record to be the same
- // as the record, retrieved from the row store.
- invariant(_rowStoreExpr);
- auto [owned, tag, val] = _bytecode.run(_rowStoreExprCode.get());
- _reconstructedRecordAccessor->reset(owned, tag, val);
+ if (_rowStoreExpr) {
+ auto [owned, tag, val] = _bytecode.run(_rowStoreExprCode.get());
+ _reconstructedRecordAccessor->reset(owned, tag, val);
+ } else {
+ _reconstructedRecordAccessor->reset(
+ false,
+ value::TypeTags::bsonObject,
+ value::bitcastFrom<const char*>(record->data.data()));
+ }
}
} else {
if (_reconstructedRecordAccessor) {
@@ -762,7 +766,7 @@ std::vector<DebugPrinter::Block> ColumnScanStage::debugPrint() const {
}
// Print out paths.
- ret.emplace_back(DebugPrinter::Block("[`"));
+ ret.emplace_back(DebugPrinter::Block("paths[`"));
for (size_t idx = 0; idx < _paths.size(); ++idx) {
if (idx) {
ret.emplace_back(DebugPrinter::Block("`,"));
@@ -772,30 +776,28 @@ std::vector<DebugPrinter::Block> ColumnScanStage::debugPrint() const {
}
ret.emplace_back(DebugPrinter::Block("`]"));
- // Print out per-path filters (if any).
- if (!_filteredPaths.empty()) {
- ret.emplace_back(DebugPrinter::Block("[`"));
- for (size_t idx = 0; idx < _filteredPaths.size(); ++idx) {
- if (idx) {
- ret.emplace_back(DebugPrinter::Block("`;"));
- }
-
- ret.emplace_back(str::stream()
- << "\"" << _paths[_filteredPaths[idx].pathIndex] << "\": ");
- DebugPrinter::addIdentifier(ret, _filteredPaths[idx].inputSlotId);
- ret.emplace_back(DebugPrinter::Block("`,"));
- DebugPrinter::addBlocks(ret, _filteredPaths[idx].filterExpr->debugPrint());
+ // Print out per-path filters.
+ ret.emplace_back(DebugPrinter::Block("pathFilters[`"));
+ for (size_t idx = 0; idx < _filteredPaths.size(); ++idx) {
+ if (idx) {
+ ret.emplace_back(DebugPrinter::Block("`;"));
}
- ret.emplace_back(DebugPrinter::Block("`]"));
+
+ ret.emplace_back(str::stream() << "\"" << _paths[_filteredPaths[idx].pathIndex] << "\": ");
+ DebugPrinter::addIdentifier(ret, _filteredPaths[idx].inputSlotId);
+ ret.emplace_back(DebugPrinter::Block("`,"));
+ DebugPrinter::addBlocks(ret, _filteredPaths[idx].filterExpr->debugPrint());
}
+ ret.emplace_back(DebugPrinter::Block("`]"));
+ // Print out rowStoreExpression as [rowStoreSlot, rowStoreExpression]
+ ret.emplace_back(DebugPrinter::Block("rowStoreExpr[`"));
if (_rowStoreExpr) {
- ret.emplace_back(DebugPrinter::Block("[`"));
DebugPrinter::addIdentifier(ret, _rowStoreSlot);
ret.emplace_back(DebugPrinter::Block("`,"));
DebugPrinter::addBlocks(ret, _rowStoreExpr->debugPrint());
- ret.emplace_back(DebugPrinter::Block("`]"));
}
+ ret.emplace_back(DebugPrinter::Block("`]"));
ret.emplace_back("@\"`");
DebugPrinter::addIdentifier(ret, _collUuid.toString());
diff --git a/src/mongo/db/exec/sbe/stages/column_scan.h b/src/mongo/db/exec/sbe/stages/column_scan.h
index 8e954b63f9a..1eda315d94b 100644
--- a/src/mongo/db/exec/sbe/stages/column_scan.h
+++ b/src/mongo/db/exec/sbe/stages/column_scan.h
@@ -52,8 +52,9 @@ namespace sbe {
*
* Debug string representation:
*
- * COLUMN_SCAN reconstructedRecordSlot|none recordIdSlot|none [path_1, ..., path_n]
- * [filter_path_1: filterSlot_1, filterExpr_1; ...]? [roStoreSlot, rowStoreExpr]?
+ * COLUMN_SCAN reconstructedRecordSlot|none recordIdSlot|none paths[path_1, ..., path_n]
+ * pathFilters[filter_path_1: filterSlot_1, filterExpr_1; ...]
+ * rowStoreExpr[slot, expr]|rowStoreExpr[]
* collectionUuid indexName
*/
class ColumnScanStage final : public PlanStage {
diff --git a/src/mongo/db/query/planner_analysis.cpp b/src/mongo/db/query/planner_analysis.cpp
index 62afaa03e98..4b19c1fac59 100644
--- a/src/mongo/db/query/planner_analysis.cpp
+++ b/src/mongo/db/query/planner_analysis.cpp
@@ -626,26 +626,20 @@ void removeInclusionProjectionBelowGroupRecursive(QuerySolutionNode* solnRoot) {
auto groupNode = static_cast<GroupNode*>(solnRoot);
QuerySolutionNode* projectNodeCandidate = groupNode->children[0].get();
- if (projectNodeCandidate->getType() == StageType::STAGE_GROUP) {
- // Multiple $group stages may be pushed down. So, if the child is a GROUP, then recurse.
- return removeInclusionProjectionBelowGroupRecursive(projectNodeCandidate);
- } else if (auto projection = attemptToGetProjectionFromQuerySolution(*projectNodeCandidate);
- projection && projection.value()->isInclusionOnly()) {
- // Check to see if the projectNode's field set is a super set of the groupNodes.
- if (!isSubset(groupNode->requiredFields, projection.value()->getRequiredFields())) {
- // The dependency set of the GROUP stage is wider than the projectNode field set.
- return;
- }
-
+ if (auto projection = attemptToGetProjectionFromQuerySolution(*projectNodeCandidate);
+ // only eliminate inclusion projections
+ projection && projection.value()->isInclusionOnly() &&
+ // only eliminate projections which preserve all fields used by the group
+ isSubset(groupNode->requiredFields, projection.value()->getRequiredFields())) {
// Attach the projectNode's child directly as the groupNode's child, eliminating the
// project node.
groupNode->children[0] = std::move(projectNodeCandidate->children[0]);
}
- } else {
- // Keep traversing the tree in search of a GROUP stage.
- for (size_t i = 0; i < solnRoot->children.size(); ++i) {
- removeInclusionProjectionBelowGroupRecursive(solnRoot->children[i].get());
- }
+ }
+
+ // Keep traversing the tree in search of GROUP stages.
+ for (size_t i = 0; i < solnRoot->children.size(); ++i) {
+ removeInclusionProjectionBelowGroupRecursive(solnRoot->children[i].get());
}
}
@@ -682,6 +676,44 @@ std::unique_ptr<QuerySolution> QueryPlannerAnalysis::removeInclusionProjectionBe
}
// static
+void QueryPlannerAnalysis::removeUselessColumnScanRowStoreExpression(QuerySolutionNode& root) {
+ // For each COLUMN_SCAN whose parent is a GROUP or PROJECTION, set the scan's
+ // 'extraFieldsPermitted' flag when every field the parent requires is among the scan's
+ // 'outputFields'. The flag marks the COLUMN_SCAN's rowStoreExpression (the projection applied
+ // to documents retrieved from row store fallback) as not affecting the group or project above.
+ for (auto& child : root.children) {
+ if (child->getType() == STAGE_COLUMN_SCAN) {
+ auto childColumnScan = static_cast<ColumnIndexScanNode*>(child.get());
+
+ // Case 1: 'root' is a GROUP directly above the column scan.
+ if (root.getType() == STAGE_GROUP) {
+ auto& parentGroup = static_cast<GroupNode&>(root);
+ // The scan already outputs every field the parent $group requires, so its row store
+ // expression is redundant. The subset check is skipped if the flag is already set.
+ if (!childColumnScan->extraFieldsPermitted &&
+ isSubset(parentGroup.requiredFields, childColumnScan->outputFields)) {
+ childColumnScan->extraFieldsPermitted = true;
+ }
+ }
+ // Case 2: 'root' is a simple or default PROJECTION directly above the column scan.
+ else if (root.getType() == STAGE_PROJECTION_SIMPLE ||
+ root.getType() == STAGE_PROJECTION_DEFAULT) {
+ auto& parentProjection = static_cast<ProjectionNode&>(root);
+ // The scan already outputs every field the parent projection requires, so its row
+ // store expression is redundant. The subset check is skipped if the flag is set.
+ if (!childColumnScan->extraFieldsPermitted &&
+ isSubset(parentProjection.proj.getRequiredFields(),
+ childColumnScan->outputFields)) {
+ childColumnScan->extraFieldsPermitted = true;
+ }
+ }
+ }
+ // Recurse into every child (COLUMN_SCAN or not) so that the whole tree is visited.
+ removeUselessColumnScanRowStoreExpression(*child);
+ }
+}
+
+// static
std::pair<EqLookupNode::LookupStrategy, boost::optional<IndexEntry>>
QueryPlannerAnalysis::determineLookupStrategy(
const std::string& foreignCollName,
@@ -1191,6 +1223,8 @@ std::unique_ptr<QuerySolution> QueryPlannerAnalysis::analyzeDataAccess(
solnRoot = tryPushdownProjectBeneathSort(std::move(solnRoot));
+ QueryPlannerAnalysis::removeUselessColumnScanRowStoreExpression(*solnRoot);
+
soln->setRoot(std::move(solnRoot));
return soln;
}
diff --git a/src/mongo/db/query/planner_analysis.h b/src/mongo/db/query/planner_analysis.h
index 3233ef82d7b..8f4c6ae3818 100644
--- a/src/mongo/db/query/planner_analysis.h
+++ b/src/mongo/db/query/planner_analysis.h
@@ -130,6 +130,15 @@ public:
std::unique_ptr<QuerySolution> soln);
/**
+ * Walks the QuerySolutionNode tree rooted at 'root', and looks for a ColumnScan that
+ * is a child of either a Group or Projection. If the ColumnScan's parent will ignore
+ * extra fields, then eliminate its row store expression, allowing it to return extra fields
+ * in cases when it falls back to pulling the full document from the row store.
+ * If these conditions are not met, this is a no-op.
+ */
+ static void removeUselessColumnScanRowStoreExpression(QuerySolutionNode& root);
+
+ /**
* For the provided 'foreignCollName' and 'foreignFieldName' corresponding to an EqLookupNode,
* returns what join algorithm should be used to execute it. In particular:
* - An empty array is produced for each document if the foreign collection does not exist.
diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp
index ba398f446cb..c0e787c6d99 100644
--- a/src/mongo/db/query/query_planner.cpp
+++ b/src/mongo/db/query/query_planner.cpp
@@ -196,7 +196,7 @@ bool hintMatchesColumnStoreIndex(const BSONObj& hintObj, const ColumnIndexEntry&
}
/**
- * Returns the dependencies for the CanoncialQuery, split by those needed to answer the filter,
+ * Returns the dependencies for the CanonicalQuery, split by those needed to answer the filter,
* and those needed for "everything else" which is the project and sort.
*/
std::pair<DepsTracker, DepsTracker> computeDeps(const QueryPlannerParams& params,
@@ -1601,7 +1601,12 @@ std::unique_ptr<QuerySolution> QueryPlanner::extendWithAggPipeline(
}
solution->extendWith(std::move(solnForAgg));
- return QueryPlannerAnalysis::removeInclusionProjectionBelowGroup(std::move(solution));
+
+ solution = QueryPlannerAnalysis::removeInclusionProjectionBelowGroup(std::move(solution));
+
+ QueryPlannerAnalysis::removeUselessColumnScanRowStoreExpression(*solution->root());
+
+ return std::move(solution);
}
StatusWith<std::unique_ptr<QuerySolution>> QueryPlanner::choosePlanForSubqueries(
diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp
index 5dd0b545497..2e040bd710b 100644
--- a/src/mongo/db/query/query_solution.cpp
+++ b/src/mongo/db/query/query_solution.cpp
@@ -1102,13 +1102,15 @@ ColumnIndexScanNode::ColumnIndexScanNode(ColumnIndexEntry indexEntry,
OrderedPathSet matchFieldsIn,
OrderedPathSet allFieldsIn,
StringMap<std::unique_ptr<MatchExpression>> filtersByPath,
- std::unique_ptr<MatchExpression> postAssemblyFilter)
+ std::unique_ptr<MatchExpression> postAssemblyFilter,
+ bool extraFieldsPermitted)
: indexEntry(std::move(indexEntry)),
outputFields(std::move(outputFieldsIn)),
matchFields(std::move(matchFieldsIn)),
allFields(std::move(allFieldsIn)),
filtersByPath(std::move(filtersByPath)),
- postAssemblyFilter(std::move(postAssemblyFilter)) {}
+ postAssemblyFilter(std::move(postAssemblyFilter)),
+ extraFieldsPermitted(extraFieldsPermitted) {}
void ColumnIndexScanNode::appendToString(str::stream* ss, int indent) const {
addIndent(ss, indent);
diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h
index 96af62b4a90..a7182a4251d 100644
--- a/src/mongo/db/query/query_solution.h
+++ b/src/mongo/db/query/query_solution.h
@@ -516,7 +516,8 @@ struct ColumnIndexScanNode : public QuerySolutionNode {
OrderedPathSet matchFields,
OrderedPathSet allFields,
StringMap<std::unique_ptr<MatchExpression>> filtersByPath,
- std::unique_ptr<MatchExpression> postAssemblyFilter);
+ std::unique_ptr<MatchExpression> postAssemblyFilter,
+ bool extraFieldsPermitted = false);
virtual StageType getType() const {
return STAGE_COLUMN_SCAN;
@@ -549,7 +550,8 @@ struct ColumnIndexScanNode : public QuerySolutionNode {
matchFields,
allFields,
std::move(clonedFiltersByPath),
- postAssemblyFilter->shallowClone());
+ postAssemblyFilter->shallowClone(),
+ extraFieldsPermitted);
}
ColumnIndexEntry indexEntry;
@@ -575,6 +577,10 @@ struct ColumnIndexScanNode : public QuerySolutionNode {
// An optional filter to apply after assembling a document from all scanned columns. For
// example: {$or: [{a: 2}, {b: 2}]}.
std::unique_ptr<MatchExpression> postAssemblyFilter;
+
+ // If set to true, we can include extra fields rather than project them out because projection
+ // happens anyway in a later stage (such as a group stage).
+ bool extraFieldsPermitted;
};
/**
diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp
index 80bb4ea4519..ab20b24f502 100644
--- a/src/mongo/db/query/sbe_stage_builder.cpp
+++ b/src/mongo/db/query/sbe_stage_builder.cpp
@@ -883,16 +883,23 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder
});
}
- // Generate the expression that is applied to the row store record(in the case when the result
+ // Generate the expression that is applied to the row store record (in the case when the result
// cannot be reconstructed from the index).
- optimizer::SlotVarMap slotMap{};
- slotMap[rootStr] = rowStoreSlot;
- auto abt = builder.generateABT();
- // We might get null abt if no paths were added to the builder. It means we should be projecting
- // an empty object.
- tassert(6935000, "ABT must be valid if have fields to project", fieldsToProject.empty() || abt);
- auto rowStoreExpr = abt ? abtToExpr(*abt, slotMap)
- : sbe::makeE<sbe::EFunction>("newObj", sbe::EExpression::Vector{});
+ std::unique_ptr<sbe::EExpression> rowStoreExpr = nullptr;
+
+ // Avoid generating the row store expression if the projection is not necessary, as indicated by
+ // the extraFieldsPermitted flag of the column store node.
+ if (boost::optional<optimizer::ABT> abt;
+ !csn->extraFieldsPermitted && (abt = builder.generateABT())) {
+ // A null abt (no paths added to the builder) should mean projecting an empty object, but
+ // NOTE(review): the condition above already requires abt, so that fallback looks unreachable.
+ tassert(
+ 6935000, "ABT must be valid if have fields to project", fieldsToProject.empty() || abt);
+ optimizer::SlotVarMap slotMap{};
+ slotMap[rootStr] = rowStoreSlot;
+ rowStoreExpr = abt ? abtToExpr(*abt, slotMap)
+ : sbe::makeE<sbe::EFunction>("newObj", sbe::EExpression::Vector{});
+ }
std::unique_ptr<sbe::PlanStage> stage =
std::make_unique<sbe::ColumnScanStage>(getCurrentCollection(reqs)->uuid(),