diff options
author | Drew Paroski <drew.paroski@mongodb.com> | 2022-10-14 17:22:35 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-10-31 03:49:58 +0000 |
commit | c44135cca044b087f529a3024bea8e2c2f1866f4 (patch) | |
tree | 87cc202a478795c14652370bccc0c20dcaff8149 | |
parent | 7974573a7bbc9c0121f5003ee80e8270d6dea1e4 (diff) | |
download | mongo-c44135cca044b087f529a3024bea8e2c2f1866f4.tar.gz |
SERVER-60234 Implement plumbing for getField() pushdown in the SBE stage builder
18 files changed, 1445 insertions, 1032 deletions
diff --git a/src/mongo/db/exec/exclusion_projection_executor.cpp b/src/mongo/db/exec/exclusion_projection_executor.cpp index 9823ed1b125..ad7d7ca9ddb 100644 --- a/src/mongo/db/exec/exclusion_projection_executor.cpp +++ b/src/mongo/db/exec/exclusion_projection_executor.cpp @@ -38,9 +38,10 @@ std::pair<BSONObj, bool> ExclusionNode::extractProjectOnFieldAndRename(const Str BSONObjBuilder extractedExclusion; // Check for a projection directly on 'oldName'. For example, {oldName: 0}. - if (auto it = _projectedFields.find(oldName); it != _projectedFields.end()) { + if (auto it = _projectedFieldsSet.find(oldName); it != _projectedFieldsSet.end()) { extractedExclusion.append(newName, false); - _projectedFields.erase(it); + _projectedFieldsSet.erase(it); + _projectedFields.remove(std::string(oldName)); } // Check for a projection on subfields of 'oldName'. For example, {oldName: {a: 0, b: 0}}. diff --git a/src/mongo/db/exec/inclusion_projection_executor.cpp b/src/mongo/db/exec/inclusion_projection_executor.cpp index 84117710063..30e06a76d82 100644 --- a/src/mongo/db/exec/inclusion_projection_executor.cpp +++ b/src/mongo/db/exec/inclusion_projection_executor.cpp @@ -68,7 +68,7 @@ void FastPathEligibleInclusionNode::_applyProjections(BSONObj bson, BSONObjBuild const auto bsonElement{it.next()}; const auto fieldName{bsonElement.fieldNameStringData()}; - if (_projectedFields.find(fieldName) != _projectedFields.end()) { + if (_projectedFieldsSet.find(fieldName) != _projectedFieldsSet.end()) { bob->append(bsonElement); --nFieldsNeeded; } else if (auto childIt = _children.find(fieldName); childIt != _children.end()) { @@ -169,7 +169,8 @@ std::pair<BSONObj, bool> InclusionNode::extractComputedProjectionsInProject( if (std::get<2>(expressionSpec)) { // Replace the expression with an inclusion projected field. - _projectedFields.insert(fieldName); + auto it = _projectedFields.insert(_projectedFields.end(), fieldName); + _projectedFieldsSet.insert(StringData(*it)); _expressions.erase(fieldName); // Only computed projections at the beginning of the list were marked to become // projected fields. The new projected field is at the beginning of the diff --git a/src/mongo/db/exec/projection_node.cpp b/src/mongo/db/exec/projection_node.cpp index 00fcf70946f..bbd2ebcac6b 100644 --- a/src/mongo/db/exec/projection_node.cpp +++ b/src/mongo/db/exec/projection_node.cpp @@ -48,7 +48,8 @@ void ProjectionNode::addProjectionForPath(const FieldPath& path) { void ProjectionNode::_addProjectionForPath(const FieldPath& path) { makeOptimizationsStale(); if (path.getPathLength() == 1) { - _projectedFields.insert(path.fullPath()); + auto it = _projectedFields.insert(_projectedFields.end(), path.fullPath()); + _projectedFieldsSet.insert(StringData(*it)); return; } // FieldPath can't be empty, so it is safe to obtain the first path component here. @@ -141,7 +142,7 @@ void ProjectionNode::applyProjections(const Document& inputDoc, MutableDocument* while (it.more()) { auto fieldName = it.fieldName(); - if (_projectedFields.find(fieldName) != _projectedFields.end()) { + if (_projectedFieldsSet.find(fieldName) != _projectedFieldsSet.end()) { outputProjectedField( fieldName, applyLeafProjectionToValue(it.next().second), outputDoc); ++projectedFields; @@ -280,7 +281,7 @@ void ProjectionNode::serialize(boost::optional<ExplainOptions::Verbosity> explai const bool projVal = !applyLeafProjectionToValue(Value(true)).missing(); // Always put "_id" first if it was projected (implicitly or explicitly). - if (_projectedFields.find("_id") != _projectedFields.end()) { + if (_projectedFieldsSet.find("_id") != _projectedFieldsSet.end()) { output->addField("_id", Value(projVal)); } diff --git a/src/mongo/db/exec/projection_node.h b/src/mongo/db/exec/projection_node.h index 82cae8b145b..61e9b98b89d 100644 --- a/src/mongo/db/exec/projection_node.h +++ b/src/mongo/db/exec/projection_node.h @@ -29,6 +29,8 @@ #pragma once +#include <list> + #include "mongo/db/exec/projection_executor.h" #include "mongo/db/query/projection_policies.h" @@ -176,7 +178,14 @@ protected: StringMap<std::unique_ptr<ProjectionNode>> _children; StringMap<boost::intrusive_ptr<Expression>> _expressions; - StringSet _projectedFields; + + // List of the projected fields in the order in which they were specified. + std::list<std::string> _projectedFields; + + // Set of projected fields. Note that the _projectedFields list actually owns the strings, and + // this StringDataSet simply holds views of those strings. + StringDataSet _projectedFieldsSet; + ProjectionPolicies _policies; std::string _pathToNode; diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h index a24cff08f2f..d33794340fe 100644 --- a/src/mongo/db/query/query_solution.h +++ b/src/mongo/db/query/query_solution.h @@ -1396,15 +1396,15 @@ struct GroupNode : public QuerySolutionNode { shouldProduceBson(shouldProduceBson) { // Use the DepsTracker to extract the fields that the 'groupByExpression' and accumulator // expressions depend on. - for (auto& groupByExprField : expression::getDependencies(groupByExpression.get()).fields) { - requiredFields.insert(groupByExprField); - } + DepsTracker deps; + expression::addDependencies(groupByExpression.get(), &deps); for (auto&& acc : accumulators) { - auto argExpr = acc.expr.argument; - for (auto& argExprField : expression::getDependencies(argExpr.get()).fields) { - requiredFields.insert(argExprField); - } + expression::addDependencies(acc.expr.argument.get(), &deps); } + + requiredFields = deps.fields; + needWholeDocument = deps.needWholeDocument; + needsAnyMetadata = deps.getNeedsAnyMetadata(); } StageType getType() const override { @@ -1438,7 +1438,9 @@ struct GroupNode : public QuerySolutionNode { // Carries the fields this GroupNode depends on. Namely, 'requiredFields' contains the union of // the fields in the 'groupByExpressions' and the fields in the input Expressions of the // 'accumulators'. - StringSet requiredFields; + OrderedPathSet requiredFields; + bool needWholeDocument = false; + bool needsAnyMetadata = false; // If set to true, generated SBE plan will produce result as BSON object. If false, // 'sbe::Object' is produced instead. diff --git a/src/mongo/db/query/sbe_shard_filter_test.cpp b/src/mongo/db/query/sbe_shard_filter_test.cpp index 82585905f6e..a5d49bf4849 100644 --- a/src/mongo/db/query/sbe_shard_filter_test.cpp +++ b/src/mongo/db/query/sbe_shard_filter_test.cpp @@ -234,7 +234,7 @@ TEST_F(SbeShardFilterTest, MissingFieldsAtBottomDottedPathFilledCorrectly) { TEST_F(SbeShardFilterTest, CoveredShardFilterPlan) { auto indexKeyPattern = BSON("a" << 1 << "b" << 1 << "c" << 1 << "d" << 1); - auto projection = BSON("a" << 1 << "c" << 1); + auto projection = BSON("a" << 1 << "c" << 1 << "_id" << 0); auto mockedIndexKeys = std::vector<BSONArray>{BSON_ARRAY(BSON("a" << 2 << "b" << 2 << "c" << 2 << "d" << 2)), BSON_ARRAY(BSON("a" << 3 << "b" << 3 << "c" << 3 << "d" << 3))}; diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp index ce8344a0d36..a196821e3dc 100644 --- a/src/mongo/db/query/sbe_stage_builder.cpp +++ b/src/mongo/db/query/sbe_stage_builder.cpp @@ -84,129 +84,9 @@ #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery - namespace mongo::stage_builder { namespace { /** - * For covered projections, each of the projection field paths represent respective index key. To - * rehydrate index keys into the result object, we first need to convert projection AST into - * 'IndexKeyPatternTreeNode' structure. Context structure and visitors below are used for this - * purpose. - */ -struct IndexKeysBuilderContext { - // Contains resulting tree of index keys converted from projection AST. - IndexKeyPatternTreeNode root; - - // Full field path of the currently visited projection node. - std::vector<StringData> currentFieldPath; - - // Each projection node has a vector of field names. This stack contains indexes of the - // currently visited field names for each of the projection nodes. - std::vector<size_t> currentFieldIndex; -}; - -/** - * Covered projections are always inclusion-only, so we ban all other operators. - */ -class IndexKeysBuilder : public projection_ast::ProjectionASTConstVisitor { -public: - using projection_ast::ProjectionASTConstVisitor::visit; - - IndexKeysBuilder(IndexKeysBuilderContext* context) : _context{context} {} - - void visit(const projection_ast::ProjectionPositionalASTNode* node) final { - tasserted(5474501, "Positional projection is not allowed in covered projection"); - } - - void visit(const projection_ast::ProjectionSliceASTNode* node) final { - tasserted(5474502, "$slice is not allowed in covered projection"); - } - - void visit(const projection_ast::ProjectionElemMatchASTNode* node) final { - tasserted(5474503, "$elemMatch is not allowed in covered projection"); - } - - void visit(const projection_ast::ExpressionASTNode* node) final { - tasserted(5474504, "Expressions are not allowed in covered projection"); - } - - void visit(const projection_ast::MatchExpressionASTNode* node) final { - tasserted(5474505, - "$elemMatch and positional projection are not allowed in covered projection"); - } - - void visit(const projection_ast::BooleanConstantASTNode* node) override {} - -protected: - IndexKeysBuilderContext* _context; -}; - -class IndexKeysPreBuilder final : public IndexKeysBuilder { -public: - using IndexKeysBuilder::IndexKeysBuilder; - using IndexKeysBuilder::visit; - - void visit(const projection_ast::ProjectionPathASTNode* node) final { - _context->currentFieldIndex.push_back(0); - _context->currentFieldPath.emplace_back(node->fieldNames().front()); - } -}; - -class IndexKeysInBuilder final : public IndexKeysBuilder { -public: - using IndexKeysBuilder::IndexKeysBuilder; - using IndexKeysBuilder::visit; - - void visit(const projection_ast::ProjectionPathASTNode* node) final { - auto& currentIndex = _context->currentFieldIndex.back(); - currentIndex++; - _context->currentFieldPath.back() = node->fieldNames()[currentIndex]; - } -}; - -class IndexKeysPostBuilder final : public IndexKeysBuilder { -public: - using IndexKeysBuilder::IndexKeysBuilder; - using IndexKeysBuilder::visit; - - void visit(const projection_ast::ProjectionPathASTNode* node) final { - _context->currentFieldIndex.pop_back(); - _context->currentFieldPath.pop_back(); - } - - void visit(const projection_ast::BooleanConstantASTNode* constantNode) final { - if (!constantNode->value()) { - // Even though only inclusion is allowed in covered projection, there still can be - // {_id: 0} component. We do not need to generate any nodes for it. - return; - } - - // Insert current field path into the index keys tree if it does not exist yet. - auto* node = &_context->root; - for (const auto& part : _context->currentFieldPath) { - if (auto it = node->children.find(part); it != node->children.end()) { - node = it->second.get(); - } else { - node = node->emplace(part); - } - } - } -}; - -sbe::value::SlotVector getSlotsToForward(const PlanStageReqs& reqs, const PlanStageSlots& outputs) { - std::vector<std::pair<StringData, sbe::value::SlotId>> pairs; - outputs.forEachSlot( - reqs, [&](auto&& slot, const StringData& name) { pairs.emplace_back(name, slot); }); - std::sort(pairs.begin(), pairs.end()); - - auto outputSlots = sbe::makeSV(); - for (auto&& p : pairs) { - outputSlots.emplace_back(p.second); - } - return outputSlots; -} - -/** * Generates an EOF plan. Note that even though this plan will return nothing, it will still define * the slots specified by 'reqs'. */ @@ -271,6 +151,26 @@ std::unique_ptr<sbe::RuntimeEnvironment> makeRuntimeEnvironment( return env; } +sbe::value::SlotVector getSlotsToForward(const PlanStageReqs& reqs, + const PlanStageSlots& outputs, + const sbe::value::SlotVector& exclude) { + auto excludeSet = sbe::value::SlotSet{exclude.begin(), exclude.end()}; + + std::vector<std::pair<PlanStageSlots::Name, sbe::value::SlotId>> pairs; + outputs.forEachSlot(reqs, [&](auto&& slot, const PlanStageSlots::Name& name) { + if (!excludeSet.count(slot)) { + pairs.emplace_back(name, slot); + } + }); + std::sort(pairs.begin(), pairs.end()); + + auto outputSlots = sbe::makeSV(); + for (auto&& p : pairs) { + outputSlots.emplace_back(p.second); + } + return outputSlots; +} + void prepareSlotBasedExecutableTree(OperationContext* opCtx, sbe::PlanStage* root, PlanStageData* data, @@ -524,22 +424,22 @@ std::unique_ptr<sbe::PlanStage> SlotBasedStageBuilder::build(const QuerySolution std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder::buildCollScan( const QuerySolutionNode* root, const PlanStageReqs& reqs) { - invariant(!reqs.getIndexKeyBitset()); + tassert(6023400, "buildCollScan() does not support kKey", !reqs.hasKeys()); + auto fields = reqs.getFields(); auto csn = static_cast<const CollectionScanNode*>(root); auto [stage, outputs] = generateCollScan(_state, getCurrentCollection(reqs), csn, + fields, _yieldPolicy, reqs.getIsTailableCollScanResumeBranch()); if (reqs.has(kReturnKey)) { // Assign the 'returnKeySlot' to be the empty object. outputs.set(kReturnKey, _slotIdGenerator.generate()); - stage = sbe::makeProjectStage(std::move(stage), - root->nodeId(), - outputs.get(kReturnKey), - sbe::makeE<sbe::EFunction>("newObj", sbe::makeEs())); + stage = sbe::makeProjectStage( + std::move(stage), root->nodeId(), outputs.get(kReturnKey), makeFunction("newObj"_sd)); } // Don't advertize the RecordId output if none of our ancestors are going to use it. if (!reqs.has(kRecordId)) { @@ -552,12 +452,20 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder::buildVirtualScan( const QuerySolutionNode* root, const PlanStageReqs& reqs) { using namespace std::literals; + auto vsn = static_cast<const VirtualScanNode*>(root); - // The caller should only have requested components of the index key if the virtual scan is - // mocking an index scan. - if (vsn->scanType == VirtualScanNode::ScanType::kCollScan) { - invariant(!reqs.getIndexKeyBitset()); - } + auto reqKeys = reqs.getKeys(); + + // The caller should only request kKey slots if the virtual scan is mocking an index scan. + tassert(6023401, + "buildVirtualScan() does not support kKey when 'scanType' is not ixscan", + vsn->scanType == VirtualScanNode::ScanType::kIxscan || reqKeys.empty()); + + tassert(6023423, + "buildVirtualScan() does not support dotted paths for kKey slots", + std::all_of(reqKeys.begin(), reqKeys.end(), [](auto&& s) { + return s.find('.') == std::string::npos; + })); auto [inputTag, inputVal] = sbe::value::makeNewArray(); sbe::value::ValueGuard inputGuard{inputTag, inputVal}; @@ -572,7 +480,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder } inputGuard.reset(); - auto [scanSlots, stage] = + auto [scanSlots, scanStage] = generateVirtualScanMulti(&_slotIdGenerator, vsn->hasRecordId ? 2 : 1, inputTag, inputVal); sbe::value::SlotId resultSlot; @@ -586,41 +494,27 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder PlanStageSlots outputs; - if (reqs.has(kResult)) { + if (reqs.has(kResult) || reqs.hasFields()) { outputs.set(kResult, resultSlot); - } else if (reqs.getIndexKeyBitset()) { - // The caller wanted individual slots for certain components of a mock index scan. Use a - // project stage to produce those slots. Since the test will represent index keys as BSON - // objects, we use 'getField' expressions to extract the necessary fields. - invariant(!vsn->indexKeyPattern.isEmpty()); - - sbe::value::SlotVector indexKeySlots; - sbe::value::SlotMap<std::unique_ptr<sbe::EExpression>> projections; - - size_t indexKeyPos = 0; - for (auto&& field : vsn->indexKeyPattern) { - if (reqs.getIndexKeyBitset()->test(indexKeyPos)) { - indexKeySlots.push_back(_slotIdGenerator.generate()); - projections.emplace(indexKeySlots.back(), - makeFunction("getField"_sd, - sbe::makeE<sbe::EVariable>(resultSlot), - makeConstant(field.fieldName()))); - } - ++indexKeyPos; - } - - stage = - sbe::makeS<sbe::ProjectStage>(std::move(stage), std::move(projections), root->nodeId()); - - outputs.setIndexKeySlots(indexKeySlots); } - if (reqs.has(kRecordId)) { invariant(vsn->hasRecordId); invariant(scanSlots.size() == 2); outputs.set(kRecordId, scanSlots[0]); } + auto stage = std::move(scanStage); + + // The caller wants individual slots for certain components of the mock index scan. Retrieve + // the values for these paths and project them to slots. + auto [projectStage, slots] = projectTopLevelFields( + std::move(stage), reqKeys, resultSlot, root->nodeId(), &_slotIdGenerator); + stage = std::move(projectStage); + + for (size_t i = 0; i < reqKeys.size(); ++i) { + outputs.set(std::make_pair(PlanStageSlots::kKey, std::move(reqKeys[i])), slots[i]); + } + return {std::move(stage), std::move(outputs)}; } @@ -629,17 +523,35 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder auto ixn = static_cast<const IndexScanNode*>(root); invariant(reqs.has(kReturnKey) || !ixn->addKeyMetadata); - sbe::IndexKeysInclusionSet indexKeyBitset; + auto reqKeys = reqs.getKeys(); + auto reqKeysSet = StringDataSet{reqKeys.begin(), reqKeys.end()}; - if (reqs.has(PlanStageSlots::kReturnKey) || reqs.has(PlanStageSlots::kResult)) { - // If either 'reqs.result' or 'reqs.returnKey' is true, we need to get all parts of the - // index key (regardless of what was requested by 'reqs.indexKeyBitset') so that we can - // create the inflated index key (keyExpr). - for (int i = 0; i < ixn->index.keyPattern.nFields(); ++i) { - indexKeyBitset.set(i); + std::vector<StringData> keys; + sbe::IndexKeysInclusionSet keysBitset; + StringDataSet indexKeyPatternSet; + size_t i = 0; + for (const auto& elt : ixn->index.keyPattern) { + StringData name = elt.fieldNameStringData(); + indexKeyPatternSet.emplace(name); + if (reqKeysSet.count(name)) { + keysBitset.set(i); + keys.emplace_back(name); } - } else if (reqs.getIndexKeyBitset()) { - indexKeyBitset = *reqs.getIndexKeyBitset(); + ++i; + } + + auto additionalKeys = + filterVector(reqKeys, [&](const std::string& s) { return !indexKeyPatternSet.count(s); }); + + sbe::IndexKeysInclusionSet indexKeyBitset; + if (reqs.has(kReturnKey) || reqs.has(kResult) || reqs.hasFields()) { + // If either 'reqs.result' or 'reqs.returnKey' or 'reqs.hasFields()' is true, we need to + // get all parts of the index key so that we can create the inflated index key. + for (int j = 0; j < ixn->index.keyPattern.nFields(); ++j) { + indexKeyBitset.set(j); + } + } else { + indexKeyBitset = keysBitset; } // If the slots necessary for performing an index consistency check were not requested in @@ -652,28 +564,32 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder const auto generateIndexScanFunc = ixn->iets.empty() ? generateIndexScan : generateIndexScanWithDynamicBounds; - auto&& [stage, outputs] = generateIndexScanFunc(_state, - getCurrentCollection(reqs), - ixn, - indexKeyBitset, - _yieldPolicy, - iamMap, - reqs.has(kIndexKeyPattern)); + auto&& [scanStage, scanOutputs] = generateIndexScanFunc(_state, + getCurrentCollection(reqs), + ixn, + indexKeyBitset, + _yieldPolicy, + iamMap, + reqs.has(kIndexKeyPattern)); + + auto stage = std::move(scanStage); + auto outputs = std::move(scanOutputs); // Remove the RecordId from the output if we were not requested to produce it. if (!reqs.has(PlanStageSlots::kRecordId) && outputs.has(kRecordId)) { outputs.clear(kRecordId); } - if (reqs.has(PlanStageSlots::kReturnKey)) { - sbe::EExpression::Vector mkObjArgs; - size_t i = 0; + if (reqs.has(PlanStageSlots::kReturnKey)) { + sbe::EExpression::Vector args; for (auto&& elem : ixn->index.keyPattern) { - mkObjArgs.emplace_back(sbe::makeE<sbe::EConstant>(elem.fieldNameStringData())); - mkObjArgs.emplace_back(sbe::makeE<sbe::EVariable>((*outputs.getIndexKeySlots())[i++])); + StringData name = elem.fieldNameStringData(); + args.emplace_back(sbe::makeE<sbe::EConstant>(name)); + args.emplace_back( + makeVariable(outputs.get(std::make_pair(PlanStageSlots::kKey, name)))); } - auto rawKeyExpr = sbe::makeE<sbe::EFunction>("newObj", std::move(mkObjArgs)); + auto rawKeyExpr = sbe::makeE<sbe::EFunction>("newObj"_sd, std::move(args)); outputs.set(PlanStageSlots::kReturnKey, _slotIdGenerator.generate()); stage = sbe::makeProjectStage(std::move(stage), ixn->nodeId(), @@ -681,25 +597,30 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder std::move(rawKeyExpr)); } - if (reqs.has(PlanStageSlots::kResult)) { - outputs.set(PlanStageSlots::kResult, _slotIdGenerator.generate()); - stage = rehydrateIndexKey(std::move(stage), - ixn->index.keyPattern, - ixn->nodeId(), - *outputs.getIndexKeySlots(), - outputs.get(PlanStageSlots::kResult)); + if (reqs.has(kResult) || reqs.hasFields()) { + auto indexKeySlots = sbe::makeSV(); + for (auto&& elem : ixn->index.keyPattern) { + StringData name = elem.fieldNameStringData(); + indexKeySlots.emplace_back(outputs.get(std::make_pair(PlanStageSlots::kKey, name))); + } + + auto resultSlot = _slotIdGenerator.generate(); + outputs.set(kResult, resultSlot); + + stage = rehydrateIndexKey( + std::move(stage), ixn->index.keyPattern, ixn->nodeId(), indexKeySlots, resultSlot); } - if (reqs.getIndexKeyBitset()) { - outputs.setIndexKeySlots( - makeIndexKeyOutputSlotsMatchingParentReqs(ixn->index.keyPattern, - *reqs.getIndexKeyBitset(), - indexKeyBitset, - *outputs.getIndexKeySlots())); - } else { - outputs.setIndexKeySlots(boost::none); + auto [outStage, nothingSlots] = projectNothingToSlots( + std::move(stage), additionalKeys.size(), root->nodeId(), &_slotIdGenerator); + stage = std::move(outStage); + for (size_t i = 0; i < additionalKeys.size(); ++i) { + outputs.set(std::make_pair(PlanStageSlots::kKey, std::move(additionalKeys[i])), + nothingSlots[i]); } + outputs.clearNonRequiredSlots(reqs); + return {std::move(stage), std::move(outputs)}; } @@ -838,9 +759,7 @@ std::unique_ptr<sbe::EExpression> generatePerColumnFilterExpr(StageBuilderState& std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder::buildColumnScan( const QuerySolutionNode* root, const PlanStageReqs& reqs) { - tassert(6312404, - "Unexpected index key bitset provided for column scan stage", - !reqs.getIndexKeyBitset()); + tassert(6023403, "buildColumnScan() does not support kKey", !reqs.hasKeys()); auto csn = static_cast<const ColumnIndexScanNode*>(root); tassert(6312405, @@ -976,19 +895,22 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder const QuerySolutionNode* root, const PlanStageReqs& reqs) { auto fn = static_cast<const FetchNode*>(root); - // The child must produce all of the slots required by the parent of this FetchNode, except for - // 'resultSlot' which will be produced by the call to makeLoopJoinForFetch() below. In addition - // to that, the child must always produce a 'recordIdSlot' because it's needed for the call to - // makeLoopJoinForFetch() below. + // The child must produce a kRecordId slot, as well as all the kMeta and kKey slots required + // by the parent of this FetchNode except for 'resultSlot'. Note that the child does _not_ + // need to produce any kField slots. Any kField requests by the parent will be handled by the + // logic below. + auto child = fn->children[0].get(); + auto childReqs = reqs.copy() .clear(kResult) + .clearAllFields() .set(kRecordId) .set(kSnapshotId) .set(kIndexId) .set(kIndexKey) .set(kIndexKeyPattern); - auto [stage, outputs] = build(fn->children[0].get(), childReqs); + auto [stage, outputs] = build(child, childReqs); auto iamMap = _data.iamMap; uassert(4822880, "RecordId slot is not defined", outputs.has(kRecordId)); @@ -999,44 +921,48 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder uassert(5290711, "Index key slot is not defined", outputs.has(kIndexKey)); uassert(5113713, "Index key pattern slot is not defined", outputs.has(kIndexKeyPattern)); - auto forwardingReqs = reqs.copy().clear(kResult).clear(kRecordId); + auto forwardingReqs = reqs.copy().clear(kResult).clear(kRecordId).clearAllFields(); auto relevantSlots = getSlotsToForward(forwardingReqs, outputs); - // Forward slots for components of the index key if our parent requested them. - if (auto indexKeySlots = outputs.getIndexKeySlots()) { - relevantSlots.insert(relevantSlots.end(), indexKeySlots->begin(), indexKeySlots->end()); - } - - sbe::value::SlotId fetchResultSlot, fetchRecordIdSlot; - std::tie(fetchResultSlot, fetchRecordIdSlot, stage) = - makeLoopJoinForFetch(std::move(stage), - outputs.get(kRecordId), - outputs.get(kSnapshotId), - outputs.get(kIndexId), - outputs.get(kIndexKey), - outputs.get(kIndexKeyPattern), - getCurrentCollection(reqs), - std::move(iamMap), - root->nodeId(), - std::move(relevantSlots), - _slotIdGenerator); + auto fields = reqs.getFields(); + + auto childRecordId = outputs.get(kRecordId); + auto fetchResultSlot = _slotIdGenerator.generate(); + auto fetchRecordIdSlot = _slotIdGenerator.generate(); + auto fieldSlots = _slotIdGenerator.generateMultiple(fields.size()); + + stage = makeLoopJoinForFetch(std::move(stage), + fetchResultSlot, + fetchRecordIdSlot, + fields, + fieldSlots, + childRecordId, + outputs.get(kSnapshotId), + outputs.get(kIndexId), + outputs.get(kIndexKey), + outputs.get(kIndexKeyPattern), + getCurrentCollection(reqs), + std::move(iamMap), + root->nodeId(), + std::move(relevantSlots)); outputs.set(kResult, fetchResultSlot); - // Propagate the RecordId output only if requested. + + // Only propagate kRecordId if requested. if (reqs.has(kRecordId)) { outputs.set(kRecordId, fetchRecordIdSlot); } else { outputs.clear(kRecordId); } + for (size_t i = 0; i < fields.size(); ++i) { + outputs.set(std::make_pair(PlanStageSlots::kField, fields[i]), fieldSlots[i]); + } + if (fn->filter) { forwardingReqs = reqs.copy().set(kResult); - auto relevantSlots = getSlotsToForward(forwardingReqs, outputs); - // Forward slots for components of the index key if our parent requested them. - if (auto indexKeySlots = outputs.getIndexKeySlots()) { - relevantSlots.insert(relevantSlots.end(), indexKeySlots->begin(), indexKeySlots->end()); - } + auto relevantSlots = getSlotsToForward(forwardingReqs, outputs); auto [_, outputStage] = generateFilter(_state, fn->filter.get(), @@ -1046,6 +972,8 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder stage = outputStage.extractStage(root->nodeId()); } + outputs.clearNonRequiredSlots(reqs); + return {std::move(stage), std::move(outputs)}; } @@ -1197,22 +1125,12 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder sortPattern.size() > 0); auto child = sn->children[0].get(); - if (reqs.getIndexKeyBitset().has_value() || - (!reqs.has(kResult) && child->getType() == STAGE_IXSCAN)) { - // We decide to IndexKeySlots when building the child if: - // 1) The query is covered; or - // 2) This is a SORT->IXSCAN and kResult wasn't requested. + + if (auto [ixn, ct] = getFirstNodeByType(root, STAGE_IXSCAN); + !sn->fetched() && !reqs.has(kResult) && ixn && ct == 1) { return buildSortCovered(root, reqs); } - // The child must produce the kResult slot as well as all slots required by the parent of - // this SortNode. - auto childReqs = reqs.copy().set(kResult); - - auto [stage, outputs] = build(child, childReqs); - - auto collatorSlot = _data.env->getSlotIfExists("collator"_sd); - StringDataSet prefixSet; bool hasPartsWithCommonPrefix = false; for (const auto& part : sortPattern) { @@ -1226,14 +1144,18 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder } } + auto childReqs = reqs.copy().set(kResult); + auto [stage, childOutputs] = build(child, childReqs); + auto outputs = std::move(childOutputs); + + auto collatorSlot = _data.env->getSlotIfExists("collator"_sd); + sbe::value::SlotVector orderBy; std::vector<sbe::value::SortDirection> direction; sbe::value::SlotId outputSlotId = outputs.get(kResult); if (!hasPartsWithCommonPrefix) { // Handle the case where we are using kResult and there are no common prefixes. - sbe::value::SlotMap<std::unique_ptr<sbe::EExpression>> projectMap; - orderBy.reserve(sortPattern.size()); // Sorting has a limitation where only one of the sort patterns can involve arrays. @@ -1329,8 +1251,6 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder makeConstant(sbe::value::TypeTags::sortSpec, sbe::value::bitcastFrom<sbe::value::SortSpec*>(sortSpec.release())); - auto collatorSlot = _data.env->getSlotIfExists("collator"_sd); - // generateSortKey() will handle the parallel arrays check and sort key traversal for us, // so we don't need to generate our own sort key traversal logic in the SBE plan. stage = sbe::makeProjectStage(std::move(stage), @@ -1347,7 +1267,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder // Slots for sort stage to forward to parent stage. Values in these slots are not used during // sorting. - auto forwardedSlots = getSlotsToForward(childReqs, outputs); + auto forwardedSlots = getSlotsToForward(childReqs, outputs, orderBy); stage = sbe::makeS<sbe::SortStage>(std::move(stage), @@ -1359,52 +1279,40 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder _cq.getExpCtx()->allowDiskUse, root->nodeId()); + outputs.clearNonRequiredSlots(reqs); + return {std::move(stage), std::move(outputs)}; } std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder::buildSortCovered( const QuerySolutionNode* root, const PlanStageReqs& reqs) { + tassert(6023404, "buildSortCovered() does not support kResult", !reqs.has(kResult)); + const auto sn = static_cast<const SortNode*>(root); auto sortPattern = SortPattern{sn->pattern, _cq.getExpCtx()}; tassert(7047600, "QueryPlannerAnalysis should not produce a SortNode with an empty sort pattern", sortPattern.size() > 0); + tassert(6023422, "buildSortCovered() expected 'sn' to not be fetched", !sn->fetched()); - // The child must produce all of the slots required by the parent of this SortNode. - auto childReqs = reqs.copy(); auto child = sn->children[0].get(); - - auto parentIndexKeyBitset = reqs.getIndexKeyBitset().get_value_or({}); - BSONObj indexKeyPattern; - sbe::IndexKeysInclusionSet sortPatternKeyBitset; - StringMap<size_t> sortSlotPosMap; - - // Set IndexKeyBitset to request each part of the index requested by the parent and to request - // each part of the sort pattern. auto indexScan = static_cast<const IndexScanNode*>(getLoneNodeByType(child, STAGE_IXSCAN)); tassert(7047601, "Expected index scan below sort", indexScan); - indexKeyPattern = indexScan->index.keyPattern; + auto indexKeyPattern = indexScan->index.keyPattern; + + // The child must produce all of the slots required by the parent of this SortNode. + auto childReqs = reqs.copy(); + std::vector<std::string> keys; StringDataSet sortPathsSet; for (const auto& part : sortPattern) { - sortPathsSet.insert(part.fieldPath->fullPath()); + const auto& key = part.fieldPath->fullPath(); + keys.emplace_back(key); + sortPathsSet.emplace(key); } - // 'sortPatternKeyBitset' will contain a bit pattern indicating which parts of the index - // pattern are needed for sorting. 'sortPaths' will contain the field paths used in the - // sort pattern, ordered according to the index pattern. - std::vector<std::string> sortPaths; - std::tie(sortPatternKeyBitset, sortPaths) = - makeIndexKeyInclusionSet(indexKeyPattern, sortPathsSet); - childReqs.getIndexKeyBitset() = sortPatternKeyBitset | parentIndexKeyBitset; - - // Build a map that maps each field path to its position in 'sortPaths'. This will help us - // later when we need to convert the output of makeIndexKeyOutputSlotsMatchingParentReqs() - // from the index pattern's order to sort pattern's order. - for (size_t i = 0; i < sortPaths.size(); ++i) { - sortSlotPosMap.emplace(std::move(sortPaths[i]), i); - } + childReqs.setKeys(std::move(keys)); auto [stage, outputs] = build(child, childReqs); @@ -1412,26 +1320,6 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder sbe::value::SlotVector orderBy; std::vector<sbe::value::SortDirection> direction; - - // Handle the case where we are using IndexKeySlots. - auto indexKeySlots = *outputs.extractIndexKeySlots(); - - // Currently, 'indexKeySlots' contains slots for two kinds of index keys: - // 1. Keys requested for sort pattern - // 2. Keys requested by parent (if any) - // The first category of slots needs to go into the 'orderBy' vector. The second category - // of slots goes into 'outputs' to be used by the parent. - auto& childIndexKeyBitset = *childReqs.getIndexKeyBitset(); - - // The query planner does not support covered queries or SORT->IXSCAN plans on array fields - // for multikey indexes. Therefore we don't have to generate the parallel arrays check or - // the sort key traversal logic. - auto sortIndexKeySlots = makeIndexKeyOutputSlotsMatchingParentReqs( - indexKeyPattern, sortPatternKeyBitset, childIndexKeyBitset, indexKeySlots); - - // 'sortIndexKeySlots' is ordered according to the index pattern, but 'orderBy' needs to - // be ordered according to the sort pattern. We use 'sortIndexKeySlots' to convert from - // the index pattern's order to the sort pattern's order. orderBy.reserve(sortPattern.size()); direction.reserve(sortPattern.size()); for (const auto& part : sortPattern) { @@ -1439,19 +1327,8 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder // contains $meta, so this assertion should always be true. tassert(7047602, "Sort with $meta is not supported in SBE", part.fieldPath); - auto it = sortSlotPosMap.find(part.fieldPath->fullPath()); - tassert(6843200, - str::stream() << "Did not find sort path '" << part.fieldPath->fullPath() - << "' in sort path map", - it != sortSlotPosMap.end()); - - auto slotPos = it->second; - tassert(6843201, - str::stream() << "Sort path map for '" << part.fieldPath->fullPath() - << "' returned an index '" << slotPos << "' that is out of bounds", - slotPos < sortIndexKeySlots.size()); - - orderBy.push_back(sortIndexKeySlots[slotPos]); + orderBy.push_back( + outputs.get(std::make_pair(PlanStageSlots::kKey, part.fieldPath->fullPath()))); direction.push_back(part.isAscending ? sbe::value::SortDirection::Ascending : sbe::value::SortDirection::Descending); } @@ -1478,24 +1355,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder // Slots for sort stage to forward to parent stage. Values in these slots are not used during // sorting. - auto forwardedSlots = getSlotsToForward(childReqs, outputs); - - if (parentIndexKeyBitset.any()) { - auto parentIndexKeySlots = makeIndexKeyOutputSlotsMatchingParentReqs( - indexKeyPattern, parentIndexKeyBitset, childIndexKeyBitset, indexKeySlots); - - // The 'forwardedSlots' vector should include all slots requested by parent excluding - // any slots that appear in the 'orderBy' vector. - sbe::value::SlotSet orderBySlotsSet(orderBy.begin(), orderBy.end()); - for (auto slot : parentIndexKeySlots) { - if (!orderBySlotsSet.count(slot)) { - forwardedSlots.push_back(slot); - } - } - - // Make sure to store all of the slots requested by the parent into 'outputs'. - outputs.setIndexKeySlots(std::move(parentIndexKeySlots)); - } + auto forwardedSlots = getSlotsToForward(childReqs, outputs, orderBy); stage = sbe::makeS<sbe::SortStage>(std::move(stage), @@ -1507,11 +1367,13 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder _cq.getExpCtx()->allowDiskUse, root->nodeId()); + outputs.clearNonRequiredSlots(reqs); + return {std::move(stage), std::move(outputs)}; } std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> -SlotBasedStageBuilder::buildSortKeyGeneraror(const QuerySolutionNode* root, +SlotBasedStageBuilder::buildSortKeyGenerator(const QuerySolutionNode* root, const PlanStageReqs& reqs) { uasserted(4822883, "Sort key generator in not supported in SBE yet"); } @@ -1534,64 +1396,21 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder std::vector<sbe::value::SlotVector> inputKeys; std::vector<sbe::value::SlotVector> inputVals; - // Children must produce all of the slots required by the parent of this SortMergeNode. In - // addition, children must always produce a 'recordIdSlot' if the 'dedup' flag is true. - auto childReqs = reqs.copy().setIf(kRecordId, mergeSortNode->dedup); + std::vector<std::string> keys; + StringSet sortPatternSet; + for (auto&& sortPart : sortPattern) { + sortPatternSet.emplace(sortPart.fieldPath->fullPath()); + keys.emplace_back(sortPart.fieldPath->fullPath()); + } - // If a parent node has requested an index key bitset, then we will produce index keys - // corresponding to the sort pattern parts needed by said parent node. - bool parentRequestsIdxKeys = reqs.getIndexKeyBitset().has_value(); + // Children must produce all of the slots required by the parent of this SortMergeNode. In + // addition, children must always produce a 'recordIdSlot' if the 'dedup' flag is true, and + // they must produce kKey slots for each part of the sort pattern. + auto childReqs = reqs.copy().setIf(kRecordId, mergeSortNode->dedup).setKeys(std::move(keys)); for (auto&& child : mergeSortNode->children) { sbe::value::SlotVector inputKeysForChild; - // Retrieve the sort pattern provided by the subtree rooted at 'child'. In particular, if - // our child is a MERGE_SORT, it will provide the sort directly. If instead it's a tree - // containing a lone IXSCAN, we have to check the key pattern because 'providedSorts()' may - // or may not provide the baseSortPattern depending on the index bounds (in particular, - // if the bounds are fixed, the fields will be marked as 'ignored'). Otherwise, we attempt - // to retrieve it from 'providedSorts'. - auto childSortPattern = [&]() { - if (auto [msn, _] = getFirstNodeByType(child.get(), STAGE_SORT_MERGE); msn) { - auto node = static_cast<const MergeSortNode*>(msn); - return node->sort; - } else { - auto [ixn, ct] = getFirstNodeByType(child.get(), STAGE_IXSCAN); - if (ixn && ct == 1) { - auto node = static_cast<const IndexScanNode*>(ixn); - return node->index.keyPattern; - } - } - auto baseSort = child->providedSorts().getBaseSortPattern(); - tassert(6149600, - str::stream() << "Did not find sort pattern for child " << child->toString(), - !baseSort.isEmpty()); - return baseSort; - }(); - - // Map of field name to position within the index key. This is used to account for - // mismatches between the sort pattern and the index key pattern. For instance, suppose - // the requested sort is {a: 1, b: 1} and the index key pattern is {c: 1, b: 1, a: 1}. - // When the slots for the relevant components of the index key are generated (i.e. - // extract keys for 'b' and 'a'), we wish to insert them into 'inputKeys' in the order - // that they appear in the sort pattern. - StringMap<size_t> indexKeyPositionMap; - - sbe::IndexKeysInclusionSet indexKeyBitset; - size_t i = 0; - for (auto&& elt : childSortPattern) { - for (auto&& sortPart : sortPattern) { - auto path = sortPart.fieldPath->fullPath(); - if (elt.fieldNameStringData() == path) { - indexKeyBitset.set(i); - indexKeyPositionMap.emplace(path, indexKeyPositionMap.size()); - break; - } - } - ++i; - } - childReqs.getIndexKeyBitset() = indexKeyBitset; - // Children must produce a 'resultSlot' if they produce fetched results. auto [stage, outputs] = build(child.get(), childReqs); @@ -1600,30 +1419,9 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder " if the 'dedup' flag is set", !mergeSortNode->dedup || outputs.has(kRecordId)); - // Clear the index key bitset after building the child stage. - childReqs.getIndexKeyBitset() = boost::none; - - // Insert the index key slots in the order of the sort pattern. - auto indexKeys = outputs.extractIndexKeySlots(); - tassert(5184302, - "SORT_MERGE must receive index key slots as input from its child stages", - indexKeys); - for (const auto& part : sortPattern) { - auto partPath = part.fieldPath->fullPath(); - auto index = indexKeyPositionMap.find(partPath); - tassert(5184303, - str::stream() << "Could not find index key position for sort key part " - << partPath, - index != indexKeyPositionMap.end()); - auto indexPos = index->second; - tassert(5184304, - str::stream() << "Index position " << indexPos - << " is not less than number of index components " - << indexKeys->size(), - indexPos < indexKeys->size()); - auto indexKeyPart = indexKeys->at(indexPos); - inputKeysForChild.push_back(indexKeyPart); + inputKeysForChild.push_back( + outputs.get(std::make_pair(PlanStageSlots::kKey, part.fieldPath->fullPath()))); } inputKeys.push_back(std::move(inputKeysForChild)); @@ -1631,32 +1429,12 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder auto sv = getSlotsToForward(childReqs, outputs); - // If the parent of 'root' has requested index keys, then we need to pass along our input - // keys as input values, as they will be part of the output 'root' provides its parent. - if (parentRequestsIdxKeys) { - for (auto&& inputKey : inputKeys.back()) { - sv.push_back(inputKey); - } - } inputVals.push_back(std::move(sv)); } PlanStageSlots outputs(childReqs, &_slotIdGenerator); - auto outputVals = getSlotsToForward(childReqs, outputs); - // If the parent of 'root' has requested index keys, then we need to generate output slots to - // hold the index keys that will be used as input to the parent of 'root'. - if (parentRequestsIdxKeys) { - auto idxKeySv = sbe::makeSV(); - for (int idx = 0; idx < mergeSortNode->sort.nFields(); ++idx) { - idxKeySv.emplace_back(_slotIdGenerator.generate()); - } - outputs.setIndexKeySlots(idxKeySv); - - for (auto keySlot : idxKeySv) { - outputVals.push_back(keySlot); - } - } + auto outputVals = getSlotsToForward(childReqs, outputs); auto stage = sbe::makeS<sbe::SortedMergeStage>(std::move(inputStages), std::move(inputKeys), @@ -1674,6 +1452,8 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder } } + outputs.clearNonRequiredSlots(reqs); + return {std::move(stage), std::move(outputs)}; } @@ -1681,48 +1461,63 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder::buildProjectionSimple(const QuerySolutionNode* root, const PlanStageReqs& reqs) { using namespace std::literals; - invariant(!reqs.getIndexKeyBitset()); + tassert(6023405, "buildProjectionSimple() does not support kKey", !reqs.hasKeys()); auto pn = static_cast<const ProjectionNodeSimple*>(root); - // The child must produce all of the slots required by the parent of this ProjectionNodeSimple. - // In addition to that, the child must always produce a 'resultSlot' because it's needed by the - // projection logic below. - auto childReqs = reqs.copy().set(kResult); - auto [inputStage, outputs] = build(pn->children[0].get(), childReqs); + auto [fields, additionalFields] = splitVector(reqs.getFields(), [&](const std::string& s) { + return pn->proj.type() == projection_ast::ProjectType::kInclusion + ? pn->proj.getRequiredFields().count(s) + : !pn->proj.getExcludedPaths().count(s); + }); - const auto childResult = outputs.get(kResult); + auto childReqs = reqs.copy().clearAllFields().setFields(std::move(fields)); + auto [stage, childOutputs] = build(pn->children[0].get(), childReqs); + auto outputs = std::move(childOutputs); - sbe::MakeBsonObjStage::FieldBehavior behaviour; - const OrderedPathSet* fields; - if (pn->proj.type() == projection_ast::ProjectType::kInclusion) { - behaviour = sbe::MakeBsonObjStage::FieldBehavior::keep; - fields = &pn->proj.getRequiredFields(); - } else { - behaviour = sbe::MakeBsonObjStage::FieldBehavior::drop; - fields = &pn->proj.getExcludedPaths(); - } - - outputs.set(kResult, _slotIdGenerator.generate()); - inputStage = sbe::makeS<sbe::MakeBsonObjStage>(std::move(inputStage), - outputs.get(kResult), - childResult, - behaviour, - *fields, - OrderedPathSet{}, - sbe::value::SlotVector{}, - true, - false, - root->nodeId()); + if (reqs.has(kResult)) { + const auto childResult = outputs.get(kResult); + + sbe::MakeBsonObjStage::FieldBehavior behaviour; + const OrderedPathSet* fields; + if (pn->proj.type() == projection_ast::ProjectType::kInclusion) { + behaviour = sbe::MakeBsonObjStage::FieldBehavior::keep; + fields = &pn->proj.getRequiredFields(); + } else { + behaviour = sbe::MakeBsonObjStage::FieldBehavior::drop; + fields = &pn->proj.getExcludedPaths(); + } + + outputs.set(kResult, _slotIdGenerator.generate()); + stage = sbe::makeS<sbe::MakeBsonObjStage>(std::move(stage), + outputs.get(kResult), + childResult, + behaviour, + *fields, + OrderedPathSet{}, + sbe::value::SlotVector{}, + true, + false, + root->nodeId()); + } + + auto [outStage, nothingSlots] = projectNothingToSlots( + std::move(stage), additionalFields.size(), root->nodeId(), &_slotIdGenerator); + for (size_t i = 0; i < additionalFields.size(); ++i) { + outputs.set(std::make_pair(PlanStageSlots::kField, std::move(additionalFields[i])), + nothingSlots[i]); + } + + outputs.clearNonRequiredSlots(reqs); - return {std::move(inputStage), std::move(outputs)}; + return {std::move(outStage), std::move(outputs)}; } std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder::buildProjectionCovered(const QuerySolutionNode* root, const PlanStageReqs& reqs) { using namespace std::literals; - invariant(!reqs.getIndexKeyBitset()); + tassert(6023406, "buildProjectionCovered() does not support kKey", !reqs.hasKeys()); auto pn = static_cast<const ProjectionNodeCovered*>(root); invariant(pn->proj.isSimple()); @@ -1733,51 +1528,86 @@ SlotBasedStageBuilder::buildProjectionCovered(const QuerySolutionNode* root, !pn->children[0]->fetched()); // This is a ProjectionCoveredNode, so we will be pulling all the data we need from one index. - // Prepare a bitset to indicate which parts of the index key we need for the projection. - StringSet requiredFields = {pn->proj.getRequiredFields().begin(), - pn->proj.getRequiredFields().end()}; - - // The child must produce all of the slots required by the parent of this ProjectionNodeSimple, - // except for 'resultSlot' which will be produced by the MakeBsonObjStage below. In addition to - // that, the child must produce the index key slots that are needed by this covered projection. - // // pn->coveredKeyObj is the "index.keyPattern" from the child (which is either an IndexScanNode // or DistinctNode). pn->coveredKeyObj lists all the fields that the index can provide, not the - // fields that the projection wants. requiredFields lists all of the fields that the projection - // needs. Since this is a covered projection, we're guaranteed that pn->coveredKeyObj contains - // all of the fields that the projection needs. - auto childReqs = reqs.copy().clear(kResult); - - auto [indexKeyBitset, keyFieldNames] = - makeIndexKeyInclusionSet(pn->coveredKeyObj, requiredFields); - childReqs.getIndexKeyBitset() = std::move(indexKeyBitset); - - auto [inputStage, outputs] = build(pn->children[0].get(), childReqs); - - // Assert that the index scan produced index key slots for this covered projection. - auto indexKeySlots = *outputs.extractIndexKeySlots(); - - outputs.set(kResult, _slotIdGenerator.generate()); - inputStage = sbe::makeS<sbe::MakeBsonObjStage>(std::move(inputStage), - outputs.get(kResult), - boost::none, - boost::none, - std::vector<std::string>{}, - std::move(keyFieldNames), - std::move(indexKeySlots), - true, - false, - root->nodeId()); + // fields that the projection wants. 'pn->proj.getRequiredFields()' lists all of the fields + // that the projection needs. Since this is a simple covered projection, we're guaranteed that + // 'pn->proj.getRequiredFields()' is a subset of pn->coveredKeyObj. + + // List out the projected fields in the order they appear in 'coveredKeyObj'. + std::vector<std::string> keys; + StringDataSet keysSet; + for (auto&& elt : pn->coveredKeyObj) { + std::string key(elt.fieldNameStringData()); + if (pn->proj.getRequiredFields().count(key)) { + keys.emplace_back(std::move(key)); + keysSet.emplace(elt.fieldNameStringData()); + } + } + + // The child must produce all of the slots required by the parent of this ProjectionNodeSimple, + // except for 'resultSlot' which will be produced by the MakeBsonObjStage below if requested by + // the caller. In addition to that, the child must produce the index key slots that are needed + // by this covered projection. + auto childReqs = reqs.copy().clear(kResult).clearAllFields().setKeys(keys); + auto [stage, childOutputs] = build(pn->children[0].get(), childReqs); + auto outputs = std::move(childOutputs); + + if (reqs.has(kResult)) { + auto indexKeySlots = sbe::makeSV(); + std::vector<std::string> keyFieldNames; - return {std::move(inputStage), std::move(outputs)}; + if (keysSet.count("_id"_sd)) { + keyFieldNames.emplace_back("_id"_sd); + indexKeySlots.emplace_back(outputs.get(std::make_pair(PlanStageSlots::kKey, "_id"_sd))); + } + + for (const auto& key : keys) { + if (key != "_id"_sd) { + keyFieldNames.emplace_back(key); + indexKeySlots.emplace_back( + outputs.get(std::make_pair(PlanStageSlots::kKey, StringData(key)))); + } + } + + auto resultSlot = _slotIdGenerator.generate(); + stage = sbe::makeS<sbe::MakeBsonObjStage>(std::move(stage), + resultSlot, + boost::none, + boost::none, + std::vector<std::string>{}, + std::move(keyFieldNames), + std::move(indexKeySlots), + true, + false, + root->nodeId()); + + outputs.set(kResult, resultSlot); + } + + auto [fields, additionalFields] = + splitVector(reqs.getFields(), [&](const std::string& s) { return keysSet.count(s); }); + for (size_t i = 0; i < fields.size(); ++i) { + auto slot = outputs.get(std::make_pair(PlanStageSlots::kKey, StringData(fields[i]))); + outputs.set(std::make_pair(PlanStageSlots::kField, std::move(fields[i])), slot); + } + + auto [outStage, nothingSlots] = projectNothingToSlots( + std::move(stage), additionalFields.size(), root->nodeId(), &_slotIdGenerator); + for (size_t i = 0; i < additionalFields.size(); ++i) { + outputs.set(std::make_pair(PlanStageSlots::kField, std::move(additionalFields[i])), + nothingSlots[i]); + } + + outputs.clearNonRequiredSlots(reqs); + + return {std::move(outStage), std::move(outputs)}; } std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder::buildProjectionDefault(const QuerySolutionNode* root, const PlanStageReqs& reqs) { - tassert(7055400, - "buildProjectionDefault() does not support index key bitsets", - !reqs.getIndexKeyBitset()); + tassert(6023407, "buildProjectionDefault() does not support kKey", !reqs.hasKeys()); auto pn = static_cast<const ProjectionNodeDefault*>(root); const auto& projection = pn->proj; @@ -1791,7 +1621,7 @@ SlotBasedStageBuilder::buildProjectionDefault(const QuerySolutionNode* root, // The child must produce all of the slots required by the parent of this ProjectionNodeDefault. // In addition to that, the child must always produce 'kResult' because it's needed by the // projection logic below. - auto childReqs = reqs.copy().set(kResult); + auto childReqs = reqs.copy().set(kResult).clearAllFields(); auto [stage, outputs] = build(pn->children[0].get(), childReqs); @@ -1805,8 +1635,10 @@ SlotBasedStageBuilder::buildProjectionDefault(const QuerySolutionNode* root, root->nodeId()); stage = resultStage.extractStage(root->nodeId()); - outputs.set(kResult, resultSlot); + + outputs.clearNonRequiredSlots(reqs); + return {std::move(stage), std::move(outputs)}; } @@ -1814,9 +1646,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder::buildProjectionDefaultCovered(const QuerySolutionNode* root, const PlanStageReqs& reqs, const IndexScanNode* ixn) { - tassert(7055401, - "buildProjectionDefaultCovered() does not support index key bitsets", - !reqs.getIndexKeyBitset()); + tassert(6023408, "buildProjectionDefaultCovered() does not support kKey", !reqs.hasKeys()); auto pn = static_cast<const ProjectionNodeDefault*>(root); const auto& projection = pn->proj; @@ -1827,64 +1657,60 @@ SlotBasedStageBuilder::buildProjectionDefaultCovered(const QuerySolutionNode* ro tassert( 7055403, "buildProjectionDefaultCovered() expected 'pn' to not be fetched", !pn->fetched()); - // Convert projection fieldpaths into the tree of 'IndexKeyPatternTreeNode'. - IndexKeysBuilderContext context; - IndexKeysPreBuilder preVisitor{&context}; - IndexKeysInBuilder inVisitor{&context}; - IndexKeysPostBuilder postVisitor{&context}; - projection_ast::ProjectionASTConstWalker walker{&preVisitor, &inVisitor, &postVisitor}; - tree_walker::walk<true, projection_ast::ASTNode>(projection.root(), &walker); - - IndexKeyPatternTreeNode patternRoot = std::move(context.root); - - // Construct a bitset requesting slots from the underlying index scan. These slots - // correspond to index keys for projection fieldpaths. if (!ixn) { ixn = static_cast<const IndexScanNode*>(getLoneNodeByType(root, STAGE_IXSCAN)); } + auto& indexKeyPattern = ixn->index.keyPattern; - sbe::IndexKeysInclusionSet patternBitSet; + auto patternRoot = buildPatternTree(pn->proj); + std::vector<std::string> keys; + StringDataSet keysSet; std::vector<IndexKeyPatternTreeNode*> patternNodesForSlots; - size_t i = 0; for (const auto& element : indexKeyPattern) { sbe::MatchPath fieldRef{element.fieldNameStringData()}; // Projection field paths are always leaf nodes. In other words, projection like // {a: 1, 'a.b': 1} would produce a path collision error. if (auto node = patternRoot.findLeafNode(fieldRef); node) { - patternBitSet.set(i); + keys.emplace_back(element.fieldNameStringData()); + keysSet.emplace(element.fieldNameStringData()); patternNodesForSlots.push_back(node); } - - ++i; } - // We do not need index scan to restore the entire object. Instead, we will restore only - // necessary parts of it below. - auto childReqs = reqs.copy().clear(kResult); - childReqs.getIndexKeyBitset() = patternBitSet; + auto childReqs = reqs.copy().clear(kResult).clearAllFields().setKeys(keys); auto [stage, outputs] = build(pn->children[0].get(), childReqs); - auto indexKeySlots = *outputs.extractIndexKeySlots(); + auto [fields, additionalFields] = + splitVector(reqs.getFields(), [&](const std::string& s) { return keysSet.count(s); }); + for (size_t i = 0; i < fields.size(); ++i) { + auto slot = outputs.get(std::make_pair(PlanStageSlots::kKey, StringData(fields[i]))); + outputs.set(std::make_pair(PlanStageSlots::kField, std::move(fields[i])), slot); + } - // Extract slots corresponding to each of the projection fieldpaths. - invariant(indexKeySlots.size() == patternNodesForSlots.size()); - for (size_t i = 0; i < indexKeySlots.size(); i++) { - patternNodesForSlots[i]->indexKeySlot = indexKeySlots[i]; + if (reqs.has(kResult) || !additionalFields.empty()) { + // Extract slots corresponding to each of the projection fieldpaths. + for (size_t i = 0; i < keys.size(); i++) { + patternNodesForSlots[i]->indexKeySlot = + outputs.get(std::make_pair(PlanStageSlots::kKey, StringData(keys[i]))); + } + + // Finally, build the expression to create object with requested projection fieldpaths. + auto resultSlot = _slotIdGenerator.generate(); + outputs.set(kResult, resultSlot); + + stage = sbe::makeProjectStage( + std::move(stage), root->nodeId(), resultSlot, buildNewObjExpr(&patternRoot)); } - // Finally, build the expression to create object with requested projection fieldpaths. - auto resultSlot = _slotIdGenerator.generate(); - stage = sbe::makeProjectStage( - std::move(stage), root->nodeId(), resultSlot, buildNewObjExpr(&patternRoot)); + outputs.clearNonRequiredSlots(reqs); - outputs.set(kResult, resultSlot); return {std::move(stage), std::move(outputs)}; } std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder::buildOr( const QuerySolutionNode* root, const PlanStageReqs& reqs) { - invariant(!reqs.getIndexKeyBitset()); + tassert(6023409, "buildOr() does not support kKey", !reqs.hasKeys()); sbe::PlanStage::Vector inputStages; std::vector<sbe::value::SlotVector> inputSlots; @@ -1941,7 +1767,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder auto textNode = static_cast<const TextMatchNode*>(root); const auto& coll = getCurrentCollection(reqs); tassert(5432212, "no collection object", coll); - tassert(5432213, "index keys requested for text match node", !reqs.getIndexKeyBitset()); + tassert(6023410, "buildTextMatch() does not support kKey", !reqs.hasKeys()); tassert(5432215, str::stream() << "text match node must have one child, but got " << root->children.size(), @@ -1992,7 +1818,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder::buildReturnKey( const QuerySolutionNode* root, const PlanStageReqs& reqs) { - invariant(!reqs.getIndexKeyBitset()); + tassert(6023411, "buildReturnKey() does not support kKey", !reqs.hasKeys()); // TODO SERVER-49509: If the projection includes {$meta: "sortKey"}, the result of this stage // should also include the sort key. Everything else in the projection is ignored. @@ -2002,7 +1828,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder // for 'resultSlot'. In addition to that, the child must always produce a 'returnKeySlot'. // After build() returns, we take the 'returnKeySlot' produced by the child and store it into // 'resultSlot' for the parent of this ReturnKeyNode to consume. - auto childReqs = reqs.copy().clear(kResult).set(kReturnKey); + auto childReqs = reqs.copy().clear(kResult).clearAllFields().set(kReturnKey); auto [stage, outputs] = build(returnKeyNode->children[0].get(), childReqs); outputs.set(kResult, outputs.get(kReturnKey)); @@ -2020,9 +1846,10 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder const QuerySolutionNode* root, const PlanStageReqs& reqs) { auto andHashNode = static_cast<const AndHashNode*>(root); + tassert(6023412, "buildAndHash() does not support kKey", !reqs.hasKeys()); tassert(5073711, "need at least two children for AND_HASH", andHashNode->children.size() >= 2); - auto childReqs = reqs.copy().set(kResult).set(kRecordId); + auto childReqs = reqs.copy().set(kResult).set(kRecordId).clearAllFields(); auto outerChild = andHashNode->children[0].get(); auto innerChild = andHashNode->children[1].get(); @@ -2049,13 +1876,13 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder auto collatorSlot = _data.env->getSlotIfExists("collator"_sd); // Designate outputs. - PlanStageSlots outputs(reqs, &_slotIdGenerator); + PlanStageSlots outputs; + + outputs.set(kResult, innerResultSlot); + if (reqs.has(kRecordId)) { outputs.set(kRecordId, innerIdSlot); } - if (reqs.has(kResult)) { - outputs.set(kResult, innerResultSlot); - } if (reqs.has(kSnapshotId) && innerSnapshotIdSlot) { auto slot = *innerSnapshotIdSlot; innerProjectSlots.push_back(slot); @@ -2071,26 +1898,25 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder innerProjectSlots.push_back(slot); outputs.set(kIndexKey, slot); } - if (reqs.has(kIndexKeyPattern) && innerIndexKeyPatternSlot) { auto slot = *innerIndexKeyPatternSlot; innerProjectSlots.push_back(slot); outputs.set(kIndexKeyPattern, slot); } - auto hashJoinStage = sbe::makeS<sbe::HashJoinStage>(std::move(outerStage), - std::move(innerStage), - outerCondSlots, - outerProjectSlots, - innerCondSlots, - innerProjectSlots, - collatorSlot, - root->nodeId()); + auto stage = sbe::makeS<sbe::HashJoinStage>(std::move(outerStage), + std::move(innerStage), + outerCondSlots, + outerProjectSlots, + innerCondSlots, + innerProjectSlots, + collatorSlot, + root->nodeId()); // If there are more than 2 children, iterate all remaining children and hash // join together. for (size_t i = 2; i < andHashNode->children.size(); i++) { - auto [stage, outputs] = build(andHashNode->children[i].get(), childReqs); + auto [childStage, outputs] = build(andHashNode->children[i].get(), childReqs); tassert(5073714, "outputs must contain kRecordId slot", outputs.has(kRecordId)); tassert(5073715, "outputs must contain kResult slot", outputs.has(kResult)); auto idSlot = outputs.get(kRecordId); @@ -2100,32 +1926,30 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder // The previous HashJoinStage is always set as the inner stage, so that we can reuse the // innerIdSlot and innerResultSlot that have been designated as outputs. - hashJoinStage = sbe::makeS<sbe::HashJoinStage>(std::move(stage), - std::move(hashJoinStage), - condSlots, - projectSlots, - innerCondSlots, - innerProjectSlots, - collatorSlot, - root->nodeId()); - } - // Stop propagating the RecordId output if none of our ancestors are going to use it. - if (!reqs.has(kRecordId)) { - outputs.clear(kRecordId); + stage = sbe::makeS<sbe::HashJoinStage>(std::move(childStage), + std::move(stage), + condSlots, + projectSlots, + innerCondSlots, + innerProjectSlots, + collatorSlot, + root->nodeId()); } - return {std::move(hashJoinStage), std::move(outputs)}; + return {std::move(stage), std::move(outputs)}; } std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder::buildAndSorted( const QuerySolutionNode* root, const PlanStageReqs& reqs) { + tassert(6023413, "buildAndSorted() does not support kKey", !reqs.hasKeys()); + auto andSortedNode = static_cast<const AndSortedNode*>(root); // Need at least two children. tassert( 5073706, "need at least two children for AND_SORTED", andSortedNode->children.size() >= 2); - auto childReqs = reqs.copy().set(kResult).set(kRecordId); + auto childReqs = reqs.copy().set(kResult).set(kRecordId).clearAllFields(); auto outerChild = andSortedNode->children[0].get(); auto innerChild = andSortedNode->children[1].get(); @@ -2161,13 +1985,14 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder auto innerKeySlots = sbe::makeSV(innerIdSlot); auto innerProjectSlots = sbe::makeSV(innerResultSlot); - PlanStageSlots outputs(reqs, &_slotIdGenerator); + // Designate outputs. + PlanStageSlots outputs; + + outputs.set(kResult, innerResultSlot); + if (reqs.has(kRecordId)) { outputs.set(kRecordId, innerIdSlot); } - if (reqs.has(kResult)) { - outputs.set(kResult, innerResultSlot); - } if (reqs.has(kSnapshotId)) { auto innerSnapshotSlot = innerOutputs.get(kSnapshotId); innerProjectSlots.push_back(innerSnapshotSlot); @@ -2183,7 +2008,6 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder innerProjectSlots.push_back(innerIndexKeySlot); outputs.set(kIndexKey, innerIndexKeySlot); } - if (reqs.has(kIndexKeyPattern)) { auto innerIndexKeyPatternSlot = innerOutputs.get(kIndexKeyPattern); innerProjectSlots.push_back(innerIndexKeyPatternSlot); @@ -2193,19 +2017,19 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder std::vector<sbe::value::SortDirection> sortDirs(outerKeySlots.size(), sbe::value::SortDirection::Ascending); - auto mergeJoinStage = sbe::makeS<sbe::MergeJoinStage>(std::move(outerStage), - std::move(innerStage), - outerKeySlots, - outerProjectSlots, - innerKeySlots, - innerProjectSlots, - sortDirs, - root->nodeId()); + auto stage = sbe::makeS<sbe::MergeJoinStage>(std::move(outerStage), + std::move(innerStage), + outerKeySlots, + outerProjectSlots, + innerKeySlots, + innerProjectSlots, + sortDirs, + root->nodeId()); // If there are more than 2 children, iterate all remaining children and merge // join together. for (size_t i = 2; i < andSortedNode->children.size(); i++) { - auto [stage, outputs] = build(andSortedNode->children[i].get(), childReqs); + auto [childStage, outputs] = build(andSortedNode->children[i].get(), childReqs); tassert(5073709, "outputs must contain kRecordId slot", outputs.has(kRecordId)); tassert(5073710, "outputs must contain kResult slot", outputs.has(kResult)); auto idSlot = outputs.get(kRecordId); @@ -2213,21 +2037,17 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder auto keySlots = sbe::makeSV(idSlot); auto projectSlots = sbe::makeSV(resultSlot); - mergeJoinStage = sbe::makeS<sbe::MergeJoinStage>(std::move(stage), - std::move(mergeJoinStage), - keySlots, - projectSlots, - innerKeySlots, - innerProjectSlots, - sortDirs, - root->nodeId()); - } - // Stop propagating the RecordId output if none of our ancestors are going to use it. - if (!reqs.has(kRecordId)) { - outputs.clear(kRecordId); + stage = sbe::makeS<sbe::MergeJoinStage>(std::move(childStage), + std::move(stage), + keySlots, + projectSlots, + innerKeySlots, + innerProjectSlots, + sortDirs, + root->nodeId()); } - return {std::move(mergeJoinStage), std::move(outputs)}; + return {std::move(stage), std::move(outputs)}; } namespace { @@ -2313,39 +2133,6 @@ void walkAndActOnFieldPaths(Expression* expr, const F& fn) { } /** - * Checks whether all field paths in 'idExpr' and all accumulator expressions are top-level ones. - */ -bool areAllFieldPathsOptimizable(const boost::intrusive_ptr<Expression>& idExpr, - const std::vector<AccumulationStatement>& accStmts) { - auto areFieldPathsOptimizable = true; - - auto checkFieldPath = [&](const ExpressionFieldPath* fieldExpr, int32_t nestedCondLevel) { - // We optimize neither a field path for the top-level document itself (getPathLength() == 1) - // nor a field path that refers to a variable. We can optimize only top-level fields - // (getPathLength() == 2). - // - // The 'nestedCondLevel' being > 0 means that a field path is refered to below conditional - // expressions at the parent $group node, when we cannot optimize field path access and - // therefore, cannot avoid materialization. - if (nestedCondLevel > 0 || fieldExpr->getFieldPath().getPathLength() != 2 || - fieldExpr->isVariableReference()) { - areFieldPathsOptimizable = false; - return; - } - }; - - // Checks field paths from the group-by expression. - walkAndActOnFieldPaths(idExpr.get(), checkFieldPath); - - // Checks field paths from the accumulator expressions. - for (auto&& accStmt : accStmts) { - walkAndActOnFieldPaths(accStmt.expr.argument.get(), checkFieldPath); - } - - return areFieldPathsOptimizable; -} - -/** * If there are adjacent $group stages in a pipeline and two $group stages are pushed down together, * the first $group becomes a child GROUP node and the second $group becomes a parent GROUP node in * a query solution tree. In the case that all field paths are top-level fields for the parent GROUP @@ -2362,10 +2149,7 @@ EvalStage optimizeFieldPaths(StageBuilderState& state, const PlanStageSlots& childOutputs, PlanNodeId nodeId) { using namespace fmt::literals; - auto optionalRootSlot = childOutputs.getIfExists(SlotBasedStageBuilder::kResult); - // Absent root slot means that we optimized away mkbson stage and so, we need to search - // top-level field names in child outputs. - auto searchInChildOutputs = !optionalRootSlot.has_value(); + auto rootSlot = childOutputs.getIfExists(PlanStageSlots::kResult); auto retEvalStage = std::move(childEvalStage); walkAndActOnFieldPaths(expr.get(), [&](const ExpressionFieldPath* fieldExpr, int32_t) { @@ -2376,24 +2160,10 @@ EvalStage optimizeFieldPaths(StageBuilderState& state, } auto fieldPathStr = fieldExpr->getFieldPath().fullPath(); - if (searchInChildOutputs) { - if (auto optionalFieldPathSlot = childOutputs.getIfExists(fieldPathStr); - optionalFieldPathSlot) { - state.preGeneratedExprs.emplace(fieldPathStr, *optionalFieldPathSlot); - } else { - // getField('fieldPathStr') on a slot containing a BSONObj would have produced - // 'Nothing' if mkbson stage removal optimization didn't occur. So, generates a - // 'Nothing' const expression to simulate such a result. - state.preGeneratedExprs.emplace( - fieldPathStr, sbe::makeE<sbe::EConstant>(sbe::value::TypeTags::Nothing, 0)); - } - - return; - } if (!state.preGeneratedExprs.contains(fieldPathStr)) { auto [curEvalExpr, curEvalStage] = generateExpression( - state, fieldExpr, std::move(retEvalStage), optionalRootSlot, nodeId); + state, fieldExpr, std::move(retEvalStage), rootSlot, nodeId, &childOutputs); auto [slot, stage] = projectEvalExpr( std::move(curEvalExpr), std::move(curEvalStage), nodeId, state.slotIdGenerator); @@ -2418,7 +2188,7 @@ std::pair<EvalExpr, EvalStage> generateGroupByKeyImpl( optimizeFieldPaths(state, idExpr, std::move(childEvalStage), childOutputs, nodeId); auto [groupByEvalExpr, groupByEvalStage] = stage_builder::generateExpression( - state, idExpr.get(), std::move(evalStage), optionalRootSlot, nodeId); + state, idExpr.get(), std::move(evalStage), optionalRootSlot, nodeId, &childOutputs); return {std::move(groupByEvalExpr), std::move(groupByEvalStage)}; } @@ -2430,7 +2200,7 @@ std::tuple<sbe::value::SlotVector, EvalStage, std::unique_ptr<sbe::EExpression>> std::unique_ptr<sbe::PlanStage> childStage, PlanNodeId nodeId, sbe::value::SlotIdGenerator* slotIdGenerator) { - auto optionalRootSlot = childOutputs.getIfExists(SlotBasedStageBuilder::kResult); + auto optionalRootSlot = childOutputs.getIfExists(PlanStageSlots::kResult); EvalStage retEvalStage{std::move(childStage), optionalRootSlot ? sbe::value::SlotVector{*optionalRootSlot} : sbe::value::SlotVector{}}; @@ -2510,10 +2280,10 @@ std::tuple<sbe::value::SlotVector, EvalStage> generateAccumulator( PlanNodeId nodeId, sbe::value::SlotIdGenerator* slotIdGenerator, sbe::value::SlotMap<std::unique_ptr<sbe::EExpression>>& accSlotToExprMap) { - // Input fields may need field traversal which ends up being a complex tree. + // Input fields may need field traversal. auto evalStage = optimizeFieldPaths( state, accStmt.expr.argument, std::move(childEvalStage), childOutputs, nodeId); - auto optionalRootSlot = childOutputs.getIfExists(SlotBasedStageBuilder::kResult); + auto optionalRootSlot = childOutputs.getIfExists(PlanStageSlots::kResult); auto [argExpr, accArgEvalStage] = stage_builder::buildArgument( state, accStmt, std::move(evalStage), optionalRootSlot, nodeId); @@ -2642,6 +2412,7 @@ sbe::value::SlotVector dedupGroupBySlots(const sbe::value::SlotVector& groupBySl std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder::buildGroup( const QuerySolutionNode* root, const PlanStageReqs& reqs) { using namespace fmt::literals; + tassert(6023414, "buildGroup() does not support kKey", !reqs.hasKeys()); auto groupNode = static_cast<const GroupNode*>(root); auto nodeId = groupNode->nodeId(); @@ -2657,14 +2428,20 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder const auto& childNode = groupNode->children[0].get(); const auto& accStmts = groupNode->accumulators; - auto childStageType = childNode->getType(); - auto childReqs = reqs.copy(); - if (childStageType == StageType::STAGE_GROUP && areAllFieldPathsOptimizable(idExpr, accStmts)) { - // Does not ask the GROUP child for the result slot to avoid unnecessary materialization if - // all fields are top-level fields. See the end of this function. For example, GROUP - GROUP - // - COLLSCAN case. + auto childReqs = reqs.copy().set(kResult).clearAllFields(); + + // Don't ask the GROUP child for the result slot to avoid unnecessary materialization if it's + // possible to get everything we need from top-level field slots. + if (childNode->getType() == StageType::STAGE_GROUP && !groupNode->needWholeDocument && + !groupNode->needsAnyMetadata) { childReqs.clear(kResult); + + for (auto&& pathStr : groupNode->requiredFields) { + auto path = sbe::MatchPath{pathStr}; + const auto& topLevelField = path.getPart(0); + childReqs.set(std::make_pair(PlanStageSlots::kField, topLevelField)); + } } // Builds the child and gets the child result slot. @@ -2733,6 +2510,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder aggSlotsVec, nodeId, &_slotIdGenerator); + auto stage = groupFinalEvalStage.extractStage(nodeId); tassert(5851605, "The number of final slots must be as 1 (the final group-by slot) + the number of acc " @@ -2742,21 +2520,37 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder // Cleans up optimized expressions. _state.preGeneratedExprs.clear(); + auto fieldNamesSet = StringDataSet{fieldNames.begin(), fieldNames.end()}; + auto [fields, additionalFields] = + splitVector(reqs.getFields(), [&](const std::string& s) { return fieldNamesSet.count(s); }); + auto fieldsSet = StringDataSet{fields.begin(), fields.end()}; + PlanStageSlots outputs; - std::unique_ptr<sbe::PlanStage> outStage; - // Builds a stage to create a result object out of a group-by slot and gathered accumulator - // result slots if the parent node requests so. Otherwise, returns field names and associated - // slots so that a parent stage above can directly refer to a slot by its name because there's - // no returned object. + for (size_t i = 0; i < fieldNames.size(); ++i) { + if (fieldsSet.count(fieldNames[i])) { + outputs.set(std::make_pair(PlanStageSlots::kField, fieldNames[i]), finalSlots[i]); + } + }; + + auto [outStage, nothingSlots] = projectNothingToSlots( + std::move(stage), additionalFields.size(), root->nodeId(), &_slotIdGenerator); + for (size_t i = 0; i < additionalFields.size(); ++i) { + outputs.set(std::make_pair(PlanStageSlots::kField, std::move(additionalFields[i])), + nothingSlots[i]); + } + + // Builds a outStage to create a result object out of a group-by slot and gathered accumulator + // result slots if the parent node requests so. if (reqs.has(kResult)) { - outputs.set(kResult, _slotIdGenerator.generate()); + auto resultSlot = _slotIdGenerator.generate(); + outputs.set(kResult, resultSlot); // This mkbson stage combines 'finalSlots' into a bsonObject result slot which has // 'fieldNames' fields. if (groupNode->shouldProduceBson) { - outStage = sbe::makeS<sbe::MakeBsonObjStage>(groupFinalEvalStage.extractStage(nodeId), - outputs.get(kResult), // objSlot - boost::none, // rootSlot - boost::none, // fieldBehavior + outStage = sbe::makeS<sbe::MakeBsonObjStage>(std::move(outStage), + resultSlot, // objSlot + boost::none, // rootSlot + boost::none, // fieldBehavior std::vector<std::string>{}, // fields std::move(fieldNames), // projectFields std::move(finalSlots), // projectVars @@ -2764,8 +2558,8 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder false, // returnOldObject nodeId); } else { - outStage = sbe::makeS<sbe::MakeObjStage>(groupFinalEvalStage.extractStage(nodeId), - outputs.get(kResult), // objSlot + outStage = sbe::makeS<sbe::MakeObjStage>(std::move(outStage), + resultSlot, // objSlot boost::none, // rootSlot boost::none, // fieldBehavior std::vector<std::string>{}, // fields @@ -2775,12 +2569,6 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder false, // returnOldObject nodeId); } - } else { - for (size_t i = 0; i < finalSlots.size(); ++i) { - outputs.set("CURRENT." + fieldNames[i], finalSlots[i]); - }; - - outStage = groupFinalEvalStage.extractStage(nodeId); } return {std::move(outStage), std::move(outputs)}; @@ -2790,7 +2578,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder::makeUnionForTailableCollScan(const QuerySolutionNode* root, const PlanStageReqs& reqs) { using namespace std::literals; - invariant(!reqs.getIndexKeyBitset()); + tassert(6023415, "makeUnionForTailableCollScan() does not support kKey", !reqs.hasKeys()); // Register a SlotId in the global environment which would contain a recordId to resume a // tailable collection scan from. A PlanStage executor will track the last seen recordId and @@ -2814,7 +2602,7 @@ SlotBasedStageBuilder::makeUnionForTailableCollScan(const QuerySolutionNode* roo childReqs.setIsTailableCollScanResumeBranch(isTailableCollScanResumeBranch); auto [branch, outputs] = build(root, childReqs); - auto branchSlots = getSlotsToForward(reqs, outputs); + auto branchSlots = getSlotsToForward(childReqs, outputs); return {std::move(branchSlots), std::move(branch)}; }; @@ -2877,57 +2665,50 @@ auto buildShardFilterGivenShardKeySlot(sbe::value::SlotId shardKeySlot, } // namespace std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> -SlotBasedStageBuilder::buildShardFilterCovered(const ShardingFilterNode* filterNode, - sbe::value::SlotId shardFiltererSlot, - BSONObj shardKeyPattern, - BSONObj indexKeyPattern, - const QuerySolutionNode* child, - PlanStageReqs childReqs) { - StringDataSet shardKeyFields; - for (auto&& shardKeyElt : shardKeyPattern) { - shardKeyFields.insert(shardKeyElt.fieldNameStringData()); - } +SlotBasedStageBuilder::buildShardFilterCovered(const QuerySolutionNode* root, + const PlanStageReqs& reqs) { + // Constructs an optimized SBE plan for 'filterNode' in the case that the fields of the + // 'shardKeyPattern' are provided by 'child'. In this case, the SBE tree for 'child' will + // fill out slots for the necessary components of the index key. These slots can be read + // directly in order to determine the shard key that should be passed to the + // 'shardFiltererSlot'. + const auto filterNode = static_cast<const ShardingFilterNode*>(root); + auto child = filterNode->children[0].get(); + tassert(6023416, + "buildShardFilterCovered() expects ixscan below shard filter", + child->getType() == STAGE_IXSCAN || child->getType() == STAGE_VIRTUAL_SCAN); - // Save the bit vector describing the fields from the index that our parent requires. The shard - // filtering process may require additional fields that are not needed by the parent (for - // example, if the parent is projecting field "a" but the shard key is {a: 1, b: 1}). We will - // need the parent's reqs later on so that we can hand the correct slot vector for these fields - // back to our parent. - auto parentIndexKeyReqs = childReqs.getIndexKeyBitset(); + // Extract the child's key pattern. + BSONObj indexKeyPattern = child->getType() == STAGE_IXSCAN + ? static_cast<const IndexScanNode*>(child)->index.keyPattern + : static_cast<const VirtualScanNode*>(child)->indexKeyPattern; - // Determine the set of fields from the index required to obtain the shard key and union those - // with the set of fields from the index required by the parent stage. - auto [shardKeyIndexReqs, _] = makeIndexKeyInclusionSet(indexKeyPattern, shardKeyFields); - const auto ixKeyBitset = - parentIndexKeyReqs.value_or(sbe::IndexKeysInclusionSet{}) | shardKeyIndexReqs; - childReqs.getIndexKeyBitset() = ixKeyBitset; + auto childReqs = reqs.copy(); + + // If we're sharded make sure that we don't return data that isn't owned by the shard. This + // situation can occur when pending documents from in-progress migrations are inserted and when + // there are orphaned documents from aborted migrations. To check if the document is owned by + // the shard, we need to own a 'ShardFilterer', and extract the document's shard key as a + // BSONObj. + auto shardKeyPattern = _collections.getMainCollection().getShardKeyPattern(); + // We register the "shardFilterer" slot but we don't construct the ShardFilterer here, because + // once constructed the ShardFilterer will prevent orphaned documents from being deleted. We + // will construct the ShardFilterer later while preparing the SBE tree for execution. + auto shardFiltererSlot = _data.env->registerSlot( + "shardFilterer"_sd, sbe::value::TypeTags::Nothing, 0, false, &_slotIdGenerator); + + for (auto&& shardKeyElt : shardKeyPattern) { + childReqs.set(std::make_pair(PlanStageSlots::kKey, shardKeyElt.fieldNameStringData())); + } auto [stage, outputs] = build(child, childReqs); - tassert(5562302, "Expected child to produce index key slots", outputs.getIndexKeySlots()); - - // Maps from key name -> (index in outputs.getIndexKeySlots(), is hashed). - auto ixKeyPatternFieldToSlotIdx = [&, childOutputs = std::ref(outputs)]() { - StringDataMap<std::pair<sbe::value::SlotId, bool>> ret; - - // Keeps track of which component we're reading in the index key pattern. - size_t i = 0; - // Keeps track of the index we are in the slot vector produced by the ix scan. The slot - // vector produced by the ix scan may be a subset of the key pattern. - size_t slotIdx = 0; - for (auto&& ixPatternElt : indexKeyPattern) { - if (shardKeyFields.count(ixPatternElt.fieldNameStringData())) { - const bool isHashed = ixPatternElt.valueStringData() == IndexNames::HASHED; - const auto slotId = (*childOutputs.get().getIndexKeySlots())[slotIdx]; - ret.emplace(ixPatternElt.fieldNameStringData(), std::make_pair(slotId, isHashed)); - } - if (ixKeyBitset[i]) { - ++slotIdx; - } - ++i; - } - return ret; - }(); + // Maps from key name to a bool that indicates whether the key is hashed. + StringDataMap<bool> indexKeyPatternMap; + for (auto&& ixPatternElt : indexKeyPattern) { + indexKeyPatternMap.emplace(ixPatternElt.fieldNameStringData(), + ShardKeyPattern::isHashedPatternEl(ixPatternElt)); + } // Build a project stage to deal with hashed shard keys. This step *could* be skipped if we're // dealing with non-hashed sharding, but it's done this way for sake of simplicity. @@ -2935,33 +2716,28 @@ SlotBasedStageBuilder::buildShardFilterCovered(const ShardingFilterNode* filterN sbe::value::SlotVector fieldSlots; std::vector<std::string> projectFields; for (auto&& shardKeyPatternElt : shardKeyPattern) { - auto it = ixKeyPatternFieldToSlotIdx.find(shardKeyPatternElt.fieldNameStringData()); - tassert(5562303, "Could not find element", it != ixKeyPatternFieldToSlotIdx.end()); - auto [slotId, ixKeyEltHashed] = it->second; + auto it = indexKeyPatternMap.find(shardKeyPatternElt.fieldNameStringData()); + tassert(5562303, "Could not find element", it != indexKeyPatternMap.end()); + const auto ixKeyEltHashed = it->second; + const auto slotId = outputs.get( + std::make_pair(PlanStageSlots::kKey, shardKeyPatternElt.fieldNameStringData())); // Get the value stored in the index for this component of the shard key. We may have to // hash it. auto elem = makeVariable(slotId); + // Handle the case where the index key or shard key is hashed. const bool shardKeyEltHashed = ShardKeyPattern::isHashedPatternEl(shardKeyPatternElt); - - if (shardKeyEltHashed) { - if (ixKeyEltHashed) { - // (1) The index stores hashed data and the shard key field is hashed. - // Nothing to do here. We can apply shard filtering with no other changes. - } else { - // (2) The shard key field is hashed but the index stores unhashed data. We must - // apply the hash function before passing this off to the shard filter. - elem = makeFunction("shardHash"_sd, std::move(elem)); - } - } else { - if (ixKeyEltHashed) { - // (3) The index stores hashed data but the shard key is not hashed. This is a bug. - MONGO_UNREACHABLE_TASSERT(5562300); - } else { - // (4) The shard key field is not hashed, and the index does not store hashed data. - // Again, we do nothing here. - } + if (ixKeyEltHashed) { + // If the index stores hashed data, then we know the shard key field is hashed as + // well. Nothing to do here. We can apply shard filtering with no other changes. + tassert(6023421, + "Index key is hashed, expected corresponding shard key to be hashed", + shardKeyEltHashed); + } else if (shardKeyEltHashed) { + // The shard key field is hashed but the index stores unhashed data. We must apply + // the hash function before passing this off to the shard filter. + elem = makeFunction("shardHash"_sd, std::move(elem)); } fieldSlots.push_back(_slotIdGenerator.generate()); @@ -2987,46 +2763,17 @@ SlotBasedStageBuilder::buildShardFilterCovered(const ShardingFilterNode* filterN auto filterStage = buildShardFilterGivenShardKeySlot( shardKeySlot, std::move(mkObjStage), shardFiltererSlot, filterNode->nodeId()); - outputs.setIndexKeySlots(!parentIndexKeyReqs ? boost::none - : boost::optional<sbe::value::SlotVector>{ - makeIndexKeyOutputSlotsMatchingParentReqs( - indexKeyPattern, - *parentIndexKeyReqs, - *childReqs.getIndexKeyBitset(), - *outputs.getIndexKeySlots())}); + outputs.clearNonRequiredSlots(reqs); return {std::move(filterStage), std::move(outputs)}; } std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder::buildShardFilter( const QuerySolutionNode* root, const PlanStageReqs& reqs) { - const auto filterNode = static_cast<const ShardingFilterNode*>(root); - - // If we're sharded make sure that we don't return data that isn't owned by the shard. This - // situation can occur when pending documents from in-progress migrations are inserted and when - // there are orphaned documents from aborted migrations. To check if the document is owned by - // the shard, we need to own a 'ShardFilterer', and extract the document's shard key as a - // BSONObj. - auto shardKeyPattern = _collections.getMainCollection().getShardKeyPattern(); - // We register the "shardFilterer" slot but not construct the ShardFilterer here is because once - // constructed the ShardFilterer will prevent orphaned documents from being deleted. We will - // construct the 'ShardFiltered' later while preparing the SBE tree for execution. - auto shardFiltererSlot = _data.env->registerSlot( - "shardFilterer"_sd, sbe::value::TypeTags::Nothing, 0, false, &_slotIdGenerator); - - // Determine if our child is an index scan and extract it's key pattern, or empty BSONObj if our - // child is not an IXSCAN node. - BSONObj indexKeyPattern = [&]() { - auto childNode = filterNode->children[0].get(); - switch (childNode->getType()) { - case StageType::STAGE_IXSCAN: - return static_cast<const IndexScanNode*>(childNode)->index.keyPattern; - case StageType::STAGE_VIRTUAL_SCAN: - return static_cast<const VirtualScanNode*>(childNode)->indexKeyPattern; - default: - return BSONObj{}; - } - }(); + auto child = root->children[0].get(); + bool childIsIndexScan = child->getType() == STAGE_IXSCAN || + (child->getType() == STAGE_VIRTUAL_SCAN && + !static_cast<const VirtualScanNode*>(child)->indexKeyPattern.isEmpty()); // If we're not required to fill out the 'kResult' slot, then instead we can request a slot from // the child for each of the fields which constitute the shard key. This allows us to avoid @@ -3036,17 +2783,25 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder // We only apply this optimization in the special case that the child QSN is an IXSCAN, since in // this case we can request exactly the fields we need according to their position in the index // key pattern. - auto childReqs = reqs.copy().setIf(kResult, indexKeyPattern.isEmpty()); - if (!childReqs.has(kResult)) { - return buildShardFilterCovered(filterNode, - shardFiltererSlot, - shardKeyPattern, - std::move(indexKeyPattern), - filterNode->children[0].get(), - std::move(childReqs)); + if (!reqs.has(kResult) && childIsIndexScan) { + return buildShardFilterCovered(root, reqs); } - auto [stage, outputs] = build(filterNode->children[0].get(), childReqs); + auto childReqs = reqs.copy().set(kResult); + + // If we're sharded make sure that we don't return data that isn't owned by the shard. This + // situation can occur when pending documents from in-progress migrations are inserted and when + // there are orphaned documents from aborted migrations. To check if the document is owned by + // the shard, we need to own a 'ShardFilterer', and extract the document's shard key as a + // BSONObj. + auto shardKeyPattern = _collections.getMainCollection().getShardKeyPattern(); + // We register the "shardFilterer" slot but we don't construct the ShardFilterer here, because + // once constructed the ShardFilterer will prevent orphaned documents from being deleted. We + // will construct the ShardFilterer later while preparing the SBE tree for execution. + auto shardFiltererSlot = _data.env->registerSlot( + "shardFilterer"_sd, sbe::value::TypeTags::Nothing, 0, false, &_slotIdGenerator); + + auto [stage, outputs] = build(child, childReqs); // Build an expression to extract the shard key from the document based on the shard key // pattern. To do this, we iterate over the shard key pattern parts and build nested 'getField' @@ -3136,7 +2891,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder {STAGE_SKIP, &SlotBasedStageBuilder::buildSkip}, {STAGE_SORT_SIMPLE, &SlotBasedStageBuilder::buildSort}, {STAGE_SORT_DEFAULT, &SlotBasedStageBuilder::buildSort}, - {STAGE_SORT_KEY_GENERATOR, &SlotBasedStageBuilder::buildSortKeyGeneraror}, + {STAGE_SORT_KEY_GENERATOR, &SlotBasedStageBuilder::buildSortKeyGenerator}, {STAGE_PROJECTION_SIMPLE, &SlotBasedStageBuilder::buildProjectionSimple}, {STAGE_PROJECTION_DEFAULT, &SlotBasedStageBuilder::buildProjectionDefault}, {STAGE_PROJECTION_COVERED, &SlotBasedStageBuilder::buildProjectionCovered}, @@ -3178,6 +2933,29 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder break; } - return std::invoke(kStageBuilders.at(root->getType()), *this, root, reqs); + auto [stage, slots] = std::invoke(kStageBuilders.at(root->getType()), *this, root, reqs); + auto outputs = std::move(slots); + + auto fields = filterVector(reqs.getFields(), [&](const std::string& s) { + return !outputs.has(std::make_pair(PlanStageSlots::kField, StringData(s))); + }); + + if (!fields.empty()) { + tassert(6023424, + str::stream() << "Expected build() for " << stageTypeToString(root->getType()) + << " to either produce a kResult slot or to satisfy all kField reqs", + outputs.has(PlanStageSlots::kResult)); + + auto resultSlot = outputs.get(PlanStageSlots::kResult); + auto [outStage, slots] = projectTopLevelFields( + std::move(stage), fields, resultSlot, root->nodeId(), &_slotIdGenerator); + stage = std::move(outStage); + + for (size_t i = 0; i < fields.size(); ++i) { + outputs.set(std::make_pair(PlanStageSlots::kField, std::move(fields[i])), slots[i]); + } + } + + return {std::move(stage), std::move(outputs)}; } } // namespace mongo::stage_builder diff --git a/src/mongo/db/query/sbe_stage_builder.h b/src/mongo/db/query/sbe_stage_builder.h index 14bffbc1aa3..081660c43b1 100644 --- a/src/mongo/db/query/sbe_stage_builder.h +++ b/src/mongo/db/query/sbe_stage_builder.h @@ -40,6 +40,7 @@ #include "mongo/db/query/sbe_stage_builder_helpers.h" #include "mongo/db/query/shard_filterer_factory_interface.h" #include "mongo/db/query/stage_builder.h" +#include "mongo/util/pair_map.h" namespace mongo::stage_builder { /** @@ -51,6 +52,12 @@ std::unique_ptr<sbe::RuntimeEnvironment> makeRuntimeEnvironment( OperationContext* opCtx, sbe::value::SlotIdGenerator* slotIdGenerator); +class PlanStageReqs; +class PlanStageSlots; +sbe::value::SlotVector getSlotsToForward(const PlanStageReqs& reqs, + const PlanStageSlots& outputs, + const sbe::value::SlotVector& exclude = sbe::makeSV()); + /** * This function prepares the SBE tree for execution, such as attaching the OperationContext, * ensuring that the SBE tree is registered with the PlanYieldPolicySBE and populating the @@ -105,78 +112,86 @@ struct ParameterizedIndexScanSlots { */ class PlanStageSlots { public: - static constexpr StringData kResult = "result"_sd; - static constexpr StringData kRecordId = "recordId"_sd; - static constexpr StringData kReturnKey = "returnKey"_sd; - static constexpr StringData kSnapshotId = "snapshotId"_sd; - static constexpr StringData kIndexId = "indexId"_sd; - static constexpr StringData kIndexKey = "indexKey"_sd; - static constexpr StringData kIndexKeyPattern = "indexKeyPattern"_sd; + // The _slots map is capable of holding different "classes" of slots: + // 1) kMeta slots are used to hold the current document (kResult), record ID (kRecordId), and + // various pieces of metadata. + // 2) kField slots represent the values of top-level fields, or in some cases of dotted field + // paths (when we are getting the dotted field from a non-multikey index and we know no array + // traversal is needed). These slots hold the actual values of the fields / field paths (not + // the sort key or collation comparison key for the field). + // 3) kKey slots represent the raw key value that comes from an ixscan / ixseek stage for a + // given field path. This raw key value can be used for sorting / comparison, but it is not + // always equal to the actual value of the field path (for example, if the key is coming from + // an index that has a non-simple collation). + enum class Type { + kMeta, + kField, + kKey, + }; + + using Name = std::pair<Type, StringData>; + using OwnedName = std::pair<Type, std::string>; + + static constexpr auto kField = Type::kField; + static constexpr auto kKey = Type::kKey; + static constexpr auto kMeta = Type::kMeta; + + static constexpr Name kResult = {kMeta, "result"_sd}; + static constexpr Name kRecordId = {kMeta, "recordId"_sd}; + static constexpr Name kReturnKey = {kMeta, "returnKey"_sd}; + static constexpr Name kSnapshotId = {kMeta, "snapshotId"_sd}; + static constexpr Name kIndexId = {kMeta, "indexId"_sd}; + static constexpr Name kIndexKey = {kMeta, "indexKey"_sd}; + static constexpr Name kIndexKeyPattern = {kMeta, "indexKeyPattern"_sd}; PlanStageSlots() = default; PlanStageSlots(const PlanStageReqs& reqs, sbe::value::SlotIdGenerator* slotIdGenerator); - bool has(StringData str) const { + bool has(const Name& str) const { return _slots.count(str); } - sbe::value::SlotId get(StringData str) const { + sbe::value::SlotId get(const Name& str) const { auto it = _slots.find(str); invariant(it != _slots.end()); return it->second; } - boost::optional<sbe::value::SlotId> getIfExists(StringData str) const { + boost::optional<sbe::value::SlotId> getIfExists(const Name& str) const { if (auto it = _slots.find(str); it != _slots.end()) { return it->second; } return boost::none; } - void set(StringData str, sbe::value::SlotId slot) { - _slots[str] = slot; - } - - void clear(StringData str) { - _slots.erase(str); + void set(const Name& str, sbe::value::SlotId slot) { + _slots.insert_or_assign(str, slot); } - const boost::optional<sbe::value::SlotVector>& getIndexKeySlots() const { - return _indexKeySlots; + void set(OwnedName str, sbe::value::SlotId slot) { + _slots.insert_or_assign(std::move(str), slot); } - boost::optional<sbe::value::SlotVector> extractIndexKeySlots() { - ON_BLOCK_EXIT([this] { _indexKeySlots = boost::none; }); - return std::move(_indexKeySlots); - } - - void setIndexKeySlots(sbe::value::SlotVector iks) { - _indexKeySlots = std::move(iks); - } - - void setIndexKeySlots(boost::optional<sbe::value::SlotVector> iks) { - _indexKeySlots = std::move(iks); + void clear(const Name& str) { + _slots.erase(str); } /** - * This method applies an action to some/all of the slots within this struct (excluding index - * key slots). For each slot in this struct, the action is will be applied to the slot if (and - * only if) the corresponding flag in 'reqs' is true. + * This method applies an action to some/all of the slots within this struct. For each slot in + * this struct, the action is will be applied to the slot if (and only if) the corresponding + * flag in 'reqs' is true. */ inline void forEachSlot(const PlanStageReqs& reqs, const std::function<void(sbe::value::SlotId)>& fn) const; - inline void forEachSlot( - const PlanStageReqs& reqs, - const std::function<void(sbe::value::SlotId, const StringData&)>& fn) const; + inline void forEachSlot(const PlanStageReqs& reqs, + const std::function<void(sbe::value::SlotId, const Name&)>& fn) const; -private: - StringMap<sbe::value::SlotId> _slots; + inline void clearNonRequiredSlots(const PlanStageReqs& reqs); - // When an index scan produces parts of an index key for a covered plan, this is where the - // slots for the produced values are stored. - boost::optional<sbe::value::SlotVector> _indexKeySlots; +private: + PairMap<Type, std::string, sbe::value::SlotId> _slots; }; /** @@ -185,38 +200,71 @@ private: */ class PlanStageReqs { public: + using Type = PlanStageSlots::Type; + using Name = PlanStageSlots::Name; + using OwnedName = std::pair<Type, std::string>; + + static constexpr auto kField = PlanStageSlots::Type::kField; + static constexpr auto kKey = PlanStageSlots::Type::kKey; + static constexpr auto kMeta = PlanStageSlots::Type::kMeta; + PlanStageReqs copy() const { return *this; } - bool has(StringData str) const { + bool has(const Name& str) const { auto it = _slots.find(str); return it != _slots.end() && it->second; } - PlanStageReqs& set(StringData str) { - _slots[str] = true; + PlanStageReqs& set(const Name& str) { + _slots.insert_or_assign(str, true); + return *this; + } + + PlanStageReqs& set(OwnedName str) { + _slots.insert_or_assign(std::move(str), true); + return *this; + } + + PlanStageReqs& set(const std::vector<Name>& strs) { + for (size_t i = 0; i < strs.size(); ++i) { + _slots.insert_or_assign(strs[i], true); + } + return *this; + } + + PlanStageReqs& set(std::vector<OwnedName> strs) { + for (size_t i = 0; i < strs.size(); ++i) { + _slots.insert_or_assign(std::move(strs[i]), true); + } return *this; } - PlanStageReqs& setIf(StringData str, bool condition) { + PlanStageReqs& setIf(const Name& str, bool condition) { if (condition) { - _slots[str] = true; + _slots.insert_or_assign(str, true); } return *this; } - PlanStageReqs& clear(StringData str) { - _slots.erase(str); + PlanStageReqs& setFields(std::vector<std::string> strs) { + for (size_t i = 0; i < strs.size(); ++i) { + _slots.insert_or_assign(std::make_pair(kField, std::move(strs[i])), true); + } return *this; } - boost::optional<sbe::IndexKeysInclusionSet>& getIndexKeyBitset() { - return _indexKeyBitset; + PlanStageReqs& setKeys(std::vector<std::string> strs) { + for (size_t i = 0; i < strs.size(); ++i) { + _slots.insert_or_assign(std::make_pair(kKey, std::move(strs[i])), true); + } + return *this; } - const boost::optional<sbe::IndexKeysInclusionSet>& getIndexKeyBitset() const { - return _indexKeyBitset; + PlanStageReqs& clear(const Name& str) { + _slots.erase(str); + return *this; } bool getIsBuildingUnionForTailableCollScan() const { @@ -243,6 +291,52 @@ public: return _targetNamespace; } + bool hasType(Type t) const { + for (auto&& [name, isRequired] : _slots) { + if (isRequired && name.first == t) { + return true; + } + } + return false; + } + bool hasFields() const { + return hasType(kField); + } + bool hasKeys() const { + return hasType(kKey); + } + + std::vector<std::string> getOfType(Type t) const { + std::vector<std::string> res; + for (auto&& [name, isRequired] : _slots) { + if (isRequired && name.first == t) { + res.push_back(name.second); + } + } + std::sort(res.begin(), res.end()); + return res; + } + std::vector<std::string> getFields() const { + return getOfType(kField); + } + std::vector<std::string> getKeys() const { + return getOfType(kKey); + } + + PlanStageReqs& clearAllOfType(Type t) { + auto fields = getOfType(t); + for (const auto& field : fields) { + _slots.erase(std::make_pair(kField, StringData(field))); + } + return *this; + } + PlanStageReqs& clearAllFields() { + return clearAllOfType(kField); + } + PlanStageReqs& clearAllKeys() { + return clearAllOfType(kKey); + } + friend PlanStageSlots::PlanStageSlots(const PlanStageReqs& reqs, sbe::value::SlotIdGenerator* slotIdGenerator); @@ -251,14 +345,12 @@ public: friend void PlanStageSlots::forEachSlot( const PlanStageReqs& reqs, - const std::function<void(sbe::value::SlotId, const StringData&)>& fn) const; + const std::function<void(sbe::value::SlotId, const Name&)>& fn) const; -private: - StringMap<bool> _slots; + friend void PlanStageSlots::clearNonRequiredSlots(const PlanStageReqs& reqs); - // A bitset here indicates that we have a covered projection that is expecting to parts of the - // index key from an index scan. - boost::optional<sbe::IndexKeysInclusionSet> _indexKeyBitset; +private: + PairMap<Type, std::string, bool> _slots; // When we're in the middle of building a special union sub-tree implementing a tailable cursor // collection scan, this flag will be set to true. Otherwise this flag will be false. @@ -283,11 +375,11 @@ void PlanStageSlots::forEachSlot(const PlanStageReqs& reqs, // Clang raises an error if we attempt to use 'name' in the tassert() below, because // tassert() is a macro that uses lambdas and 'name' is defined via "local binding". // We work-around this by copying 'name' to a local variable 'slotName'. - auto slotName = StringData(name); + auto slotName = Name(name); auto it = _slots.find(slotName); tassert(7050900, - str::stream() << "Could not find '" << slotName - << "' slot in the map, expected slot to exist", + str::stream() << "Could not find " << static_cast<int>(slotName.first) << ":'" + << slotName.second << "' in the slot map, expected slot to exist", it != _slots.end()); fn(it->second); @@ -297,17 +389,17 @@ void PlanStageSlots::forEachSlot(const PlanStageReqs& reqs, void PlanStageSlots::forEachSlot( const PlanStageReqs& reqs, - const std::function<void(sbe::value::SlotId, const StringData&)>& fn) const { + const std::function<void(sbe::value::SlotId, const Name&)>& fn) const { for (auto&& [name, isRequired] : reqs._slots) { if (isRequired) { // Clang raises an error if we attempt to use 'name' in the tassert() below, because // tassert() is a macro that uses lambdas and 'name' is defined via "local binding". // We work-around this by copying 'name' to a local variable 'slotName'. - auto slotName = StringData(name); + auto slotName = Name(name); auto it = _slots.find(slotName); tassert(7050901, - str::stream() << "Could not find '" << slotName - << "' slot in the map, expected slot to exist", + str::stream() << "Could not find " << static_cast<int>(slotName.first) << ":'" + << slotName.second << "' in the slot map, expected slot to exist", it != _slots.end()); fn(it->second, slotName); @@ -315,6 +407,21 @@ void PlanStageSlots::forEachSlot( } } +void PlanStageSlots::clearNonRequiredSlots(const PlanStageReqs& reqs) { + auto it = _slots.begin(); + while (it != _slots.end()) { + auto& name = it->first; + auto reqIt = reqs._slots.find(name); + // We never clear kResult, regardless of whether it is required by 'reqs'. + if ((reqIt != reqs._slots.end() && reqIt->second) || + (name.first == kResult.first && name.second == kResult.second)) { + ++it; + } else { + _slots.erase(it++); + } + } +} + using InputParamToSlotMap = stdx::unordered_map<MatchExpression::InputParamId, sbe::value::SlotId>; using VariableIdToSlotMap = stdx::unordered_map<Variables::Id, sbe::value::SlotId>; @@ -443,13 +550,13 @@ private: */ class SlotBasedStageBuilder final : public StageBuilder<sbe::PlanStage> { public: - static constexpr StringData kResult = PlanStageSlots::kResult; - static constexpr StringData kRecordId = PlanStageSlots::kRecordId; - static constexpr StringData kReturnKey = PlanStageSlots::kReturnKey; - static constexpr StringData kSnapshotId = PlanStageSlots::kSnapshotId; - static constexpr StringData kIndexId = PlanStageSlots::kIndexId; - static constexpr StringData kIndexKey = PlanStageSlots::kIndexKey; - static constexpr StringData kIndexKeyPattern = PlanStageSlots::kIndexKeyPattern; + static constexpr auto kResult = PlanStageSlots::kResult; + static constexpr auto kRecordId = PlanStageSlots::kRecordId; + static constexpr auto kReturnKey = PlanStageSlots::kReturnKey; + static constexpr auto kSnapshotId = PlanStageSlots::kSnapshotId; + static constexpr auto kIndexId = PlanStageSlots::kIndexId; + static constexpr auto kIndexKey = PlanStageSlots::kIndexKey; + static constexpr auto kIndexKeyPattern = PlanStageSlots::kIndexKeyPattern; SlotBasedStageBuilder(OperationContext* opCtx, const MultipleCollectionAccessor& collections, @@ -457,6 +564,12 @@ public: const QuerySolution& solution, PlanYieldPolicySBE* yieldPolicy); + /** + * This method will build an SBE PlanStage tree for QuerySolutionNode 'root' and its + * descendents. + * + * This method is a wrapper around 'build(const QuerySolutionNode*, const PlanStageReqs&)'. + */ std::unique_ptr<sbe::PlanStage> build(const QuerySolutionNode* root) final; PlanStageData getPlanStageData() { @@ -464,6 +577,14 @@ public: } private: + /** + * This method will build an SBE PlanStage tree for QuerySolutionNode 'root' and its + * descendents. + * + * Based on the type of 'root', this method will dispatch to the appropriate buildXXX() method. + * This method will also handle generating calls to getField() to satisfy kField reqs that were + * not satisfied by the buildXXX() method. + */ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> build(const QuerySolutionNode* node, const PlanStageReqs& reqs); @@ -494,7 +615,7 @@ private: std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> buildSortCovered( const QuerySolutionNode* root, const PlanStageReqs& reqs); - std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> buildSortKeyGeneraror( + std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> buildSortKeyGenerator( const QuerySolutionNode* root, const PlanStageReqs& reqs); std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> buildSortMerge( @@ -539,19 +660,14 @@ private: const QuerySolutionNode* root, const PlanStageReqs& reqs); /** - * Constructs an optimized SBE plan for 'filterNode' in the case that the fields of the - * 'shardKeyPattern' are provided by 'childIxscan'. In this case, the SBE plan for the child + * Constructs an optimized SBE plan for 'root' in the case that the fields of the shard key + * pattern are provided by the child index scan. In this case, the SBE plan for the child * index scan node will fill out slots for the necessary components of the index key. These * slots can be read directly in order to determine the shard key that should be passed to the * 'shardFiltererSlot'. */ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> buildShardFilterCovered( - const ShardingFilterNode* filterNode, - sbe::value::SlotId shardFiltererSlot, - BSONObj shardKeyPattern, - BSONObj indexKeyPattern, - const QuerySolutionNode* child, - PlanStageReqs childReqs); + const QuerySolutionNode* root, const PlanStageReqs& reqs); std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> buildGroup( const QuerySolutionNode* root, const PlanStageReqs& reqs); diff --git a/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp b/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp index c44c088ac87..34a6b8a00a6 100644 --- a/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp +++ b/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp @@ -158,7 +158,7 @@ std::unique_ptr<sbe::PlanStage> buildResumeFromRecordIdSubtree( std::move(seekRecordIdExpression)); // Construct a 'seek' branch of the 'union'. If we're succeeded to reposition the cursor, - // the branch will output the 'seekSlot' to start the real scan from, otherwise it will + // the branch will output the 'seekSlot' to start the real scan from, otherwise it will // produce EOF. auto seekBranch = sbe::makeS<sbe::LoopJoinStage>(std::move(projStage), @@ -232,7 +232,7 @@ std::unique_ptr<sbe::PlanStage> buildResumeFromRecordIdSubtree( * Creates a collection scan sub-tree optimized for oplog scans. We can built an optimized scan * when any of the following scenarios apply: * - * 1. There is a predicted on the 'ts' field of the oplog collection. + * 1. There is a predicate on the 'ts' field of the oplog collection. * 1.1 If a lower bound on 'ts' is present, the collection scan will seek directly to the * RecordId of an oplog entry as close to this lower bound as possible without going higher. * 1.2 If the query is *only* a lower bound on 'ts' on a forward scan, every document in the @@ -248,6 +248,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateOptimizedOplo StageBuilderState& state, const CollectionPtr& collection, const CollectionScanNode* csn, + const std::vector<std::string>& fields, PlanYieldPolicy* yieldPolicy, bool isTailableResumeBranch) { invariant(collection->ns().isOplog()); @@ -258,6 +259,8 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateOptimizedOplo // Oplog scan optimizations can only be done for a forward scan. invariant(csn->direction == CollectionScanParams::FORWARD); + auto fieldSlots = state.slotIdGenerator->generateMultiple(fields.size()); + auto resultSlot = state.slotId(); auto recordIdSlot = state.slotId(); @@ -290,9 +293,16 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateOptimizedOplo // of if we need to track the latest oplog timestamp. const auto shouldTrackLatestOplogTimestamp = (csn->maxRecord || csn->shouldTrackLatestOplogTimestamp); - auto&& [fields, slots, tsSlot] = makeOplogTimestampSlotsIfNeeded( + auto&& [scanFields, scanFieldSlots, tsSlot] = makeOplogTimestampSlotsIfNeeded( state.data->env, state.slotIdGenerator, shouldTrackLatestOplogTimestamp); + bool createScanWithAndWithoutFilter = (csn->filter && csn->stopApplyingFilterAfterFirstMatch); + + if (!createScanWithAndWithoutFilter) { + scanFields.insert(scanFields.end(), fields.begin(), fields.end()); + scanFieldSlots.insert(scanFieldSlots.end(), fieldSlots.begin(), fieldSlots.end()); + } + sbe::ScanCallbacks callbacks({}, {}, makeOpenCallbackIfNeeded(collection, csn)); auto stage = sbe::makeS<sbe::ScanStage>(collection->uuid(), resultSlot, @@ -302,8 +312,8 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateOptimizedOplo boost::none /* indexKeySlot */, boost::none /* keyPatternSlot */, tsSlot, - std::move(fields), - std::move(slots), + std::move(scanFields), + std::move(scanFieldSlots), seekRecordIdSlot, true /* forward */, yieldPolicy, @@ -342,7 +352,8 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateOptimizedOplo auto oObjSlot = state.slotId(); auto minTsSlot = state.slotId(); sbe::value::SlotVector minTsSlots = {minTsSlot, opTypeSlot, oObjSlot}; - std::vector<std::string> fields = {repl::OpTime::kTimestampFieldName.toString(), "op", "o"}; + std::vector<std::string> minTsFields = { + repl::OpTime::kTimestampFieldName.toString(), "op", "o"}; // If the first entry we see in the oplog is the replset initialization, then it doesn't // matter if its timestamp is later than the specified minTs; no events earlier than the @@ -375,7 +386,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateOptimizedOplo boost::none /* indexKeySlot */, boost::none /* keyPatternSlot */, boost::none /* oplogTsSlot*/, - std::move(fields), + std::move(minTsFields), minTsSlots, /* don't move this */ boost::none, true /* forward */, @@ -421,6 +432,18 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateOptimizedOplo tsSlot = state.slotId(); auto outputSlots = sbe::makeSV(resultSlot, recordIdSlot, *tsSlot); + if (!createScanWithAndWithoutFilter) { + auto unusedFieldSlots = state.slotIdGenerator->generateMultiple(fieldSlots.size()); + minTsSlots.insert(minTsSlots.end(), unusedFieldSlots.begin(), unusedFieldSlots.end()); + + realSlots.insert(realSlots.end(), fieldSlots.begin(), fieldSlots.end()); + + size_t numFieldSlots = fieldSlots.size(); + fieldSlots = state.slotIdGenerator->generateMultiple(numFieldSlots); + + outputSlots.insert(outputSlots.end(), fieldSlots.begin(), fieldSlots.end()); + } + // Create the union stage. The left branch, which runs first, is our resumability check. stage = sbe::makeS<sbe::UnionStage>( sbe::makeSs(std::move(minTsBranch), std::move(stage)), @@ -454,6 +477,8 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateOptimizedOplo relevantSlots.push_back(*tsSlot); } + relevantSlots.insert(relevantSlots.end(), fieldSlots.begin(), fieldSlots.end()); + auto [_, outputStage] = generateFilter(state, csn->filter.get(), {std::move(stage), std::move(relevantSlots)}, @@ -483,7 +508,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateOptimizedOplo // matches. This RecordId is then used as a starting point of the collection scan in the // inner branch, and the execution will continue from this point further on, without // applying the filter. - if (csn->stopApplyingFilterAfterFirstMatch) { + if (createScanWithAndWithoutFilter) { invariant(!csn->maxRecord); invariant(csn->direction == CollectionScanParams::FORWARD); @@ -491,9 +516,12 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateOptimizedOplo resultSlot = state.slotId(); recordIdSlot = state.slotId(); - std::tie(fields, slots, tsSlot) = makeOplogTimestampSlotsIfNeeded( + std::tie(scanFields, scanFieldSlots, tsSlot) = makeOplogTimestampSlotsIfNeeded( state.data->env, state.slotIdGenerator, shouldTrackLatestOplogTimestamp); + scanFields.insert(scanFields.end(), fields.begin(), fields.end()); + scanFieldSlots.insert(scanFieldSlots.end(), fieldSlots.begin(), fieldSlots.end()); + stage = sbe::makeS<sbe::LoopJoinStage>( sbe::makeS<sbe::LimitSkipStage>(std::move(stage), 1, boost::none, csn->nodeId()), sbe::makeS<sbe::ScanStage>(collection->uuid(), @@ -504,8 +532,8 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateOptimizedOplo boost::none /* indexKeySlot */, boost::none /* keyPatternSlot */, tsSlot, - std::move(fields), - std::move(slots), + std::move(scanFields), + std::move(scanFieldSlots), seekRecordIdSlot, true /* forward */, yieldPolicy, @@ -524,6 +552,9 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateOptimizedOplo PlanStageSlots outputs; outputs.set(PlanStageSlots::kResult, resultSlot); outputs.set(PlanStageSlots::kRecordId, recordIdSlot); + for (size_t i = 0; i < fields.size(); ++i) { + outputs.set(std::make_pair(PlanStageSlots::kField, fields[i]), fieldSlots[i]); + } return {std::move(stage), std::move(outputs)}; } @@ -540,6 +571,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateGenericCollSc StageBuilderState& state, const CollectionPtr& collection, const CollectionScanNode* csn, + const std::vector<std::string>& fields, PlanYieldPolicy* yieldPolicy, bool isTailableResumeBranch) { const auto forward = csn->direction == CollectionScanParams::FORWARD; @@ -548,6 +580,8 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateGenericCollSc invariant(!csn->resumeAfterRecordId || forward); invariant(!csn->resumeAfterRecordId || !csn->tailable); + auto fieldSlots = state.slotIdGenerator->generateMultiple(fields.size()); + auto resultSlot = state.slotId(); auto recordIdSlot = state.slotId(); auto [seekRecordIdSlot, seekRecordIdExpression] = @@ -563,9 +597,12 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateGenericCollSc }(); // See if we need to project out an oplog latest timestamp. - auto&& [fields, slots, tsSlot] = makeOplogTimestampSlotsIfNeeded( + auto&& [scanFields, scanFieldSlots, tsSlot] = makeOplogTimestampSlotsIfNeeded( state.data->env, state.slotIdGenerator, csn->shouldTrackLatestOplogTimestamp); + scanFields.insert(scanFields.end(), fields.begin(), fields.end()); + scanFieldSlots.insert(scanFieldSlots.end(), fieldSlots.begin(), fieldSlots.end()); + sbe::ScanCallbacks callbacks({}, {}, makeOpenCallbackIfNeeded(collection, csn)); auto stage = sbe::makeS<sbe::ScanStage>(collection->uuid(), resultSlot, @@ -575,8 +612,8 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateGenericCollSc boost::none /* indexKeySlot */, boost::none /* keyPatternSlot */, tsSlot, - std::move(fields), - std::move(slots), + std::move(scanFields), + std::move(scanFieldSlots), seekRecordIdSlot, forward, yieldPolicy, @@ -602,6 +639,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateGenericCollSc invariant(!csn->stopApplyingFilterAfterFirstMatch); auto relevantSlots = sbe::makeSV(resultSlot, recordIdSlot); + relevantSlots.insert(relevantSlots.end(), fieldSlots.begin(), fieldSlots.end()); auto [_, outputStage] = generateFilter(state, csn->filter.get(), @@ -614,6 +652,9 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateGenericCollSc PlanStageSlots outputs; outputs.set(PlanStageSlots::kResult, resultSlot); outputs.set(PlanStageSlots::kRecordId, recordIdSlot); + for (size_t i = 0; i < fields.size(); ++i) { + outputs.set(std::make_pair(PlanStageSlots::kField, fields[i]), fieldSlots[i]); + } return {std::move(stage), std::move(outputs)}; } @@ -623,13 +664,15 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateCollScan( StageBuilderState& state, const CollectionPtr& collection, const CollectionScanNode* csn, + const std::vector<std::string>& fields, PlanYieldPolicy* yieldPolicy, bool isTailableResumeBranch) { if (csn->minRecord || csn->maxRecord || csn->stopApplyingFilterAfterFirstMatch) { return generateOptimizedOplogScan( - state, collection, csn, yieldPolicy, isTailableResumeBranch); + state, collection, csn, fields, yieldPolicy, isTailableResumeBranch); } else { - return generateGenericCollScan(state, collection, csn, yieldPolicy, isTailableResumeBranch); + return generateGenericCollScan( + state, collection, csn, fields, yieldPolicy, isTailableResumeBranch); } } } // namespace mongo::stage_builder diff --git a/src/mongo/db/query/sbe_stage_builder_coll_scan.h b/src/mongo/db/query/sbe_stage_builder_coll_scan.h index b204c5487a8..3b6e1b861ae 100644 --- a/src/mongo/db/query/sbe_stage_builder_coll_scan.h +++ b/src/mongo/db/query/sbe_stage_builder_coll_scan.h @@ -41,7 +41,10 @@ namespace mongo::stage_builder { class PlanStageSlots; /** - * Generates an SBE plan stage sub-tree implementing an collection scan. + * Generates an SBE plan stage sub-tree implementing an collection scan. 'fields' can be used to + * specify top-level fields that should be retrieved during the scan. For each name in 'fields', + * there will be a corresponding kField slot in the PlanStageSlots object returned with the same + * name. * * On success, a tuple containing the following data is returned: * * A slot to access a fetched document (a resultSlot) @@ -56,6 +59,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateCollScan( StageBuilderState& state, const CollectionPtr& collection, const CollectionScanNode* csn, + const std::vector<std::string>& fields, PlanYieldPolicy* yieldPolicy, bool isTailableResumeBranch); diff --git a/src/mongo/db/query/sbe_stage_builder_expression.cpp b/src/mongo/db/query/sbe_stage_builder_expression.cpp index d96f7ea6a92..c1566970b6d 100644 --- a/src/mongo/db/query/sbe_stage_builder_expression.cpp +++ b/src/mongo/db/query/sbe_stage_builder_expression.cpp @@ -76,8 +76,9 @@ struct ExpressionVisitorContext { ExpressionVisitorContext(StageBuilderState& state, EvalStage inputStage, boost::optional<sbe::value::SlotId> optionalRootSlot, - PlanNodeId planNodeId) - : state(state), optionalRootSlot(optionalRootSlot), planNodeId(planNodeId) { + PlanNodeId planNodeId, + const PlanStageSlots* slots = nullptr) + : state(state), optionalRootSlot(optionalRootSlot), planNodeId(planNodeId), slots(slots) { evalStack.emplaceFrame(std::move(inputStage)); } @@ -142,6 +143,9 @@ struct ExpressionVisitorContext { std::stack<VarsFrame> varsFrameStack; // The id of the QuerySolutionNode to which the expression we are converting to SBE is attached. const PlanNodeId planNodeId; + + const PlanStageSlots* slots = nullptr; + // This stack contains slot id for the current element variable of $filter expression. std::stack<sbe::value::SlotId> filterExprSlotIdStack; // We use this counter to track which children of $filter we've already processed. @@ -149,17 +153,23 @@ struct ExpressionVisitorContext { }; std::unique_ptr<sbe::EExpression> generateTraverseHelper( - const sbe::EVariable& inputVar, + std::unique_ptr<sbe::EExpression> inputExpr, const FieldPath& fp, size_t level, - sbe::value::FrameIdGenerator* frameIdGenerator) { + sbe::value::FrameIdGenerator* frameIdGenerator, + boost::optional<sbe::value::SlotId> topLevelFieldSlot = boost::none) { using namespace std::literals; invariant(level < fp.getPathLength()); + tassert(6023417, + "Expected an input expression or top level field", + inputExpr.get() || topLevelFieldSlot.has_value()); // Generate an expression to read a sub-field at the current nested level. auto fieldName = sbe::makeE<sbe::EConstant>(fp.getFieldName(level)); - auto fieldExpr = makeFunction("getField"_sd, inputVar.clone(), std::move(fieldName)); + auto fieldExpr = topLevelFieldSlot + ? makeVariable(*topLevelFieldSlot) + : makeFunction("getField"_sd, std::move(inputExpr), std::move(fieldName)); if (level == fp.getPathLength() - 1) { // For the last level, we can just return the field slot without the need for a @@ -171,7 +181,7 @@ std::unique_ptr<sbe::EExpression> generateTraverseHelper( auto lambdaFrameId = frameIdGenerator->generate(); auto lambdaParam = sbe::EVariable{lambdaFrameId, 0}; - auto resultExpr = generateTraverseHelper(lambdaParam, fp, level + 1, frameIdGenerator); + auto resultExpr = generateTraverseHelper(lambdaParam.clone(), fp, level + 1, frameIdGenerator); auto lambdaExpr = sbe::makeE<sbe::ELocalLambda>(lambdaFrameId, std::move(resultExpr)); @@ -186,28 +196,32 @@ std::unique_ptr<sbe::EExpression> generateTraverseHelper( * For the given MatchExpression 'expr', generates a path traversal SBE plan stage sub-tree * implementing the comparison expression. */ -std::unique_ptr<sbe::EExpression> generateTraverse(const sbe::EVariable& inputVar, - bool expectsDocumentInputOnly, - const FieldPath& fp, - sbe::value::FrameIdGenerator* frameIdGenerator) { +std::unique_ptr<sbe::EExpression> generateTraverse( + std::unique_ptr<sbe::EExpression> inputExpr, + bool expectsDocumentInputOnly, + const FieldPath& fp, + sbe::value::FrameIdGenerator* frameIdGenerator, + boost::optional<sbe::value::SlotId> topLevelFieldSlot = boost::none) { size_t level = 0; if (expectsDocumentInputOnly) { - // When we know for sure that 'inputVar' will be a document and _not_ an array (such as - // when traversing the root document), we can generate a simpler expression. - return generateTraverseHelper(inputVar, fp, level, frameIdGenerator); + // When we know for sure that 'inputExpr' will be a document and _not_ an array (such as + // when accessing a field on the root document), we can generate a simpler expression. + return generateTraverseHelper( + std::move(inputExpr), fp, level, frameIdGenerator, topLevelFieldSlot); } else { - // The general case: the value in the 'inputVar' may be an array that will require + tassert(6023418, "Expected an input expression", inputExpr.get()); + // The general case: the value in the 'inputExpr' may be an array that will require // traversal. auto lambdaFrameId = frameIdGenerator->generate(); auto lambdaParam = sbe::EVariable{lambdaFrameId, 0}; - auto resultExpr = generateTraverseHelper(lambdaParam, fp, level, frameIdGenerator); + auto resultExpr = generateTraverseHelper(lambdaParam.clone(), fp, level, frameIdGenerator); auto lambdaExpr = sbe::makeE<sbe::ELocalLambda>(lambdaFrameId, std::move(resultExpr)); return makeFunction("traverseP", - inputVar.clone(), + std::move(inputExpr), std::move(lambdaExpr), makeConstant(sbe::value::TypeTags::NumberInt32, 1)); } @@ -1951,28 +1965,44 @@ public: void visit(const ExpressionFieldPath* expr) final { // There's a chance that we've already generated a SBE plan stage tree for this field path, // in which case we avoid regeneration of the same plan stage tree. - if (auto it = _context->state.preGeneratedExprs.find(expr->getFieldPath().fullPath()); - it != _context->state.preGeneratedExprs.end()) { - tassert(6089301, - "Expressions for top-level document or a variable must not be pre-generated", - expr->getFieldPath().getPathLength() != 1 && !expr->isVariableReference()); - if (auto optionalSlot = it->second.getSlot(); optionalSlot) { - _context->pushExpr(*optionalSlot); - } else { - auto preGeneratedExpr = it->second.extractExpr(); - _context->pushExpr(preGeneratedExpr->clone()); - it->second = std::move(preGeneratedExpr); + if (!_context->state.preGeneratedExprs.empty()) { + if (auto it = _context->state.preGeneratedExprs.find(expr->getFieldPath().fullPath()); + it != _context->state.preGeneratedExprs.end()) { + tassert(6089301, + "Expressions for top-level documents / variables must not be pre-generated", + expr->getFieldPath().getPathLength() != 1 && !expr->isVariableReference()); + if (auto optionalSlot = it->second.getSlot(); optionalSlot) { + _context->pushExpr(*optionalSlot); + } else { + auto preGeneratedExpr = it->second.extractExpr(); + _context->pushExpr(preGeneratedExpr->clone()); + it->second = std::move(preGeneratedExpr); + } + return; } - return; } - tassert(6075901, "Must have a valid root slot", _context->optionalRootSlot.has_value()); + boost::optional<sbe::value::SlotId> slotId; + boost::optional<sbe::value::SlotId> topLevelFieldSlot; + boost::optional<FieldPath> fp; + bool expectsDocumentInputOnly = false; - sbe::value::SlotId slotId; + if (expr->getFieldPath().getPathLength() > 1) { + fp = expr->getFieldPathWithoutCurrentPrefix(); + } + + if (expr->getVariableId() == Variables::kRootId && + expr->getFieldPath().getPathLength() > 1) { + slotId = _context->optionalRootSlot; + expectsDocumentInputOnly = true; - if (!Variables::isUserDefinedVariable(expr->getVariableId())) { + if (_context->slots) { + auto topLevelField = std::make_pair(PlanStageSlots::kField, fp->front()); + topLevelFieldSlot = _context->slots->getIfExists(topLevelField); + } + } else if (!Variables::isUserDefinedVariable(expr->getVariableId())) { if (expr->getVariableId() == Variables::kRootId) { - slotId = *(_context->optionalRootSlot); + slotId = _context->optionalRootSlot; } else if (expr->getVariableId() == Variables::kRemoveId) { // For the field paths that begin with "$$REMOVE", we always produce Nothing, // so no traversal is necessary. @@ -1990,7 +2020,7 @@ public: << "Builtin variable '$$" << it->second << "' is not available", variableSlot.has_value()); - slotId = *variableSlot; + slotId = variableSlot; } } else { auto it = _context->environment.find(expr->getVariableId()); @@ -2002,19 +2032,27 @@ public: } if (expr->getFieldPath().getPathLength() == 1) { + tassert(6023419, "Must have a valid slot", slotId.has_value()); + // A solo variable reference (e.g.: "$$ROOT" or "$$myvar") that doesn't need any // traversal. - _context->pushExpr(slotId); + _context->pushExpr(*slotId); return; } - // Dereference a dotted path, which may contain arrays requiring implicit traversal. - const bool expectsDocumentInputOnly = slotId == *(_context->optionalRootSlot); + tassert(6023420, + "Must have a valid root slot or field slot", + slotId.has_value() || topLevelFieldSlot.has_value()); + + auto inputExpr = + slotId ? sbe::makeE<sbe::EVariable>(*slotId) : std::unique_ptr<sbe::EExpression>{}; - auto resultExpr = generateTraverse(sbe::EVariable{slotId}, + // Dereference a dotted path, which may contain arrays requiring implicit traversal. + auto resultExpr = generateTraverse(std::move(inputExpr), expectsDocumentInputOnly, - expr->getFieldPathWithoutCurrentPrefix(), - _context->state.frameIdGenerator); + *fp, + _context->state.frameIdGenerator, + topLevelFieldSlot); _context->pushExpr(std::move(resultExpr)); } @@ -4126,8 +4164,9 @@ EvalExprStagePair generateExpression(StageBuilderState& state, const Expression* expr, EvalStage stage, boost::optional<sbe::value::SlotId> optionalRootSlot, - PlanNodeId planNodeId) { - ExpressionVisitorContext context(state, std::move(stage), optionalRootSlot, planNodeId); + PlanNodeId planNodeId, + const PlanStageSlots* slots) { + ExpressionVisitorContext context(state, std::move(stage), optionalRootSlot, planNodeId, slots); ExpressionPreVisitor preVisitor{&context}; ExpressionInVisitor inVisitor{&context}; diff --git a/src/mongo/db/query/sbe_stage_builder_expression.h b/src/mongo/db/query/sbe_stage_builder_expression.h index 3d148113f89..78b4774f807 100644 --- a/src/mongo/db/query/sbe_stage_builder_expression.h +++ b/src/mongo/db/query/sbe_stage_builder_expression.h @@ -38,6 +38,8 @@ #include "mongo/db/query/sbe_stage_builder_helpers.h" namespace mongo::stage_builder { +class PlanStageSlots; + /** * Translates an input Expression into an SBE EExpression. The 'stage' parameter provides the input * subtree to build on top of. @@ -46,7 +48,8 @@ EvalExprStagePair generateExpression(StageBuilderState& state, const Expression* expr, EvalStage stage, boost::optional<sbe::value::SlotId> optionalRootSlot, - PlanNodeId planNodeId); + PlanNodeId planNodeId, + const PlanStageSlots* slots = nullptr); /** * Generate an EExpression that converts a value (contained in a variable bound to 'branchRef') that diff --git a/src/mongo/db/query/sbe_stage_builder_helpers.cpp b/src/mongo/db/query/sbe_stage_builder_helpers.cpp index affc05691b1..c8acb608a42 100644 --- a/src/mongo/db/query/sbe_stage_builder_helpers.cpp +++ b/src/mongo/db/query/sbe_stage_builder_helpers.cpp @@ -958,27 +958,25 @@ bool indexKeyConsistencyCheckCallback(OperationContext* opCtx, return true; } -std::tuple<sbe::value::SlotId, sbe::value::SlotId, std::unique_ptr<sbe::PlanStage>> -makeLoopJoinForFetch(std::unique_ptr<sbe::PlanStage> inputStage, - sbe::value::SlotId seekKeySlot, - sbe::value::SlotId snapshotIdSlot, - sbe::value::SlotId indexIdSlot, - sbe::value::SlotId indexKeySlot, - sbe::value::SlotId indexKeyPatternSlot, - const CollectionPtr& collToFetch, - StringMap<const IndexAccessMethod*> iamMap, - PlanNodeId planNodeId, - sbe::value::SlotVector slotsToForward, - sbe::value::SlotIdGenerator& slotIdGenerator) { +std::unique_ptr<sbe::PlanStage> makeLoopJoinForFetch(std::unique_ptr<sbe::PlanStage> inputStage, + sbe::value::SlotId resultSlot, + sbe::value::SlotId recordIdSlot, + std::vector<std::string> fields, + sbe::value::SlotVector fieldSlots, + sbe::value::SlotId seekKeySlot, + sbe::value::SlotId snapshotIdSlot, + sbe::value::SlotId indexIdSlot, + sbe::value::SlotId indexKeySlot, + sbe::value::SlotId indexKeyPatternSlot, + const CollectionPtr& collToFetch, + StringMap<const IndexAccessMethod*> iamMap, + PlanNodeId planNodeId, + sbe::value::SlotVector slotsToForward) { // It is assumed that we are generating a fetch loop join over the main collection. If we are // generating a fetch over a secondary collection, it is the responsibility of a parent node // in the QSN tree to indicate which collection we are fetching over. tassert(6355301, "Cannot fetch from a collection that doesn't exist", collToFetch); - auto resultSlot = slotIdGenerator.generate(); - auto recordIdSlot = slotIdGenerator.generate(); - - using namespace std::placeholders; sbe::ScanCallbacks callbacks( indexKeyCorruptionCheckCallback, [=](auto&& arg1, auto&& arg2, auto&& arg3, auto&& arg4, auto&& arg5, auto&& arg6) { @@ -995,8 +993,8 @@ makeLoopJoinForFetch(std::unique_ptr<sbe::PlanStage> inputStage, indexKeySlot, indexKeyPatternSlot, boost::none, - std::vector<std::string>{}, - sbe::makeSV(), + std::move(fields), + std::move(fieldSlots), seekKeySlot, true, nullptr, @@ -1005,15 +1003,13 @@ makeLoopJoinForFetch(std::unique_ptr<sbe::PlanStage> inputStage, // Get the recordIdSlot from the outer side (e.g., IXSCAN) and feed it to the inner side, // limiting the result set to 1 row. - auto stage = sbe::makeS<sbe::LoopJoinStage>( + return sbe::makeS<sbe::LoopJoinStage>( std::move(inputStage), sbe::makeS<sbe::LimitSkipStage>(std::move(scanStage), 1, boost::none, planNodeId), std::move(slotsToForward), sbe::makeSV(seekKeySlot, snapshotIdSlot, indexIdSlot, indexKeySlot, indexKeyPatternSlot), nullptr, planNodeId); - - return {resultSlot, recordIdSlot, std::move(stage)}; } sbe::value::SlotId StageBuilderState::registerInputParamSlot( @@ -1121,4 +1117,172 @@ std::unique_ptr<sbe::PlanStage> rehydrateIndexKey(std::unique_ptr<sbe::PlanStage return sbe::makeProjectStage(std::move(stage), nodeId, resultSlot, std::move(keyExpr)); } +/** + * For covered projections, each of the projection field paths represent respective index key. To + * rehydrate index keys into the result object, we first need to convert projection AST into + * 'IndexKeyPatternTreeNode' structure. Context structure and visitors below are used for this + * purpose. + */ +struct IndexKeysBuilderContext { + // Contains resulting tree of index keys converted from projection AST. + IndexKeyPatternTreeNode root; + + // Full field path of the currently visited projection node. + std::vector<StringData> currentFieldPath; + + // Each projection node has a vector of field names. This stack contains indexes of the + // currently visited field names for each of the projection nodes. + std::vector<size_t> currentFieldIndex; +}; + +/** + * Covered projections are always inclusion-only, so we ban all other operators. + */ +class IndexKeysBuilder : public projection_ast::ProjectionASTConstVisitor { +public: + using projection_ast::ProjectionASTConstVisitor::visit; + + IndexKeysBuilder(IndexKeysBuilderContext* context) : _context{context} {} + + void visit(const projection_ast::ProjectionPositionalASTNode* node) final { + tasserted(5474501, "Positional projection is not allowed in simple or covered projections"); + } + + void visit(const projection_ast::ProjectionSliceASTNode* node) final { + tasserted(5474502, "$slice is not allowed in simple or covered projections"); + } + + void visit(const projection_ast::ProjectionElemMatchASTNode* node) final { + tasserted(5474503, "$elemMatch is not allowed in simple or covered projections"); + } + + void visit(const projection_ast::ExpressionASTNode* node) final { + tasserted(5474504, "Expressions are not allowed in simple or covered projections"); + } + + void visit(const projection_ast::MatchExpressionASTNode* node) final { + tasserted( + 5474505, + "$elemMatch / positional projection are not allowed in simple or covered projections"); + } + + void visit(const projection_ast::BooleanConstantASTNode* node) override {} + +protected: + IndexKeysBuilderContext* _context; +}; + +class IndexKeysPreBuilder final : public IndexKeysBuilder { +public: + using IndexKeysBuilder::IndexKeysBuilder; + using IndexKeysBuilder::visit; + + void visit(const projection_ast::ProjectionPathASTNode* node) final { + _context->currentFieldIndex.push_back(0); + _context->currentFieldPath.emplace_back(node->fieldNames().front()); + } +}; + +class IndexKeysInBuilder final : public IndexKeysBuilder { +public: + using IndexKeysBuilder::IndexKeysBuilder; + using IndexKeysBuilder::visit; + + void visit(const projection_ast::ProjectionPathASTNode* node) final { + auto& currentIndex = _context->currentFieldIndex.back(); + currentIndex++; + _context->currentFieldPath.back() = node->fieldNames()[currentIndex]; + } +}; + +class IndexKeysPostBuilder final : public IndexKeysBuilder { +public: + using IndexKeysBuilder::IndexKeysBuilder; + using IndexKeysBuilder::visit; + + void visit(const projection_ast::ProjectionPathASTNode* node) final { + _context->currentFieldIndex.pop_back(); + _context->currentFieldPath.pop_back(); + } + + void visit(const projection_ast::BooleanConstantASTNode* constantNode) final { + if (!constantNode->value()) { + // Even though only inclusion is allowed in covered projection, there still can be + // {_id: 0} component. We do not need to generate any nodes for it. + return; + } + + // Insert current field path into the index keys tree if it does not exist yet. + auto* node = &_context->root; + for (const auto& part : _context->currentFieldPath) { + if (auto it = node->children.find(part); it != node->children.end()) { + node = it->second.get(); + } else { + node = node->emplace(part); + } + } + } +}; + +IndexKeyPatternTreeNode buildPatternTree(const projection_ast::Projection& projection) { + IndexKeysBuilderContext context; + IndexKeysPreBuilder preVisitor{&context}; + IndexKeysInBuilder inVisitor{&context}; + IndexKeysPostBuilder postVisitor{&context}; + + projection_ast::ProjectionASTConstWalker walker{&preVisitor, &inVisitor, &postVisitor}; + + tree_walker::walk<true, projection_ast::ASTNode>(projection.root(), &walker); + + return std::move(context.root); +} + +std::pair<std::unique_ptr<sbe::PlanStage>, sbe::value::SlotVector> projectTopLevelFields( + std::unique_ptr<sbe::PlanStage> stage, + const std::vector<std::string>& fields, + sbe::value::SlotId resultSlot, + PlanNodeId nodeId, + sbe::value::SlotIdGenerator* slotIdGenerator) { + // 'outputSlots' will match the order of 'fields'. + sbe::value::SlotVector outputSlots; + outputSlots.reserve(fields.size()); + + sbe::value::SlotMap<std::unique_ptr<sbe::EExpression>> projects; + for (size_t i = 0; i < fields.size(); ++i) { + const auto& field = fields[i]; + auto slot = slotIdGenerator->generate(); + auto getFieldExpr = + makeFunction("getField"_sd, makeVariable(resultSlot), makeConstant(field)); + projects.insert({slot, std::move(getFieldExpr)}); + outputSlots.emplace_back(slot); + } + + if (!projects.empty()) { + stage = sbe::makeS<sbe::ProjectStage>(std::move(stage), std::move(projects), nodeId); + } + + return {std::move(stage), std::move(outputSlots)}; +} + +std::pair<std::unique_ptr<sbe::PlanStage>, sbe::value::SlotVector> projectNothingToSlots( + std::unique_ptr<sbe::PlanStage> stage, + size_t n, + PlanNodeId nodeId, + sbe::value::SlotIdGenerator* slotIdGenerator) { + if (n == 0) { + return {std::move(stage), sbe::value::SlotVector{}}; + } + + auto outputSlots = slotIdGenerator->generateMultiple(n); + + sbe::value::SlotMap<std::unique_ptr<sbe::EExpression>> projects; + for (size_t i = 0; i < n; ++i) { + projects.insert( + {outputSlots[i], sbe::makeE<sbe::EConstant>(sbe::value::TypeTags::Nothing, 0)}); + } + + stage = sbe::makeS<sbe::ProjectStage>(std::move(stage), std::move(projects), nodeId); + + return {std::move(stage), std::move(outputSlots)}; +} } // namespace mongo::stage_builder diff --git a/src/mongo/db/query/sbe_stage_builder_helpers.h b/src/mongo/db/query/sbe_stage_builder_helpers.h index f3b5d9ca455..634005c504c 100644 --- a/src/mongo/db/query/sbe_stage_builder_helpers.h +++ b/src/mongo/db/query/sbe_stage_builder_helpers.h @@ -44,6 +44,10 @@ #include "mongo/db/query/sbe_stage_builder_eval_frame.h" #include "mongo/db/query/stage_types.h" +namespace mongo::projection_ast { +class Projection; +} + namespace mongo::stage_builder { std::unique_ptr<sbe::EExpression> makeUnaryOp(sbe::EPrimUnary::Op unaryOp, @@ -524,19 +528,20 @@ std::unique_ptr<sbe::EExpression> makeLocalBind(sbe::value::FrameIdGenerator* fr return sbe::makeE<sbe::ELocalBind>(frameId, std::move(binds), std::move(innerExpr)); } - -std::tuple<sbe::value::SlotId, sbe::value::SlotId, std::unique_ptr<sbe::PlanStage>> -makeLoopJoinForFetch(std::unique_ptr<sbe::PlanStage> inputStage, - sbe::value::SlotId seekKeySlot, - sbe::value::SlotId snapshotIdSlot, - sbe::value::SlotId indexIdSlot, - sbe::value::SlotId indexKeySlot, - sbe::value::SlotId indexKeyPatternSlot, - const CollectionPtr& collToFetch, - StringMap<const IndexAccessMethod*> iamMap, - PlanNodeId planNodeId, - sbe::value::SlotVector slotsToForward, - sbe::value::SlotIdGenerator& slotIdGenerator); +std::unique_ptr<sbe::PlanStage> makeLoopJoinForFetch(std::unique_ptr<sbe::PlanStage> inputStage, + sbe::value::SlotId resultSlot, + sbe::value::SlotId recordIdSlot, + std::vector<std::string> fields, + sbe::value::SlotVector fieldSlots, + sbe::value::SlotId seekKeySlot, + sbe::value::SlotId snapshotIdSlot, + sbe::value::SlotId indexIdSlot, + sbe::value::SlotId indexKeySlot, + sbe::value::SlotId indexKeyPatternSlot, + const CollectionPtr& collToFetch, + StringMap<const IndexAccessMethod*> iamMap, + PlanNodeId planNodeId, + sbe::value::SlotVector slotsToForward); /** * Trees generated by 'generateFilter' maintain state during execution. There are two types of state @@ -1010,4 +1015,60 @@ std::unique_ptr<sbe::PlanStage> rehydrateIndexKey(std::unique_ptr<sbe::PlanStage const sbe::value::SlotVector& indexKeySlots, sbe::value::SlotId resultSlot); +IndexKeyPatternTreeNode buildPatternTree(const projection_ast::Projection& projection); + +/** + * This method retrieves the values of the specified top-level fields ('fields') from 'resultSlot' + * and stores the values into slots. + * + * This method returns a pair containing: (1) the updated SBE plan stage tree and; (2) a vector of + * the slots ('slots') containing the field values. + * + * The order of slots in 'slots' will match the order of fields in 'fields'. + */ +std::pair<std::unique_ptr<sbe::PlanStage>, sbe::value::SlotVector> projectTopLevelFields( + std::unique_ptr<sbe::PlanStage> stage, + const std::vector<std::string>& fields, + sbe::value::SlotId resultSlot, + PlanNodeId nodeId, + sbe::value::SlotIdGenerator* slotIdGenerator); + +/** + * This method projects the constant value Nothing to multiple slots (the specific number of slots + * being specified by parameter 'n'). + * + * This method returns a pair containing: (1) the updated SBE plan stage tree and; (2) a vector of + * slots ('slots') containing Nothing. + * + * The number of slots in 'slots' will always be equal to parameter 'n'. + */ +std::pair<std::unique_ptr<sbe::PlanStage>, sbe::value::SlotVector> projectNothingToSlots( + std::unique_ptr<sbe::PlanStage> stage, + size_t n, + PlanNodeId nodeId, + sbe::value::SlotIdGenerator* slotIdGenerator); + +template <typename T, typename FuncT> +std::vector<T> filterVector(std::vector<T> vec, FuncT fn) { + std::vector<T> result; + std::copy_if(std::make_move_iterator(vec.begin()), + std::make_move_iterator(vec.end()), + std::back_inserter(result), + fn); + return result; +} + +template <typename T, typename FuncT> +std::pair<std::vector<T>, std::vector<T>> splitVector(std::vector<T> vec, FuncT fn) { + std::pair<std::vector<T>, std::vector<T>> result; + for (size_t i = 0; i < vec.size(); ++i) { + if (fn(vec[i])) { + result.first.emplace_back(std::move(vec[i])); + } else { + result.second.emplace_back(std::move(vec[i])); + } + } + return result; +} + } // namespace mongo::stage_builder diff --git a/src/mongo/db/query/sbe_stage_builder_index_scan.cpp b/src/mongo/db/query/sbe_stage_builder_index_scan.cpp index 51d37ceccf4..bfc52d055c5 100644 --- a/src/mongo/db/query/sbe_stage_builder_index_scan.cpp +++ b/src/mongo/db/query/sbe_stage_builder_index_scan.cpp @@ -696,7 +696,6 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateIndexScan( PlanYieldPolicy* yieldPolicy, StringMap<const IndexAccessMethod*>* iamMap, bool needsCorruptionCheck) { - auto indexName = ixn->index.identifier.catalogName; auto descriptor = collection->getIndexCatalog()->findIndexByName(state.opCtx, indexName); tassert(5483200, @@ -855,8 +854,16 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateIndexScan( stage = outputStage.extractStage(ixn->nodeId()); } - outputs.setIndexKeySlots(makeIndexKeyOutputSlotsMatchingParentReqs( - ixn->index.keyPattern, originalIndexKeyBitset, indexKeyBitset, indexKeySlots)); + size_t i = 0; + size_t slotIdx = 0; + for (const auto& elt : ixn->index.keyPattern) { + StringData name = elt.fieldNameStringData(); + if (indexKeyBitset.test(i)) { + outputs.set(std::make_pair(PlanStageSlots::kKey, name), indexKeySlots[slotIdx]); + ++slotIdx; + } + ++i; + } return {std::move(stage), std::move(outputs)}; } @@ -981,8 +988,9 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateIndexScanWith // Whenever possible we should prefer building simplified single interval index scan plans in // order to get the best performance. if (canGenerateSingleIntervalIndexScan(ixn->iets)) { - auto makeSlot = [&](const bool cond, - const StringData slotKey) -> boost::optional<sbe::value::SlotId> { + auto makeSlot = + [&](const bool cond, + const PlanStageSlots::Name slotKey) -> boost::optional<sbe::value::SlotId> { if (!cond) return boost::none; @@ -1024,10 +1032,9 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateIndexScanWith auto optimizedIndexScanSlots = optimizedIndexKeySlots; auto branchOutputSlots = outputIndexKeySlots; - auto makeSlotsForThenElseBranches = - [&](const bool cond, - const StringData slotKey) -> std::tuple<boost::optional<sbe::value::SlotId>, - boost::optional<sbe::value::SlotId>> { + auto makeSlotsForThenElseBranches = [&](const bool cond, const PlanStageSlots::Name slotKey) + -> std::tuple<boost::optional<sbe::value::SlotId>, + boost::optional<sbe::value::SlotId>> { if (!cond) return {boost::none, boost::none}; @@ -1145,9 +1152,6 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateIndexScanWith stage = outputStage.extractStage(ixn->nodeId()); } - outputs.setIndexKeySlots(makeIndexKeyOutputSlotsMatchingParentReqs( - ixn->index.keyPattern, originalIndexKeyBitset, indexKeyBitset, outputIndexKeySlots)); - state.data->indexBoundsEvaluationInfos.emplace_back( IndexBoundsEvaluationInfo{ixn->index, accessMethod->getSortedDataInterface()->getKeyStringVersion(), @@ -1156,6 +1160,17 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateIndexScanWith std::move(ixn->iets), std::move(parameterizedScanSlots)}); + size_t i = 0; + size_t slotIdx = 0; + for (const auto& elt : ixn->index.keyPattern) { + StringData name = elt.fieldNameStringData(); + if (indexKeyBitset.test(i)) { + outputs.set(std::make_pair(PlanStageSlots::kKey, name), outputIndexKeySlots[slotIdx]); + ++slotIdx; + } + ++i; + } + return {std::move(stage), std::move(outputs)}; } } // namespace mongo::stage_builder diff --git a/src/mongo/db/query/sbe_stage_builder_index_scan.h b/src/mongo/db/query/sbe_stage_builder_index_scan.h index 29949cc1169..cded81563ac 100644 --- a/src/mongo/db/query/sbe_stage_builder_index_scan.h +++ b/src/mongo/db/query/sbe_stage_builder_index_scan.h @@ -48,14 +48,10 @@ using IndexIntervals = std::vector<std::pair<std::unique_ptr<KeyString::Value>, std::unique_ptr<KeyString::Value>>>; /** - * This method generates an SBE plan stage tree implementing an index scan. It returns a tuple - * containing: (1) a slot produced by the index scan that holds the record ID ('recordIdSlot'); - * (2) a slot vector produced by the index scan which hold parts of the index key ('indexKeySlots'); - * and (3) the SBE plan stage tree. 'indexKeySlots' will only contain slots for the parts of the - * index key specified by the 'indexKeysToInclude' bitset. - * - * If the caller provides a slot ID for the 'returnKeySlot' parameter, this method will populate - * the specified slot with the rehydrated index key for each record. + * This method returns a pair containing: (1) an SBE plan stage tree implementing an index scan; + * and (2) a PlanStageSlots object containing a kRecordId slot, possibly some other kMeta slots, + * and slots produced by the index scan that correspond to parts of the index key specified by + * the 'indexKeyBitset' bitset. */ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateIndexScan( StageBuilderState& state, @@ -87,40 +83,12 @@ std::pair<sbe::value::TypeTags, sbe::value::Value> packIndexIntervalsInSbeArray( /** * Constructs a generic multi-interval index scan. Depending on the intervals will either execute - * the optimized or the generic index scan subplan. The generated subtree will have - * the following form: + * the optimized or the generic index scan subplan. * - * branch {isGenericScanSlot} [recordIdSlot, resultSlot, ...] - * then - * filter {isRecordId(resultSlot)} - * lspool sp1 [resultSlot] {!isRecordId(resultSlot)} - * union [resultSlot] - project [startKeySlot = anchorSlot, unusedVarSlot0 = Nothing, ...] - * limit 1 - * coscan - * [checkBoundsSlot] - * nlj [] [seekKeySlot] - * left - * sspool sp1 [seekKeySlot] - * right - * chkbounds resultSlot recordIdSlot checkBoundsSlot - * nlj [] [lowKeySlot] - * left - * project [lowKeySlot = seekKeySlot] - * limit 1 - * coscan - * right - * ixseek lowKeySlot resultSlot recordIdSlot [] @coll @index - * else - * nlj [] [lowKeySlot, highKeySlot] - * left - * project [lowKeySlot = getField (unwindSlot, "l"), - * highKeySlot = getField (unwindSlot, "h")] - * unwind unwindSlot indexSlot boundsSlot false - * limit 1 - * coscan - * right - * ixseek lowKeySlot highKeySlot recordIdSlot [] @coll @index + * This method returns a pair containing: (1) an SBE plan stage tree implementing a generic multi- + * interval index scan; and (2) a PlanStageSlots object containing a kRecordId slot, possibly some + * other kMeta slots, and slots produced by the index scan that correspond to parts of the index + * key specified by the 'indexKeyBitset' bitset. */ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateIndexScanWithDynamicBounds( StageBuilderState& state, diff --git a/src/mongo/db/query/sbe_stage_builder_lookup.cpp b/src/mongo/db/query/sbe_stage_builder_lookup.cpp index 775090c9cdd..b4c431f26f5 100644 --- a/src/mongo/db/query/sbe_stage_builder_lookup.cpp +++ b/src/mongo/db/query/sbe_stage_builder_lookup.cpp @@ -920,17 +920,21 @@ std::pair<SlotId, std::unique_ptr<sbe::PlanStage>> buildIndexJoinLookupStage( // stored in 'foreignRecordSlot'. We also pass in 'snapshotIdSlot', 'indexIdSlot', // 'indexKeySlot' and 'indexKeyPatternSlot' to perform index consistency check during the // seek. - auto [foreignRecordSlot, __, scanNljStage] = makeLoopJoinForFetch(std::move(ixScanNljStage), - foreignRecordIdSlot, - snapshotIdSlot, - indexIdSlot, - indexKeySlot, - indexKeyPatternSlot, - foreignColl, - iamMap, - nodeId, - makeSV() /* slotsToForward */, - slotIdGenerator); + auto foreignRecordSlot = slotIdGenerator.generate(); + auto scanNljStage = makeLoopJoinForFetch(std::move(ixScanNljStage), + foreignRecordSlot, + slotIdGenerator.generate() /* unused recordId slot */, + std::vector<std::string>{}, + makeSV(), + foreignRecordIdSlot, + snapshotIdSlot, + indexIdSlot, + indexKeySlot, + indexKeyPatternSlot, + foreignColl, + iamMap, + nodeId, + makeSV() /* slotsToForward */); // 'buildForeignMatches()' filters the foreign records, returned by the index scan, to match // those in 'localKeysSetSlot'. This is necessary because some values are encoded with the same diff --git a/src/mongo/util/pair_map.h b/src/mongo/util/pair_map.h new file mode 100644 index 00000000000..6271fb9ae6d --- /dev/null +++ b/src/mongo/util/pair_map.h @@ -0,0 +1,204 @@ +/** + * Copyright (C) 2022-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <absl/container/flat_hash_map.h> +#include <absl/container/flat_hash_set.h> + +#include "mongo/base/string_data.h" +#include "mongo/util/assert_util.h" + +namespace mongo { + +template <typename T> +class PairMapTraits { +public: + using OwnedType = T; + using ViewType = T; + using HashableType = T; + + static const T& toOwned(const T& t) { + return t; + } + static const T& toView(const T& t) { + return t; + } + static const T& toHashable(const T& t) { + return t; + } +}; + +template <> +class PairMapTraits<StringData> { +public: + using OwnedType = std::string; + using ViewType = StringData; + using HashableType = absl::string_view; + + static std::string toOwned(const StringData& t) { + return t.toString(); + } + static const StringData& toView(const StringData& t) { + return t; + } + static absl::string_view toHashable(const StringData& t) { + // Use the default absl string hasher. + return absl::string_view(t.rawData(), t.size()); + } +}; + +template <> +class PairMapTraits<std::string> { +public: + using OwnedType = std::string; + using ViewType = StringData; + using HashableType = absl::string_view; + + static const std::string& toOwned(const std::string& t) { + return t; + } + static StringData toView(const std::string& t) { + return StringData(t); + } + static absl::string_view toHashable(const std::string& t) { + // Use the default absl string hasher. + return absl::string_view(t.data(), t.size()); + } +}; + +/** + * Type that bundles a hashed key with the actual pair value so that hashing can be performed + * outside of insert() call. This is needed to facilitate heterogeneous lookup. + */ +template <typename T1, typename T2> +class PairMapHashedKey { +public: + using OwnedType1 = typename PairMapTraits<T1>::OwnedType; + using OwnedType2 = typename PairMapTraits<T2>::OwnedType; + using OwnedPairType = std::pair<OwnedType1, OwnedType2>; + using ViewType1 = typename PairMapTraits<T1>::ViewType; + using ViewType2 = typename PairMapTraits<T2>::ViewType; + using ViewPairType = std::pair<ViewType1, ViewType2>; + + explicit PairMapHashedKey(ViewPairType key, std::size_t hash) + : _key(std::move(key)), _hash(hash) {} + + explicit operator OwnedPairType() const { + return {PairMapTraits<ViewType1>::toOwned(_key.first), + PairMapTraits<ViewType2>::toOwned(_key.second)}; + } + + const ViewPairType& key() const { + return _key; + } + + std::size_t hash() const { + return _hash; + } + +private: + ViewPairType _key; + std::size_t _hash; +}; + +/** + * Hasher to support heterogeneous lookup. + */ +template <typename T1, typename T2> +class PairMapHasher { +public: + using OwnedType1 = typename PairMapTraits<T1>::OwnedType; + using OwnedType2 = typename PairMapTraits<T2>::OwnedType; + using OwnedPairType = std::pair<OwnedType1, OwnedType2>; + using ViewType1 = typename PairMapTraits<T1>::ViewType; + using ViewType2 = typename PairMapTraits<T2>::ViewType; + using ViewPairType = std::pair<ViewType1, ViewType2>; + using HashableType1 = typename PairMapTraits<T1>::HashableType; + using HashableType2 = typename PairMapTraits<T2>::HashableType; + using HashablePairType = std::pair<HashableType1, HashableType2>; + + // This using directive activates heterogeneous lookup in the hash table. + using is_transparent = void; + + std::size_t operator()(const ViewPairType& sd) const { + return absl::Hash<HashablePairType>{}( + std::make_pair(PairMapTraits<ViewType1>::toHashable(sd.first), + PairMapTraits<ViewType2>::toHashable(sd.second))); + } + + std::size_t operator()(const OwnedPairType& s) const { + return operator()(std::make_pair(PairMapTraits<OwnedType1>::toView(s.first), + PairMapTraits<OwnedType2>::toView(s.second))); + } + + std::size_t operator()(const PairMapHashedKey<T1, T2>& key) const { + return key.hash(); + } + + PairMapHashedKey<T1, T2> hashed_key(const ViewPairType& sd) const { + return PairMapHashedKey<T1, T2>(sd, operator()(sd)); + } +}; + +template <typename T1, typename T2> +class PairMapEq { +public: + using ViewType1 = typename PairMapTraits<T1>::ViewType; + using ViewType2 = typename PairMapTraits<T2>::ViewType; + using ViewPairType = std::pair<ViewType1, ViewType2>; + + // This using directive activates heterogeneous lookup in the hash table. + using is_transparent = void; + + bool operator()(const ViewPairType& lhs, const ViewPairType& rhs) const { + return lhs == rhs; + } + + bool operator()(const PairMapHashedKey<T1, T2>& lhs, const ViewPairType& rhs) const { + return lhs.key() == rhs; + } + + bool operator()(const ViewPairType& lhs, const PairMapHashedKey<T1, T2>& rhs) const { + return lhs == rhs.key(); + } + + bool operator()(const PairMapHashedKey<T1, T2>& lhs, + const PairMapHashedKey<T1, T2>& rhs) const { + return lhs.key() == rhs.key(); + } +}; + +template <typename K1, typename K2, typename V> +using PairMap = absl::flat_hash_map<std::pair<K1, K2>, V, PairMapHasher<K1, K2>, PairMapEq<K1, K2>>; + +template <typename K1, typename K2> +using PairSet = absl::flat_hash_set<std::pair<K1, K2>, PairMapHasher<K1, K2>, PairMapEq<K1, K2>>; + +} // namespace mongo |