diff options
author | Justin Seyster <justin.seyster@mongodb.com> | 2020-05-29 15:37:48 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-07-16 03:01:28 +0000 |
commit | b1ecda7953b2d9e23857ceeb09f10f9cd18c959b (patch) | |
tree | 75d5c0322e310bf4be8b0b59de6c95d7f2efda72 | |
parent | b348b91ffdfab32e5a1ec9e63c9def884adf1acb (diff) | |
download | mongo-b1ecda7953b2d9e23857ceeb09f10f9cd18c959b.tar.gz |
SERVER-48721 Support for $returnKey in SBE
-rw-r--r-- | src/mongo/db/exec/sbe/parser/parser.cpp | 69 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/parser/parser.h | 8 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/stages/ix_scan.cpp | 46 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/stages/ix_scan.h | 37 | ||||
-rw-r--r-- | src/mongo/db/exec/sbe/values/id_generators.h | 8 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder.cpp | 60 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder.h | 11 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder_index_scan.cpp | 139 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder_index_scan.h | 3 |
9 files changed, 318 insertions, 63 deletions
diff --git a/src/mongo/db/exec/sbe/parser/parser.cpp b/src/mongo/db/exec/sbe/parser/parser.cpp index 546d26c8c5f..1e410bbb176 100644 --- a/src/mongo/db/exec/sbe/parser/parser.cpp +++ b/src/mongo/db/exec/sbe/parser/parser.cpp @@ -86,7 +86,7 @@ static constexpr auto kSyntax = R"( IXSCAN <- 'ixscan' IDENT? # optional variable name of the root object (record) delivered by the scan IDENT? # optional variable name of the record id delivered by the scan - IDENT_LIST_WITH_RENAMES # list of projected fields (may be empty) + IX_KEY_LIST_WITH_RENAMES # list of projected fields (may be empty) IDENT # collection name IDENT # index name to scan FORWARD_FLAG # forward scan or not @@ -95,7 +95,7 @@ static constexpr auto kSyntax = R"( IDENT # variable name of the high key IDENT? # optional variable name of the root object (record) delivered by the scan IDENT? # optional variable name of the record id delivered by the scan - IDENT_LIST_WITH_RENAMES # list of projected fields (may be empty) + IX_KEY_LIST_WITH_RENAMES # list of projected fields (may be empty) IDENT # collection name IDENT # index name to scan FORWARD_FLAG # forward scan or not @@ -207,6 +207,9 @@ static constexpr auto kSyntax = R"( IDENT_LIST_WITH_RENAMES <- '[' (IDENT_WITH_RENAME (',' IDENT_WITH_RENAME)*)? ']' IDENT_WITH_RENAME <- IDENT ('=' IDENT)? + IX_KEY_LIST_WITH_RENAMES <- '[' (IX_KEY_WITH_RENAME (',' IX_KEY_WITH_RENAME)*)? ']' + IX_KEY_WITH_RENAME <- IDENT '=' NUMBER + IDENT_LIST <- '[' (IDENT (',' IDENT)*)? ']' IDENT <- RAW_IDENT/ESC_IDENT @@ -223,6 +226,30 @@ static constexpr auto kSyntax = R"( %word <- [a-z]+ )"; +std::pair<IndexKeysInclusionSet, sbe::value::SlotVector> Parser::lookupIndexKeyRenames( + const std::vector<std::string>& renames, const std::vector<size_t>& indexKeys) { + IndexKeysInclusionSet indexKeysInclusion; + + // Each indexKey is associated with the parallel remap from the 'renames' vector. This + // map explicitly binds each indexKey with its remap and sorts them by 'indexKey' order. + stdx::unordered_map<int, sbe::value::SlotId> slotsByKeyIndex; + invariant(renames.size() == indexKeys.size()); + for (size_t idx = 0; idx < renames.size(); idx++) { + uassert(4872100, + str::stream() << "Cannot project index key at position " << indexKeys[idx], + indexKeys[idx] < indexKeysInclusion.size()); + slotsByKeyIndex[indexKeys[idx]] = lookupSlotStrict(renames[idx]); + } + + sbe::value::SlotVector slots; + for (auto&& [indexKey, slot] : slotsByKeyIndex) { + indexKeysInclusion.set(indexKey); + slots.push_back(slot); + } + + return {indexKeysInclusion, std::move(slots)}; +} + void Parser::walkChildren(AstQuery& ast) { for (const auto& node : ast.nodes) { walk(*node); @@ -271,6 +298,24 @@ void Parser::walkIdentListWithRename(AstQuery& ast) { } } +void Parser::walkIxKeyWithRename(AstQuery& ast) { + walkChildren(ast); + + ast.rename = ast.nodes[0]->identifier; + + // This token cannot be negative, because the parser does not accept a "-" prefix. + ast.indexKey = static_cast<size_t>(std::stoi(ast.nodes[1]->token)); +} + +void Parser::walkIxKeyListWithRename(AstQuery& ast) { + walkChildren(ast); + + for (auto& node : ast.nodes) { + ast.indexKeys.emplace_back(std::move(node->indexKey)); + ast.renames.emplace_back(std::move(node->rename)); + } +} + void Parser::walkProjectList(AstQuery& ast) { walkChildren(ast); @@ -625,13 +670,16 @@ void Parser::walkIndexScan(AstQuery& ast) { collection ? NamespaceStringOrUUID{dbName, collection->uuid()} : nssColl; const auto forward = (ast.nodes[forwardPos]->token == "true") ? true : false; + auto [indexKeysInclusion, vars] = + lookupIndexKeyRenames(ast.nodes[projectsPos]->renames, ast.nodes[projectsPos]->indexKeys); + ast.stage = makeS<IndexScanStage>(name, indexName, forward, lookupSlot(recordName), lookupSlot(recordIdName), - ast.nodes[projectsPos]->identifiers, - lookupSlots(ast.nodes[projectsPos]->renames), + indexKeysInclusion, + vars, boost::none, boost::none, nullptr, @@ -678,13 +726,16 @@ void Parser::walkIndexSeek(AstQuery& ast) { collection ? NamespaceStringOrUUID{dbName, collection->uuid()} : nssColl; const auto forward = (ast.nodes[forwardPos]->token == "true") ? true : false; + auto [indexKeysInclusion, vars] = + lookupIndexKeyRenames(ast.nodes[projectsPos]->renames, ast.nodes[projectsPos]->indexKeys); + ast.stage = makeS<IndexScanStage>(name, indexName, forward, lookupSlot(recordName), lookupSlot(recordIdName), - ast.nodes[projectsPos]->identifiers, - lookupSlots(ast.nodes[projectsPos]->renames), + indexKeysInclusion, + vars, lookupSlot(ast.nodes[0]->identifier), lookupSlot(ast.nodes[1]->identifier), nullptr, @@ -1339,6 +1390,12 @@ void Parser::walk(AstQuery& ast) { case "IDENT_LIST_WITH_RENAMES"_: walkIdentListWithRename(ast); break; + case "IX_KEY_WITH_RENAME"_: + walkIxKeyWithRename(ast); + break; + case "IX_KEY_LIST_WITH_RENAMES"_: + walkIxKeyListWithRename(ast); + break; case "PROJECT_LIST"_: walkProjectList(ast); break; diff --git a/src/mongo/db/exec/sbe/parser/parser.h b/src/mongo/db/exec/sbe/parser/parser.h index 9d17c7deb17..87e5b04be7f 100644 --- a/src/mongo/db/exec/sbe/parser/parser.h +++ b/src/mongo/db/exec/sbe/parser/parser.h @@ -33,6 +33,7 @@ #include <third_party/peglib/peglib.h> #include "mongo/db/exec/sbe/expressions/expression.h" +#include "mongo/db/exec/sbe/stages/ix_scan.h" #include "mongo/db/exec/sbe/stages/spool.h" #include "mongo/db/exec/sbe/stages/stages.h" #include "mongo/db/exec/sbe/values/id_generators.h" @@ -42,8 +43,10 @@ namespace mongo { namespace sbe { struct ParsedQueryTree { std::string identifier; + size_t indexKey; std::string rename; std::vector<std::string> identifiers; + std::vector<size_t> indexKeys; std::vector<std::string> renames; std::unique_ptr<PlanStage> stage; @@ -153,6 +156,9 @@ private: return result; } + std::pair<IndexKeysInclusionSet, sbe::value::SlotVector> lookupIndexKeyRenames( + const std::vector<std::string>& renames, const std::vector<size_t>& indexKeys); + SpoolId lookupSpoolBuffer(const std::string& name) { if (_spoolBuffersLookupTable.find(name) == _spoolBuffersLookupTable.end()) { _spoolBuffersLookupTable[name] = _spoolIdGenerator.generate(); @@ -165,6 +171,8 @@ private: void walkIdentList(AstQuery& ast); void walkIdentWithRename(AstQuery& ast); void walkIdentListWithRename(AstQuery& ast); + void walkIxKeyWithRename(AstQuery& ast); + void walkIxKeyListWithRename(AstQuery& ast); void walkProjectList(AstQuery& ast); void walkAssign(AstQuery& ast); diff --git a/src/mongo/db/exec/sbe/stages/ix_scan.cpp b/src/mongo/db/exec/sbe/stages/ix_scan.cpp index 9e3cdb21d44..f26dd1dbadb 100644 --- a/src/mongo/db/exec/sbe/stages/ix_scan.cpp +++ b/src/mongo/db/exec/sbe/stages/ix_scan.cpp @@ -33,6 +33,7 @@ #include "mongo/db/catalog/index_catalog.h" #include "mongo/db/exec/sbe/expressions/expression.h" +#include "mongo/db/exec/sbe/values/bson.h" #include "mongo/db/index/index_access_method.h" namespace mongo::sbe { @@ -41,7 +42,7 @@ IndexScanStage::IndexScanStage(const NamespaceStringOrUUID& name, bool forward, boost::optional<value::SlotId> recordSlot, boost::optional<value::SlotId> recordIdSlot, - std::vector<std::string> fields, + IndexKeysInclusionSet indexKeysToInclude, value::SlotVector vars, boost::optional<value::SlotId> seekKeySlotLow, boost::optional<value::SlotId> seekKeySlotHi, @@ -53,15 +54,16 @@ IndexScanStage::IndexScanStage(const NamespaceStringOrUUID& name, _forward(forward), _recordSlot(recordSlot), _recordIdSlot(recordIdSlot), - _fields(std::move(fields)), + _indexKeysToInclude(indexKeysToInclude), _vars(std::move(vars)), _seekKeySlotLow(seekKeySlotLow), _seekKeySlotHi(seekKeySlotHi), _tracker(tracker) { - invariant(_fields.size() == _vars.size()); // The valid state is when both boundaries, or none is set, or only low key is set. invariant((_seekKeySlotLow && _seekKeySlotHi) || (!_seekKeySlotLow && !_seekKeySlotHi) || (_seekKeySlotLow && !_seekKeySlotHi)); + + invariant(_indexKeysToInclude.count() == _vars.size()); } std::unique_ptr<PlanStage> IndexScanStage::clone() const { @@ -70,7 +72,7 @@ std::unique_ptr<PlanStage> IndexScanStage::clone() const { _forward, _recordSlot, _recordIdSlot, - _fields, + _indexKeysToInclude, _vars, _seekKeySlotLow, _seekKeySlotHi, @@ -87,12 +89,10 @@ void IndexScanStage::prepare(CompileCtx& ctx) { _recordIdAccessor = std::make_unique<value::ViewOfValueAccessor>(); } - for (size_t idx = 0; idx < _fields.size(); ++idx) { - auto [it, inserted] = - _fieldAccessors.emplace(_fields[idx], std::make_unique<value::ViewOfValueAccessor>()); - uassert(4822821, str::stream() << "duplicate field: " << _fields[idx], inserted); - auto [itRename, insertedRename] = _varAccessors.emplace(_vars[idx], it->second.get()); - uassert(4822822, str::stream() << "duplicate field: " << _vars[idx], insertedRename); + _accessors.resize(_vars.size()); + for (size_t idx = 0; idx < _accessors.size(); ++idx) { + auto [it, inserted] = _accessorMap.emplace(_vars[idx], &_accessors[idx]); + uassert(4822821, str::stream() << "duplicate slot: " << _vars[idx], inserted); } if (_seekKeySlotLow) { @@ -112,7 +112,7 @@ value::SlotAccessor* IndexScanStage::getAccessor(CompileCtx& ctx, value::SlotId return _recordIdAccessor.get(); } - if (auto it = _varAccessors.find(slot); it != _varAccessors.end()) { + if (auto it = _accessorMap.find(slot); it != _accessorMap.end()) { return it->second; } @@ -206,6 +206,10 @@ void IndexScanStage::open(bool reOpen) { _seekKeyLow = &_startPoint; _seekKeyHi = nullptr; } + + // TODO SERVER-49385: When the 'prepare()' phase takes the collection lock, it will be + // possible to intialize '_ordering' there instead of here. + _ordering = entry->ordering(); } else { _cursor.reset(); } @@ -256,6 +260,12 @@ PlanState IndexScanStage::getNext() { value::bitcastFrom<int64_t>(_nextRecord->loc.repr())); } + if (_accessors.size()) { + _valuesBuffer.reset(); + readKeyStringValueIntoAccessors( + _nextRecord->keyString, *_ordering, &_valuesBuffer, &_accessors, _indexKeysToInclude); + } + if (_tracker && _tracker->trackProgress<TrialRunProgressTracker::kNumReads>(1)) { // If we're collecting execution stats during multi-planning and reached the end of the // trial period (trackProgress() will return 'true' in this case), then we can reset the @@ -308,14 +318,18 @@ std::vector<DebugPrinter::Block> IndexScanStage::debugPrint() const { } ret.emplace_back(DebugPrinter::Block("[`")); - for (size_t idx = 0; idx < _fields.size(); ++idx) { - if (idx) { + size_t varIndex = 0; + for (size_t keyIndex = 0; keyIndex < _indexKeysToInclude.size(); ++keyIndex) { + if (!_indexKeysToInclude[keyIndex]) { + continue; + } + if (varIndex) { ret.emplace_back(DebugPrinter::Block("`,")); } - - DebugPrinter::addIdentifier(ret, _vars[idx]); + invariant(varIndex < _vars.size()); + DebugPrinter::addIdentifier(ret, _vars[varIndex++]); ret.emplace_back("="); - DebugPrinter::addIdentifier(ret, _fields[idx]); + ret.emplace_back(std::to_string(keyIndex)); } ret.emplace_back(DebugPrinter::Block("`]")); diff --git a/src/mongo/db/exec/sbe/stages/ix_scan.h b/src/mongo/db/exec/sbe/stages/ix_scan.h index a3d23b2bb82..fcecc127ddd 100644 --- a/src/mongo/db/exec/sbe/stages/ix_scan.h +++ b/src/mongo/db/exec/sbe/stages/ix_scan.h @@ -29,6 +29,7 @@ #pragma once +#include "mongo/bson/ordering.h" #include "mongo/db/db_raii.h" #include "mongo/db/exec/sbe/stages/stages.h" #include "mongo/db/exec/trial_run_progress_tracker.h" @@ -36,6 +37,27 @@ #include "mongo/db/storage/sorted_data_interface.h" namespace mongo::sbe { + +/** + * A stage that iterates the entries of a collection index, starting from a bound specified by the + * value in 'seekKeySlotLow' and ending (via IS_EOF) with the 'seekKeySlotHi' bound. (An unspecified + * 'seekKeySlowHi' scans to the end of the index. Leaving both bounds unspecified scans the index + * from beginning to end.) + * + * The input 'seekKeySlotLow' and 'seekKeySlotHi' slots get read as part of the open (or re-open) + * call. A common use case for an IndexScanStage is to place it as the inner child of LoopJoinStage. + * The outer side of the LoopJoinStage determines the bounds, and the inner IndexScanStage iterates + * through all the entries within those bounds. + * + * The "output" slots are + * - 'recordSlot': the "KeyString" representing the index entry, + * - 'recordIdSlot': a reference that can be used to fetch the entire document, and + * - 'vars': one slot for each value in the index key that should be "projected" out of the entry. + * + * The 'indexKeysToInclude' bitset determines which values are included in the projection based + * on their order in the index pattern. The number of bits set in 'indexKeysToInclude' must be + * the same as the number of slots in the 'vars' SlotVector. + */ class IndexScanStage final : public PlanStage { public: IndexScanStage(const NamespaceStringOrUUID& name, @@ -43,7 +65,7 @@ public: bool forward, boost::optional<value::SlotId> recordSlot, boost::optional<value::SlotId> recordIdSlot, - std::vector<std::string> fields, + IndexKeysInclusionSet indexKeysToInclude, value::SlotVector vars, boost::optional<value::SlotId> seekKeySlotLow, boost::optional<value::SlotId> seekKeySlotHi, @@ -74,7 +96,7 @@ private: const bool _forward; const boost::optional<value::SlotId> _recordSlot; const boost::optional<value::SlotId> _recordIdSlot; - const std::vector<std::string> _fields; + const IndexKeysInclusionSet _indexKeysToInclude; const value::SlotVector _vars; const boost::optional<value::SlotId> _seekKeySlotLow; const boost::optional<value::SlotId> _seekKeySlotHi; @@ -82,8 +104,10 @@ private: std::unique_ptr<value::ViewOfValueAccessor> _recordAccessor; std::unique_ptr<value::ViewOfValueAccessor> _recordIdAccessor; - value::FieldAccessorMap _fieldAccessors; - value::SlotAccessorMap _varAccessors; + // One accessor and slot for each key component that this stage will bind from an index entry's + // KeyString. The accessors are in the same order as the key components they bind to. + std::vector<value::ViewOfValueAccessor> _accessors; + value::SlotAccessorMap _accessorMap; value::SlotAccessor* _seekKeyLowAccessor{nullptr}; value::SlotAccessor* _seekKeyHiAccessor{nullptr}; @@ -94,9 +118,14 @@ private: std::unique_ptr<SortedDataInterface::Cursor> _cursor; std::weak_ptr<const IndexCatalogEntry> _weakIndexCatalogEntry; + boost::optional<Ordering> _ordering{boost::none}; boost::optional<AutoGetCollectionForRead> _coll; boost::optional<KeyStringEntry> _nextRecord; + // This buffer stores values that are projected out of the index entry. Values in the + // '_accessors' list that are pointers point to data in this buffer. + BufBuilder _valuesBuffer; + bool _open{false}; bool _firstGetNext{true}; IndexScanStats _specificStats; diff --git a/src/mongo/db/exec/sbe/values/id_generators.h b/src/mongo/db/exec/sbe/values/id_generators.h index fc7fa93f840..4b3fae32cca 100644 --- a/src/mongo/db/exec/sbe/values/id_generators.h +++ b/src/mongo/db/exec/sbe/values/id_generators.h @@ -67,6 +67,14 @@ public: T generate() { return this->generateByIncrementing(); } + + auto generateMultiple(size_t num) { + std::vector<T> idVector(num); + for (T& id : idVector) { + id = generate(); + } + return idVector; + } }; using SlotIdGenerator = IdGenerator<value::SlotId>; diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp index fcacfaa402e..a90935d7ccc 100644 --- a/src/mongo/db/query/sbe_stage_builder.cpp +++ b/src/mongo/db/query/sbe_stage_builder.cpp @@ -69,6 +69,13 @@ std::unique_ptr<sbe::PlanStage> SlotBasedStageBuilder::buildCollScan( _data.oplogTsSlot = oplogTsSlot; _data.shouldTrackLatestOplogTimestamp = csn->shouldTrackLatestOplogTimestamp; _data.shouldTrackResumeToken = csn->requestResumeToken; + + if (_returnKeySlot) { + // Assign the '_returnKeySlot' to be the empty object. + stage = sbe::makeProjectStage( + std::move(stage), *_returnKeySlot, sbe::makeE<sbe::EFunction>("newObj", sbe::makeEs())); + } + return std::move(stage); } @@ -78,6 +85,7 @@ std::unique_ptr<sbe::PlanStage> SlotBasedStageBuilder::buildIndexScan( auto [slot, stage] = generateIndexScan(_opCtx, _collection, ixn, + _returnKeySlot, &_slotIdGenerator, &_spoolIdGenerator, _yieldPolicy, @@ -87,7 +95,9 @@ std::unique_ptr<sbe::PlanStage> SlotBasedStageBuilder::buildIndexScan( } std::unique_ptr<sbe::PlanStage> SlotBasedStageBuilder::makeLoopJoinForFetch( - std::unique_ptr<sbe::PlanStage> inputStage, sbe::value::SlotId recordIdKeySlot) { + std::unique_ptr<sbe::PlanStage> inputStage, + sbe::value::SlotId recordIdKeySlot, + const sbe::value::SlotVector& slotsToForward) { _data.resultSlot = _slotIdGenerator.generate(); _data.recordIdSlot = _slotIdGenerator.generate(); @@ -108,7 +118,7 @@ std::unique_ptr<sbe::PlanStage> SlotBasedStageBuilder::makeLoopJoinForFetch( return sbe::makeS<sbe::LoopJoinStage>( std::move(inputStage), sbe::makeS<sbe::LimitSkipStage>(std::move(scanStage), 1, boost::none), - sbe::makeSV(), + std::move(slotsToForward), sbe::makeSV(recordIdKeySlot), nullptr); } @@ -119,7 +129,10 @@ std::unique_ptr<sbe::PlanStage> SlotBasedStageBuilder::buildFetch(const QuerySol uassert(4822880, "RecordId slot is not defined", _data.recordIdSlot); - auto stage = makeLoopJoinForFetch(std::move(inputStage), *_data.recordIdSlot); + auto stage = + makeLoopJoinForFetch(std::move(inputStage), + *_data.recordIdSlot, + _returnKeySlot ? sbe::makeSV(*_returnKeySlot) : sbe::makeSV()); if (fn->filter) { stage = generateFilter( @@ -234,6 +247,11 @@ std::unique_ptr<sbe::PlanStage> SlotBasedStageBuilder::buildSort(const QuerySolu values.push_back(*_data.oplogTsSlot); } + // The '_returnKeySlot' likewise needs to be visible at the root stage. + if (_returnKeySlot) { + values.push_back(*_returnKeySlot); + } + return sbe::makeS<sbe::SortStage>(std::move(inputStage), std::move(orderBy), std::move(direction), @@ -375,6 +393,8 @@ std::unique_ptr<sbe::PlanStage> SlotBasedStageBuilder::buildText(const QuerySolu forward, makeKeyString(startKeyBson), makeKeyString(endKeyBson), + sbe::IndexKeysInclusionSet{}, + sbe::makeSV(), recordSlot, &_slotIdGenerator, _yieldPolicy, @@ -407,8 +427,35 @@ std::unique_ptr<sbe::PlanStage> SlotBasedStageBuilder::buildText(const QuerySolu std::move(nljStage), ftsQuery, ftsSpec, *_data.resultSlot, textMatchResultSlot); // Filter based on the contents of the slot filled out by the TextMatchStage. - return sbe::makeS<sbe::FilterStage<false>>(std::move(textMatchStage), - sbe::makeE<sbe::EVariable>(textMatchResultSlot)); + auto filteredStage = sbe::makeS<sbe::FilterStage<false>>( + std::move(textMatchStage), sbe::makeE<sbe::EVariable>(textMatchResultSlot)); + + if (_returnKeySlot) { + // Assign the '_returnKeySlot' to be the empty object. + return sbe::makeProjectStage(std::move(filteredStage), + *_returnKeySlot, + sbe::makeE<sbe::EFunction>("newObj", sbe::makeEs())); + } else { + return filteredStage; + } +} + +std::unique_ptr<sbe::PlanStage> SlotBasedStageBuilder::buildReturnKey( + const QuerySolutionNode* root) { + // TODO SERVER-48721: If the projection includes {$meta: "sortKey"}, the result of this stage + // should also include the sort key. Everything else in the projection is ignored. + auto returnKeyNode = static_cast<const ReturnKeyNode*>(root); + + auto resultSlot = _slotIdGenerator.generate(); + invariant(!_data.resultSlot); + _data.resultSlot = resultSlot; + + invariant(!_returnKeySlot); + _returnKeySlot = _slotIdGenerator.generate(); + + auto stage = build(returnKeyNode->children[0]); + _data.resultSlot = *_returnKeySlot; + return stage; } // Returns a non-null pointer to the root of a plan tree, or a non-OK status if the PlanStage tree @@ -429,7 +476,8 @@ std::unique_ptr<sbe::PlanStage> SlotBasedStageBuilder::build(const QuerySolution {STAGE_PROJECTION_SIMPLE, std::mem_fn(&SlotBasedStageBuilder::buildProjectionSimple)}, {STAGE_PROJECTION_DEFAULT, std::mem_fn(&SlotBasedStageBuilder::buildProjectionDefault)}, {STAGE_OR, &SlotBasedStageBuilder::buildOr}, - {STAGE_TEXT, &SlotBasedStageBuilder::buildText}}; + {STAGE_TEXT, &SlotBasedStageBuilder::buildText}, + {STAGE_RETURN_KEY, &SlotBasedStageBuilder::buildReturnKey}}; uassert(4822884, str::stream() << "Can't build exec tree for node: " << root->toString(), diff --git a/src/mongo/db/query/sbe_stage_builder.h b/src/mongo/db/query/sbe_stage_builder.h index d71a023a6fd..6d1044e998d 100644 --- a/src/mongo/db/query/sbe_stage_builder.h +++ b/src/mongo/db/query/sbe_stage_builder.h @@ -107,9 +107,12 @@ private: std::unique_ptr<sbe::PlanStage> buildProjectionDefault(const QuerySolutionNode* root); std::unique_ptr<sbe::PlanStage> buildOr(const QuerySolutionNode* root); std::unique_ptr<sbe::PlanStage> buildText(const QuerySolutionNode* root); + std::unique_ptr<sbe::PlanStage> buildReturnKey(const QuerySolutionNode* root); - std::unique_ptr<sbe::PlanStage> makeLoopJoinForFetch(std::unique_ptr<sbe::PlanStage> inputStage, - sbe::value::SlotId recordIdKeySlot); + std::unique_ptr<sbe::PlanStage> makeLoopJoinForFetch( + std::unique_ptr<sbe::PlanStage> inputStage, + sbe::value::SlotId recordIdKeySlot, + const sbe::value::SlotVector& slotsToForward = {}); sbe::value::SlotIdGenerator _slotIdGenerator; sbe::value::FrameIdGenerator _frameIdGenerator; @@ -120,6 +123,10 @@ private: // limit value in this member and will handle it while processing the SKIP stage. boost::optional<long long> _limit; + // A slot here indicates that the plan has a ReturnKeyStage at its root and that any index scans + // should inflate each index entry into an object and bind it to this slot. + boost::optional<sbe::value::SlotId> _returnKeySlot; + PlanYieldPolicySBE* const _yieldPolicy; // Apart from generating just an execution tree, this builder will also produce some auxiliary diff --git a/src/mongo/db/query/sbe_stage_builder_index_scan.cpp b/src/mongo/db/query/sbe_stage_builder_index_scan.cpp index d1d7959710b..43c29dfe547 100644 --- a/src/mongo/db/query/sbe_stage_builder_index_scan.cpp +++ b/src/mongo/db/query/sbe_stage_builder_index_scan.cpp @@ -251,7 +251,7 @@ makeIntervalsFromIndexBounds(const IndexBounds& bounds, * limit 1 * coscan * right - * ixseek lowKeySlot highKeySlot recordIdSlot [] @coll @index + * ixseek lowKeySlot highKeySlot keyStringSlot recordIdSlot [] @coll @index * * This subtree is similar to the single-interval subtree with the only difference that instead * of projecting a single pair of the low/high keys, we project an array of such pairs and then @@ -264,6 +264,8 @@ generateOptimizedMultiIntervalIndexScan( bool forward, std::vector<std::pair<std::unique_ptr<KeyString::Value>, std::unique_ptr<KeyString::Value>>> intervals, + sbe::IndexKeysInclusionSet indexKeysToInclude, + sbe::value::SlotVector vars, sbe::value::SlotIdGenerator* slotIdGenerator, PlanYieldPolicy* yieldPolicy, TrialRunProgressTracker* tracker) { @@ -320,10 +322,10 @@ generateOptimizedMultiIntervalIndexScan( NamespaceStringOrUUID{collection->ns().db().toString(), collection->uuid()}, indexName, forward, - boost::none, + boost::none, // recordSlot recordIdSlot, - std::vector<std::string>{}, - sbe::makeSV(), + indexKeysToInclude, + std::move(vars), lowKeySlot, highKeySlot, yieldPolicy, @@ -339,19 +341,27 @@ generateOptimizedMultiIntervalIndexScan( } /** - * Builds an anchor sub-tree of the recusrive index scan CTE to seed the result set with the initial + * Builds an anchor sub-tree of the recursive index scan CTE to seed the result set with the initial * 'startKey' for the index scan. */ std::pair<sbe::value::SlotId, std::unique_ptr<sbe::PlanStage>> makeAnchorBranchForGenericIndexScan( - std::unique_ptr<KeyString::Value> startKey, sbe::value::SlotIdGenerator* slotIdGenerator) { - // Just project out the 'startKey'. + std::unique_ptr<KeyString::Value> startKey, + const sbe::value::SlotVector& unusedVarSlots, + sbe::value::SlotIdGenerator* slotIdGenerator) { + // Just project out the 'startKey' KeyString. We must bind a slot for each field requested from + // index keys, but we don't expect these values to ever get used, so we bind them to Nothing. auto startKeySlot = slotIdGenerator->generate(); + sbe::value::SlotMap<std::unique_ptr<sbe::EExpression>> projects; + projects.insert({startKeySlot, + sbe::makeE<sbe::EConstant>(sbe::value::TypeTags::ksValue, + sbe::value::bitcastFrom(startKey.release()))}); + for (auto&& unusedSlot : unusedVarSlots) { + projects.insert({unusedSlot, sbe::makeE<sbe::EConstant>(sbe::value::TypeTags::Nothing, 0)}); + } return {startKeySlot, - sbe::makeProjectStage( + sbe::makeS<sbe::ProjectStage>( sbe::makeS<sbe::LimitSkipStage>(sbe::makeS<sbe::CoScanStage>(), 1, boost::none), - startKeySlot, - sbe::makeE<sbe::EConstant>(sbe::value::TypeTags::ksValue, - sbe::value::bitcastFrom(startKey.release())))}; + std::move(projects))}; } /** @@ -363,6 +373,8 @@ makeRecursiveBranchForGenericIndexScan(const Collection* collection, const std::string& indexName, const sbe::CheckBoundsParams& params, sbe::SpoolId spoolId, + sbe::IndexKeysInclusionSet indexKeysToInclude, + sbe::value::SlotVector savedVars, sbe::value::SlotIdGenerator* slotIdGenerator, PlanYieldPolicy* yieldPolicy, TrialRunProgressTracker* tracker) { @@ -386,8 +398,8 @@ makeRecursiveBranchForGenericIndexScan(const Collection* collection, params.direction == 1, resultSlot, recordIdSlot, - std::vector<std::string>{}, - sbe::makeSV(), + indexKeysToInclude, + std::move(savedVars), lowKeySlot, boost::none, yieldPolicy, @@ -421,13 +433,13 @@ makeRecursiveBranchForGenericIndexScan(const Collection* collection, * stage in conjunction with the stack spool: * * filter {isNumber(resultSlot)} - * lspool [resultSlot] {!isNumber(resultSlot)} - * union [resultSlot] - * [anchorSlot] - * project [startKeySlot = KS(...)] + * lspool [resultSlot, varSlots...] {!isNumber(resultSlot)} + * union [resultSlot, varSlots...] + * [anchorSlot, unusedVarSlots] + * project [startKeySlot = KS(...), unusedVarSlot0 = Nothing, ...] * limit 1 * coscan - * [checkBoundsSlot] + * [checkBoundsSlot, savedVarSlots...] * nlj [] [seekKeySlot] * left * sspool [seekKeySlot] @@ -439,7 +451,8 @@ makeRecursiveBranchForGenericIndexScan(const Collection* collection, * limit 1 * coscan * right - * ixseek lowKeySlot resultSlot recordIdSlot [] @coll @index + * ixseek lowKeySlot resultSlot recordIdSlot savedVarSlots [] @coll + * @index * * - The anchor union branch is the starting point of the recursive subtree. It pushes the * starting index into the lspool stage. The lspool has a filter predicate to ensure that @@ -477,6 +490,8 @@ generateGenericMultiIntervalIndexScan(const Collection* collection, const IndexScanNode* ixn, KeyString::Version version, Ordering ordering, + sbe::IndexKeysInclusionSet indexKeysToInclude, + sbe::value::SlotVector vars, sbe::value::SlotIdGenerator* slotIdGenerator, sbe::value::SpoolIdGenerator* spoolIdGenerator, PlanYieldPolicy* yieldPolicy, @@ -506,29 +521,42 @@ generateGenericMultiIntervalIndexScan(const Collection* collection, } // Build the anchor branch of the union. + auto unusedVarSlots = slotIdGenerator->generateMultiple(vars.size()); auto [anchorSlot, anchorBranch] = makeAnchorBranchForGenericIndexScan( std::make_unique<KeyString::Value>(IndexEntryComparison::makeKeyStringFromSeekPointForSeek( seekPoint, version, ordering, ixn->direction == 1)), + unusedVarSlots, slotIdGenerator); auto spoolId = spoolIdGenerator->generate(); // Build the recursive branch of the union. + auto savedVarSlots = slotIdGenerator->generateMultiple(vars.size()); auto [recursiveSlot, recursiveBranch] = makeRecursiveBranchForGenericIndexScan( collection, ixn->index.identifier.catalogName, {ixn->bounds, ixn->index.keyPattern, ixn->direction, version, ordering}, spoolId, + indexKeysToInclude, + savedVarSlots, slotIdGenerator, yieldPolicy, tracker); // Construct a union stage from the two branches. + auto makeSVWithVars = [](sbe::value::SlotId headSlot, const sbe::value::SlotVector& varSlots) { + sbe::value::SlotVector sv; + sv.reserve(1 + varSlots.size()); + sv.push_back(headSlot); + sv.insert(sv.end(), varSlots.begin(), varSlots.end()); + return sv; + }; auto unionStage = sbe::makeS<sbe::UnionStage>( make_vector<std::unique_ptr<sbe::PlanStage>>(std::move(anchorBranch), std::move(recursiveBranch)), - std::vector<sbe::value::SlotVector>{sbe::makeSV(anchorSlot), sbe::makeSV(recursiveSlot)}, - sbe::makeSV(resultSlot)); + std::vector<sbe::value::SlotVector>{makeSVWithVars(anchorSlot, unusedVarSlots), + makeSVWithVars(recursiveSlot, savedVarSlots)}, + makeSVWithVars(resultSlot, vars)); // Stick in a lazy producer spool on top. The specified predicate will ensure that we will only // store the seek key values in the spool (that is, if the value type is not a number, or not @@ -536,7 +564,7 @@ generateGenericMultiIntervalIndexScan(const Collection* collection, auto spool = sbe::makeS<sbe::SpoolLazyProducerStage>( std::move(unionStage), spoolId, - sbe::makeSV(resultSlot), + makeSVWithVars(resultSlot, std::move(vars)), sbe::makeE<sbe::EPrimUnary>( sbe::EPrimUnary::logicNot, sbe::makeE<sbe::EFunction>("isNumber"sv, @@ -557,6 +585,8 @@ std::pair<sbe::value::SlotId, std::unique_ptr<sbe::PlanStage>> generateSingleInt bool forward, std::unique_ptr<KeyString::Value> lowKey, std::unique_ptr<KeyString::Value> highKey, + sbe::IndexKeysInclusionSet indexKeysToInclude, + sbe::value::SlotVector vars, boost::optional<sbe::value::SlotId> recordSlot, sbe::value::SlotIdGenerator* slotIdGenerator, PlanYieldPolicy* yieldPolicy, @@ -585,8 +615,8 @@ std::pair<sbe::value::SlotId, std::unique_ptr<sbe::PlanStage>> generateSingleInt forward, recordSlot, recordIdSlot, - std::vector<std::string>{}, - sbe::makeSV(), + indexKeysToInclude, + std::move(vars), lowKeySlot, highKeySlot, yieldPolicy, @@ -606,12 +636,12 @@ std::pair<sbe::value::SlotId, std::unique_ptr<sbe::PlanStage>> generateIndexScan OperationContext* opCtx, const Collection* collection, const IndexScanNode* ixn, + boost::optional<sbe::value::SlotId> returnKeySlot, sbe::value::SlotIdGenerator* slotIdGenerator, sbe::value::SpoolIdGenerator* spoolIdGenerator, PlanYieldPolicy* yieldPolicy, TrialRunProgressTracker* tracker) { - uassert( - 4822863, "Index scans with key metadata are not supported in SBE", !ixn->addKeyMetadata); + invariant(returnKeySlot || !ixn->addKeyMetadata); uassert(4822864, "Index scans with a filter are not supported in SBE", !ixn->filter); auto descriptor = @@ -623,7 +653,40 @@ std::pair<sbe::value::SlotId, std::unique_ptr<sbe::PlanStage>> generateIndexScan accessMethod->getSortedDataInterface()->getKeyStringVersion(), accessMethod->getSortedDataInterface()->getOrdering()); - auto [slot, stage] = [&]() { + + auto [returnKeyExpr, vars, indexKeysToInclude] = + [&]() -> std::tuple<std::unique_ptr<sbe::EExpression>, + sbe::value::SlotVector, + sbe::IndexKeysInclusionSet> { + std::unique_ptr<sbe::EExpression> returnKeyExpr; + sbe::value::SlotVector vars; + sbe::IndexKeysInclusionSet indexKeysToInclude; + + if (returnKeySlot) { + std::vector<std::unique_ptr<sbe::EExpression>> mkObjArgs; + for (auto&& elem : ixn->index.keyPattern) { + auto fieldName = elem.fieldNameStringData(); + auto slot = slotIdGenerator->generate(); + + vars.emplace_back(slot); + + mkObjArgs.emplace_back(sbe::makeE<sbe::EConstant>( + std::string_view{fieldName.rawData(), fieldName.size()})); + mkObjArgs.emplace_back(sbe::makeE<sbe::EVariable>(slot)); + } + + returnKeyExpr = sbe::makeE<sbe::EFunction>("newObj", std::move(mkObjArgs)); + + for (size_t i = 0; i < vars.size(); ++i) { + indexKeysToInclude.set(i); + } + } + + return {std::move(returnKeyExpr), std::move(vars), indexKeysToInclude}; + }(); + + auto [slot, + stage] = [&, vars = std::ref(vars), indexKeysToInclude = std::ref(indexKeysToInclude)]() { if (intervals.size() == 1) { // If we have just a single interval, we can construct a simplified sub-tree. auto&& [lowKey, highKey] = intervals[0]; @@ -632,7 +695,9 @@ std::pair<sbe::value::SlotId, std::unique_ptr<sbe::PlanStage>> generateIndexScan ixn->direction == 1, std::move(lowKey), std::move(highKey), - boost::none, + indexKeysToInclude, + vars, + boost::none, // recordSlot slotIdGenerator, yieldPolicy, tracker); @@ -644,6 +709,8 @@ std::pair<sbe::value::SlotId, std::unique_ptr<sbe::PlanStage>> generateIndexScan ixn->index.identifier.catalogName, ixn->direction == 1, std::move(intervals), + indexKeysToInclude, + vars, slotIdGenerator, yieldPolicy, tracker); @@ -654,6 +721,8 @@ std::pair<sbe::value::SlotId, std::unique_ptr<sbe::PlanStage>> generateIndexScan ixn, accessMethod->getSortedDataInterface()->getKeyStringVersion(), accessMethod->getSortedDataInterface()->getOrdering(), + indexKeysToInclude, + vars, slotIdGenerator, spoolIdGenerator, yieldPolicy, @@ -662,7 +731,19 @@ std::pair<sbe::value::SlotId, std::unique_ptr<sbe::PlanStage>> generateIndexScan }(); if (ixn->shouldDedup) { - stage = sbe::makeS<sbe::HashAggStage>(std::move(stage), sbe::makeSV(slot), sbe::makeEM()); + sbe::value::SlotMap<std::unique_ptr<sbe::EExpression>> forwardedVarSlots; + for (auto varSlot : vars) { + forwardedVarSlots.insert( + {varSlot, + sbe::makeE<sbe::EFunction>("first", + sbe::makeEs(sbe::makeE<sbe::EVariable>(varSlot)))}); + } + stage = sbe::makeS<sbe::HashAggStage>( + std::move(stage), sbe::makeSV(slot), std::move(forwardedVarSlots)); + } + + if (returnKeyExpr) { + stage = sbe::makeProjectStage(std::move(stage), *returnKeySlot, std::move(returnKeyExpr)); } return {slot, std::move(stage)}; diff --git a/src/mongo/db/query/sbe_stage_builder_index_scan.h b/src/mongo/db/query/sbe_stage_builder_index_scan.h index ed4247d28c4..63389a33ba6 100644 --- a/src/mongo/db/query/sbe_stage_builder_index_scan.h +++ b/src/mongo/db/query/sbe_stage_builder_index_scan.h @@ -42,6 +42,7 @@ std::pair<sbe::value::SlotId, std::unique_ptr<sbe::PlanStage>> generateIndexScan OperationContext* opCtx, const Collection* collection, const IndexScanNode* ixn, + boost::optional<sbe::value::SlotId> returnKeySlot, sbe::value::SlotIdGenerator* slotIdGenerator, sbe::value::SpoolIdGenerator* spoolIdGenerator, PlanYieldPolicy* yieldPolicy, @@ -71,6 +72,8 @@ std::pair<sbe::value::SlotId, std::unique_ptr<sbe::PlanStage>> generateSingleInt bool forward, std::unique_ptr<KeyString::Value> lowKey, std::unique_ptr<KeyString::Value> highKey, + sbe::IndexKeysInclusionSet indexKeysToInclude, + sbe::value::SlotVector vars, boost::optional<sbe::value::SlotId> recordSlot, sbe::value::SlotIdGenerator* slotIdGenerator, PlanYieldPolicy* yieldPolicy, |