diff options
author | Ian Boros <ian.boros@mongodb.com> | 2021-02-12 11:38:33 -0500 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-02-24 06:10:08 +0000 |
commit | 7634943d1a2298a916fbf7aa140da79ea50e47b0 (patch) | |
tree | ce181f90431cc932a86d3f09126b86b627ad624d /src/mongo/db | |
parent | abb82fc8a440587a3d9d1ee9cc7926a9dfba282a (diff) | |
download | mongo-7634943d1a2298a916fbf7aa140da79ea50e47b0.tar.gz |
SERVER-54480 Fix index key rehydration in SBE
Diffstat (limited to 'src/mongo/db')
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder.cpp | 196 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder_index_scan.cpp | 77 | ||||
-rw-r--r-- | src/mongo/db/query/sbe_stage_builder_index_scan.h | 2 |
3 files changed, 193 insertions, 82 deletions
diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp index 14c9870fb94..1528ed39fba 100644 --- a/src/mongo/db/query/sbe_stage_builder.cpp +++ b/src/mongo/db/query/sbe_stage_builder.cpp @@ -61,6 +61,128 @@ #include "mongo/db/s/collection_sharding_state.h" namespace mongo::stage_builder { +namespace { +/** + * Tree representation of an index key pattern. + * + * For example, the key pattern {a.b: 1, x: 1, a.c: 1} would look like: + * + * <root> + * / | + * a x + * / \ + * b c + * + * This tree is used for building SBE subtrees to re-hydrate index keys. + */ +struct IndexKeyPatternTreeNode { + IndexKeyPatternTreeNode* emplace(StringData fieldComponent) { + auto newNode = std::make_unique<IndexKeyPatternTreeNode>(); + const auto newNodeRaw = newNode.get(); + children.emplace(fieldComponent, std::move(newNode)); + childrenOrder.push_back(fieldComponent.toString()); + + return newNodeRaw; + } + + StringMap<std::unique_ptr<IndexKeyPatternTreeNode>> children; + std::vector<std::string> childrenOrder; + + // Which slot the index key for this component is stored in. May be boost::none for non-leaf + // nodes. + boost::optional<sbe::value::SlotId> indexKeySlot; +}; + +/** + * Given a key pattern and an array of slots of equal size, builds an IndexKeyPatternTreeNode + * representing the mapping between key pattern component and slot. + * + * Note that this will "short circuit" in cases where the index key pattern contains two components + * where one is a subpath of the other. For example with the key pattern {a:1, a.b: 1}, the "a.b" + * component will not be represented in the output tree. For the purpose of rehydrating index keys, + * this is fine (and actually preferable). + */ +std::unique_ptr<IndexKeyPatternTreeNode> buildKeyPatternTree(const BSONObj& keyPattern, + const sbe::value::SlotVector& slots) { + size_t i = 0; + + auto root = std::make_unique<IndexKeyPatternTreeNode>(); + for (auto&& elem : keyPattern) { + auto* node = root.get(); + bool skipElem = false; + + FieldRef fr(elem.fieldNameStringData()); + for (FieldIndex j = 0; j < fr.numParts(); ++j) { + const auto part = fr.getPart(j); + if (auto it = node->children.find(part); it != node->children.end()) { + node = it->second.get(); + if (node->indexKeySlot) { + // We're processing the a sub-path of a path that's already indexed. We can + // bail out here since we won't use the sub-path when reconstructing the + // object. + skipElem = true; + break; + } + } else { + node = node->emplace(part); + } + } + + if (!skipElem) { + node->indexKeySlot = slots[i]; + } + + ++i; + } + + return root; +} + +/** + * Given a root IndexKeyPatternTreeNode, this function will construct an SBE expression for + * producing a partial object from an index key. + * + * For example, given the index key pattern {a.b: 1, x: 1, a.c: 1} and the index key + * {"": 1, "": 2, "": 3}, the SBE expression would produce the object {a: {b:1, c: 3}, x: 2}. + */ +std::unique_ptr<sbe::EExpression> buildNewObjExpr(const IndexKeyPatternTreeNode* kpTree) { + + std::vector<std::unique_ptr<sbe::EExpression>> args; + for (auto&& fieldName : kpTree->childrenOrder) { + auto it = kpTree->children.find(fieldName); + + args.emplace_back(makeConstant(fieldName)); + if (it->second->indexKeySlot) { + args.emplace_back(makeVariable(*it->second->indexKeySlot)); + } else { + // The reason this is in an else branch is that in the case where we have an index key + // like {a.b: ..., a: ...}, we've already made the logic for reconstructing the 'a' + // portion, so the 'a.b' subtree can be skipped. + args.push_back(buildNewObjExpr(it->second.get())); + } + } + + return sbe::makeE<sbe::EFunction>("newObj", std::move(args)); +} + +/** + * Given a stage, and index key pattern a corresponding array of slot IDs, this function + * add a ProjectStage to the tree which rehydrates the index key and stores the result in + * 'resultSlot.' + */ +std::unique_ptr<sbe::PlanStage> rehydrateIndexKey(std::unique_ptr<sbe::PlanStage> stage, + const BSONObj& indexKeyPattern, + PlanNodeId nodeId, + const sbe::value::SlotVector& indexKeySlots, + sbe::value::SlotId resultSlot) { + auto kpTree = buildKeyPatternTree(indexKeyPattern, indexKeySlots); + auto keyExpr = buildNewObjExpr(kpTree.get()); + + return sbe::makeProjectStage(std::move(stage), nodeId, resultSlot, std::move(keyExpr)); +} +} // namespace + + std::unique_ptr<sbe::RuntimeEnvironment> makeRuntimeEnvironment( const CanonicalQuery& cq, OperationContext* opCtx, @@ -310,16 +432,70 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder // Index scans cannot produce an oplogTsSlot, so assert that the caller doesn't need it. invariant(!reqs.has(kOplogTs)); - return generateIndexScan(_opCtx, - _collection, - ixn, - reqs, - &_slotIdGenerator, - &_frameIdGenerator, - &_spoolIdGenerator, - _yieldPolicy, - _data.env, - _lockAcquisitionCallback); + sbe::IndexKeysInclusionSet indexKeyBitset; + + if (reqs.has(PlanStageSlots::kReturnKey) || reqs.has(PlanStageSlots::kResult)) { + // If either 'reqs.result' or 'reqs.returnKey' is true, we need to get all parts of the + // index key (regardless of what was requested by 'reqs.indexKeyBitset') so that we can + // create the inflated index key (keyExpr). + for (int i = 0; i < ixn->index.keyPattern.nFields(); ++i) { + indexKeyBitset.set(i); + } + } else if (reqs.getIndexKeyBitset()) { + indexKeyBitset = *reqs.getIndexKeyBitset(); + } + + auto [stage, outputs] = generateIndexScan(_opCtx, + _collection, + ixn, + indexKeyBitset, + &_slotIdGenerator, + &_frameIdGenerator, + &_spoolIdGenerator, + _yieldPolicy, + _data.env, + _lockAcquisitionCallback); + + if (reqs.has(PlanStageSlots::kReturnKey)) { + std::vector<std::unique_ptr<sbe::EExpression>> mkObjArgs; + + size_t i = 0; + for (auto&& elem : ixn->index.keyPattern) { + auto fieldName = elem.fieldNameStringData(); + + mkObjArgs.emplace_back(sbe::makeE<sbe::EConstant>( + std::string_view{fieldName.rawData(), fieldName.size()})); + mkObjArgs.emplace_back(sbe::makeE<sbe::EVariable>((*outputs.getIndexKeySlots())[i++])); + } + + auto rawKeyExpr = sbe::makeE<sbe::EFunction>("newObj", std::move(mkObjArgs)); + outputs.set(PlanStageSlots::kReturnKey, _slotIdGenerator.generate()); + stage = sbe::makeProjectStage(std::move(stage), + ixn->nodeId(), + outputs.get(PlanStageSlots::kReturnKey), + std::move(rawKeyExpr)); + } + + if (reqs.has(PlanStageSlots::kResult)) { + outputs.set(PlanStageSlots::kResult, _slotIdGenerator.generate()); + stage = rehydrateIndexKey(std::move(stage), + ixn->index.keyPattern, + ixn->nodeId(), + *outputs.getIndexKeySlots(), + outputs.get(PlanStageSlots::kResult)); + } + + if (reqs.getIndexKeyBitset()) { + outputs.setIndexKeySlots( + makeIndexKeyOutputSlotsMatchingParentReqs(ixn->index.keyPattern, + *reqs.getIndexKeyBitset(), + indexKeyBitset, + *outputs.getIndexKeySlots())); + } else { + outputs.setIndexKeySlots(boost::none); + } + + return {std::move(stage), std::move(outputs)}; } std::tuple<sbe::value::SlotId, sbe::value::SlotId, std::unique_ptr<sbe::PlanStage>> diff --git a/src/mongo/db/query/sbe_stage_builder_index_scan.cpp b/src/mongo/db/query/sbe_stage_builder_index_scan.cpp index 9e7a2cff9b7..e58c5e21fd1 100644 --- a/src/mongo/db/query/sbe_stage_builder_index_scan.cpp +++ b/src/mongo/db/query/sbe_stage_builder_index_scan.cpp @@ -56,6 +56,7 @@ #include "mongo/db/query/util/make_data_structure.h" #include "mongo/logv2/log.h" #include "mongo/util/str.h" +#include "mongo/util/visit_helper.h" namespace mongo::stage_builder { namespace { @@ -679,7 +680,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateIndexScan( OperationContext* opCtx, const CollectionPtr& collection, const IndexScanNode* ixn, - PlanStageReqs reqs, + const sbe::IndexKeysInclusionSet& originalIndexKeyBitset, sbe::value::SlotIdGenerator* slotIdGenerator, sbe::value::SlotIdGenerator* frameIdGenerator, sbe::value::SpoolIdGenerator* spoolIdGenerator, @@ -697,18 +698,8 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateIndexScan( accessMethod->getSortedDataInterface()->getOrdering()); std::unique_ptr<sbe::PlanStage> stage; - sbe::value::SlotVector indexKeySlots; - sbe::IndexKeysInclusionSet indexKeyBitset; - std::unique_ptr<sbe::EExpression> keyExpr; - PlanStageSlots outputs; - // Save the bit vector describing the fields from the index that our caller requires. If an - // index filter is defined, we may require additional index fields which are not needed by the - // parent stage. We will need the parent's reqs later on so that we can hand the correct slot - // vector for these fields back to our parent. - auto parentIndexKeyBitset = reqs.getIndexKeyBitset(); - // Determine the set of fields from the index required to apply the filter and union those with // the set of fields from the index required by the parent stage. auto [indexFilterKeyBitset, indexFilterKeyFields] = [&]() { @@ -719,35 +710,8 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateIndexScan( } return std::make_pair(sbe::IndexKeysInclusionSet{}, std::vector<std::string>{}); }(); - reqs.getIndexKeyBitset() = - parentIndexKeyBitset.value_or(sbe::IndexKeysInclusionSet{}) | indexFilterKeyBitset; - - if (reqs.has(PlanStageSlots::kResult) || reqs.has(PlanStageSlots::kReturnKey)) { - // If either 'reqs.result' or 'reqs.returnKey' is true, we need to get all parts of the - // index key (regardless of what was requested by 'reqs.indexKeyBitset') so that we can - // create the inflated index key (keyExpr). - std::vector<std::unique_ptr<sbe::EExpression>> mkObjArgs; - size_t keyIndex = 0; - - for (auto&& elem : ixn->index.keyPattern) { - auto fieldName = elem.fieldNameStringData(); - auto slot = slotIdGenerator->generate(); - - mkObjArgs.emplace_back(sbe::makeE<sbe::EConstant>( - std::string_view{fieldName.rawData(), fieldName.size()})); - mkObjArgs.emplace_back(sbe::makeE<sbe::EVariable>(slot)); - - indexKeySlots.emplace_back(slot); - indexKeyBitset.set(keyIndex++); - } - - keyExpr = sbe::makeE<sbe::EFunction>("newObj", std::move(mkObjArgs)); - } else if (reqs.getIndexKeyBitset()) { - // If both 'reqs.result' and 'reqs.returnKey' are false, we should only get the - // parts of the index key that were requested by 'reqs.indexKeyBitset'. - indexKeySlots = slotIdGenerator->generateMultiple(reqs.getIndexKeyBitset()->count()); - indexKeyBitset = *reqs.getIndexKeyBitset(); - } + auto indexKeyBitset = originalIndexKeyBitset | indexFilterKeyBitset; + auto indexKeySlots = slotIdGenerator->generateMultiple(indexKeyBitset.count()); if (intervals.size() == 1) { // If we have just a single interval, we can construct a simplified sub-tree. @@ -826,37 +790,8 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateIndexScan( ixn->nodeId()); } - if (reqs.has(PlanStageSlots::kResult) || reqs.has(PlanStageSlots::kReturnKey)) { - if (reqs.has(PlanStageSlots::kResult)) { - outputs.set(PlanStageSlots::kResult, slotIdGenerator->generate()); - stage = sbe::makeProjectStage(std::move(stage), - ixn->nodeId(), - outputs.get(PlanStageSlots::kResult), - std::move(keyExpr)); - - if (reqs.has(PlanStageSlots::kReturnKey)) { - outputs.set(PlanStageSlots::kReturnKey, slotIdGenerator->generate()); - stage = sbe::makeProjectStage( - std::move(stage), - ixn->nodeId(), - outputs.get(PlanStageSlots::kReturnKey), - sbe::makeE<sbe::EVariable>(outputs.get(PlanStageSlots::kResult))); - } - } else { - outputs.set(PlanStageSlots::kReturnKey, slotIdGenerator->generate()); - stage = sbe::makeProjectStage(std::move(stage), - ixn->nodeId(), - outputs.get(PlanStageSlots::kReturnKey), - std::move(keyExpr)); - } - } - - // We only need to return the slots which were explicitly requested by our parent stage. - outputs.setIndexKeySlots( - !parentIndexKeyBitset - ? boost::none - : boost::optional<sbe::value::SlotVector>{makeIndexKeyOutputSlotsMatchingParentReqs( - ixn->index.keyPattern, *parentIndexKeyBitset, indexKeyBitset, indexKeySlots)}); + outputs.setIndexKeySlots(makeIndexKeyOutputSlotsMatchingParentReqs( + ixn->index.keyPattern, originalIndexKeyBitset, indexKeyBitset, indexKeySlots)); return {std::move(stage), std::move(outputs)}; } diff --git a/src/mongo/db/query/sbe_stage_builder_index_scan.h b/src/mongo/db/query/sbe_stage_builder_index_scan.h index b6bd6818c18..008f935bfb8 100644 --- a/src/mongo/db/query/sbe_stage_builder_index_scan.h +++ b/src/mongo/db/query/sbe_stage_builder_index_scan.h @@ -54,7 +54,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateIndexScan( OperationContext* opCtx, const CollectionPtr& collection, const IndexScanNode* ixn, - PlanStageReqs reqs, + const sbe::IndexKeysInclusionSet& indexKeyBitset, sbe::value::SlotIdGenerator* slotIdGenerator, sbe::value::SlotIdGenerator* frameIdGenerator, sbe::value::SpoolIdGenerator* spoolIdGenerator, |