diff options
author | Svilen Mihaylov <svilen.mihaylov@mongodb.com> | 2022-08-26 23:41:07 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-08-27 00:19:25 +0000 |
commit | f5f8acab2eec0be83d94cb8126bccf2bd1522350 (patch) | |
tree | ddcfda87a271a280d4677253e0f918ef843576a7 /src/mongo/db/query | |
parent | 8aef79c9e2e6d4e8110bb7ccdc5f6445198272f2 (diff) | |
download | mongo-f5f8acab2eec0be83d94cb8126bccf2bd1522350.tar.gz |
SERVER-68965 Improvements to indexing in the presence of nested subfields
Diffstat (limited to 'src/mongo/db/query')
21 files changed, 980 insertions, 774 deletions
diff --git a/src/mongo/db/query/ce/ce_test_utils.cpp b/src/mongo/db/query/ce/ce_test_utils.cpp index 3f32aa57a33..0ee44718b05 100644 --- a/src/mongo/db/query/ce/ce_test_utils.cpp +++ b/src/mongo/db/query/ce/ce_test_utils.cpp @@ -60,7 +60,29 @@ double CETester::getCE(const std::string& query, size_t optimizationLevel) const } double CETester::getCE(ABT& abt, size_t optimizationLevel) const { - OptPhaseManager phaseManager = getPhaseManager(optimizationLevel); + // Needs to outlive the phase manager. + PrefixId prefixId; + + // Mock metadata. + ScanDefinition sd({}, {}, {DistributionType::Centralized}, true, _collCard); + Metadata metadata({{_collName, sd}}); + + std::vector<OptPhaseManager::OptPhase> optPhaseChoices{ + OptPhaseManager::OptPhase::MemoSubstitutionPhase, + OptPhaseManager::OptPhase::MemoExplorationPhase}; + optimizationLevel = std::min(optimizationLevel, optPhaseChoices.size()); + OptPhaseManager::PhaseSet optPhases; + for (size_t i = 0; i < optimizationLevel; ++i) { + optPhases.insert(optPhaseChoices[i]); + } + + optimizer::OptPhaseManager phaseManager(optPhases, + prefixId, + true /*requireRID*/, + metadata, + std::make_unique<HeuristicCE>(), + std::make_unique<DefaultCosting>(), + DebugInfo::kDefaultForTests); // Optimize. ASSERT_TRUE(phaseManager.optimize(abt)); @@ -77,29 +99,4 @@ double CETester::getCE(ABT& abt, size_t optimizationLevel) const { return ce; } -optimizer::OptPhaseManager CETester::getPhaseManager(size_t optimizationLevel) const { - // Mock memo. 
- ScanDefinition sd({}, {}, {DistributionType::Centralized}, true, _collCard); - Metadata metadata({{_collName, sd}}); - - std::vector<OptPhaseManager::OptPhase> optPhaseChoices{ - OptPhaseManager::OptPhase::MemoSubstitutionPhase, - OptPhaseManager::OptPhase::MemoExplorationPhase}; - optimizationLevel = std::min(optimizationLevel, optPhaseChoices.size()); - OptPhaseManager::PhaseSet optPhases; - for (size_t i = 0; i < optimizationLevel; ++i) { - optPhases.insert(optPhaseChoices[i]); - } - - // Construct placeholder PhaseManager. Notice that it also creates a Memo internally. - PrefixId prefixId; - return {optPhases, - prefixId, - true /*requireRID*/, - metadata, - std::make_unique<HeuristicCE>(), - std::make_unique<DefaultCosting>(), - DebugInfo::kDefaultForTests}; -} - } // namespace mongo::ce diff --git a/src/mongo/db/query/ce/ce_test_utils.h b/src/mongo/db/query/ce/ce_test_utils.h index 3d49886b1e3..b354e4ae6b1 100644 --- a/src/mongo/db/query/ce/ce_test_utils.h +++ b/src/mongo/db/query/ce/ce_test_utils.h @@ -109,9 +109,6 @@ protected: // The number of records in the collection we are testing. double _collCard; - -private: - optimizer::OptPhaseManager getPhaseManager(size_t optimizationLevel) const; }; } // namespace ce diff --git a/src/mongo/db/query/optimizer/cascades/cost_derivation.cpp b/src/mongo/db/query/optimizer/cascades/cost_derivation.cpp index 11df74ff8e5..d212c32c5a7 100644 --- a/src/mongo/db/query/optimizer/cascades/cost_derivation.cpp +++ b/src/mongo/db/query/optimizer/cascades/cost_derivation.cpp @@ -168,9 +168,9 @@ public: double evalCost = childResult._cost; if (!isTrivialExpr<EvalPath>(node.getProjection())) { // Non-trivial projection. 
- evalCost += kStartupCost + kEvalIncrementalCost * childResult._ce; + evalCost += kStartupCost + kEvalIncrementalCost * _cardinalityEstimate; } - return {evalCost, childResult._ce}; + return {evalCost, _cardinalityEstimate}; } CostAndCEInternal operator()(const ABT& /*n*/, const BinaryJoinNode& node) { diff --git a/src/mongo/db/query/optimizer/cascades/implementers.cpp b/src/mongo/db/query/optimizer/cascades/implementers.cpp index a5bc675695a..2a0c4cfaac0 100644 --- a/src/mongo/db/query/optimizer/cascades/implementers.cpp +++ b/src/mongo/db/query/optimizer/cascades/implementers.cpp @@ -348,7 +348,7 @@ public: // via a physical scan even in the absence of indexes. const IndexingRequirement& requirements = getPropertyConst<IndexingRequirement>(_physProps); - const CandidateIndexMap& candidateIndexMap = node.getCandidateIndexMap(); + const CandidateIndexes& candidateIndexes = node.getCandidateIndexes(); const IndexReqTarget indexReqTarget = requirements.getIndexReqTarget(); switch (indexReqTarget) { case IndexReqTarget::Complete: @@ -358,7 +358,7 @@ public: break; case IndexReqTarget::Index: - if (candidateIndexMap.empty()) { + if (candidateIndexes.empty()) { return; } [[fallthrough]]; @@ -372,10 +372,6 @@ public: default: MONGO_UNREACHABLE; } - const auto& satisfiedPartialIndexes = - getPropertyConst<IndexingAvailability>( - _memo.getGroup(requirements.getSatisfiedPartialIndexesGroupId())._logicalProperties) - .getSatisfiedPartialIndexes(); const auto& requiredProjections = getPropertyConst<ProjectionRequirement>(_physProps).getProjections(); @@ -398,10 +394,6 @@ public: } for (const auto& [key, req] : reqMap) { - if (key.emptyPath()) { - // We cannot satisfy without a field. - return; - } if (key._projectionName != scanProjectionName) { // We can only satisfy partial schema requirements using our root projection. 
return; @@ -434,7 +426,7 @@ public: const auto& ceProperty = getPropertyConst<CardinalityEstimate>(_logicalProps); const CEType currentGroupCE = ceProperty.getEstimate(); - const PartialSchemaKeyCE& partialSchemaKeyCEMap = ceProperty.getPartialSchemaKeyCEMap(); + const PartialSchemaKeyCE& partialSchemaKeyCE = ceProperty.getPartialSchemaKeyCE(); if (indexReqTarget == IndexReqTarget::Index) { ProjectionCollationSpec requiredCollation; @@ -443,10 +435,18 @@ public: getPropertyConst<CollationRequirement>(_physProps).getCollationSpec(); } + const auto& satisfiedPartialIndexes = + getPropertyConst<IndexingAvailability>( + _memo.getGroup(requirements.getSatisfiedPartialIndexesGroupId()) + ._logicalProperties) + .getSatisfiedPartialIndexes(); + // Consider all candidate indexes, and check if they satisfy the collation and // distribution requirements. - for (const auto& [indexDefName, candidateIndexEntry] : candidateIndexMap) { + for (const auto& candidateIndexEntry : node.getCandidateIndexes()) { + const auto& indexDefName = candidateIndexEntry._indexDefName; const auto& indexDef = scanDef.getIndexDefs().at(indexDefName); + if (!indexDef.getPartialReqMap().empty() && (_hints._disableIndexes == DisableIndexOptions::DisablePartialOnly || satisfiedPartialIndexes.count(indexDefName) == 0)) { @@ -481,45 +481,35 @@ public: availableDirections._forward || availableDirections._backward); auto indexProjectionMap = candidateIndexEntry._fieldProjectionMap; - { - // Remove unused projections from the field projection map. 
- auto& fieldProjMap = indexProjectionMap._fieldProjections; - for (auto it = fieldProjMap.begin(); it != fieldProjMap.end();) { - const ProjectionName& projName = it->second; - if (!requiredProjections.find(projName).second && - candidateIndexEntry._residualRequirementsTempProjections.count( - projName) == 0) { - fieldProjMap.erase(it++); - } else { - it++; - } - } - } + auto residualReqs = candidateIndexEntry._residualRequirements; + removeRedundantResidualPredicates( + requiredProjections, residualReqs, indexProjectionMap); CEType indexCE = currentGroupCE; - ResidualRequirements residualRequirements; - if (!candidateIndexEntry._residualRequirements.empty()) { + ResidualRequirementsWithCE residualReqsWithCE; + std::vector<SelectivityType> indexPredSels; + if (!residualReqs.empty()) { PartialSchemaKeySet residualQueryKeySet; - for (const auto& [residualKey, residualReq] : - candidateIndexEntry._residualRequirements) { - const auto& queryKey = candidateIndexEntry._residualKeyMap.at(residualKey); - residualQueryKeySet.emplace(queryKey); - const CEType ce = partialSchemaKeyCEMap.at(queryKey); - residualRequirements.emplace_back(residualKey, residualReq, ce); + for (const auto& [residualKey, residualReq, entryIndex] : residualReqs) { + auto entryIt = reqMap.cbegin(); + std::advance(entryIt, entryIndex); + residualQueryKeySet.emplace(entryIt->first); + residualReqsWithCE.emplace_back( + residualKey, residualReq, partialSchemaKeyCE.at(entryIndex).second); } if (scanGroupCE > 0.0) { - std::vector<SelectivityType> indexPredSelectivities; + size_t entryIndex = 0; for (const auto& [key, req] : reqMap) { if (residualQueryKeySet.count(key) == 0) { - const CEType ce = partialSchemaKeyCEMap.at(key); - indexPredSelectivities.push_back(ce / scanGroupCE); + indexPredSels.push_back(partialSchemaKeyCE.at(entryIndex).second / + scanGroupCE); } + entryIndex++; } - if (!indexPredSelectivities.empty()) { - indexCE = scanGroupCE * - 
ce::conjExponentialBackoff(std::move(indexPredSelectivities)); + if (!indexPredSels.empty()) { + indexCE = scanGroupCE * ce::conjExponentialBackoff(indexPredSels); } } } @@ -529,9 +519,9 @@ public: NodeCEMap nodeCEMap; // TODO: consider pre-computing as part of the candidateIndexes structure. - const auto singularInterval = MultiKeyIntervalReqExpr::getSingularDNF(intervals); + const auto singularInterval = CompoundIntervalReqExpr::getSingularDNF(intervals); const bool needsUniqueStage = - (!singularInterval || !areMultiKeyIntervalsEqualities(*singularInterval)) && + (!singularInterval || !areCompoundIntervalsEqualities(*singularInterval)) && indexDef.isMultiKey() && requirements.getDedupRID(); indexProjectionMap._ridProjection = @@ -558,7 +548,7 @@ public: } lowerPartialSchemaRequirements( - indexCE, scanGroupCE, residualRequirements, physNode, nodeCEMap); + scanGroupCE, std::move(indexPredSels), residualReqsWithCE, physNode, nodeCEMap); if (needsUniqueStage) { // Insert unique stage if we need to, after the residual requirements. 
@@ -571,6 +561,9 @@ public: _queue, kDefaultPriority, std::move(physNode), {}, std::move(nodeCEMap)); } } else { + const auto& scanParams = node.getScanParams(); + tassert(6624102, "Empty scan params", scanParams); + bool canUseParallelScan = false; if (!distributionsCompatible(indexReqTarget, scanDef.getDistributionAndPaths(), @@ -581,21 +574,14 @@ public: return; } - FieldProjectionMap fieldProjectionMap; + FieldProjectionMap fieldProjectionMap = scanParams->_fieldProjectionMap; + ResidualRequirements residualReqs = scanParams->_residualRequirements; + removeRedundantResidualPredicates( + requiredProjections, residualReqs, fieldProjectionMap); + if (indexReqTarget == IndexReqTarget::Complete && needsRID) { fieldProjectionMap._ridProjection = ridProjName; } - - ProjectionRenames projectionRenames; - ResidualRequirements residualRequirements; - computePhysicalScanParams(_prefixId, - reqMap, - partialSchemaKeyCEMap, - requiredProjections, - residualRequirements, - projectionRenames, - fieldProjectionMap, - requiresRootProjection); if (requiresRootProjection) { fieldProjectionMap._rootProjection = scanProjectionName; } @@ -622,12 +608,14 @@ public: nodeCEMap.emplace(physNode.cast<Node>(), baseCE); } - applyProjectionRenames(std::move(projectionRenames), physNode, [&](const ABT& node) { - nodeCEMap.emplace(node.cast<Node>(), baseCE); - }); + ResidualRequirementsWithCE residualReqsWithCE; + for (const auto& [residualKey, residualReq, entryIndex] : residualReqs) { + residualReqsWithCE.emplace_back( + residualKey, residualReq, partialSchemaKeyCE.at(entryIndex).second); + } lowerPartialSchemaRequirements( - baseCE, scanGroupCE, residualRequirements, physNode, nodeCEMap); + baseCE, {} /*indexPredSels*/, residualReqsWithCE, physNode, nodeCEMap); optimizeChildrenNoAssert( _queue, kDefaultPriority, std::move(physNode), {}, std::move(nodeCEMap)); } diff --git a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp 
b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp index e6903a63cde..24d970f4ea6 100644 --- a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp +++ b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp @@ -573,18 +573,20 @@ static boost::optional<ABT> mergeSargableNodes( const ScanDefinition& scanDef = ctx.getMetadata()._scanDefs.at(indexingAvailability.getScanDefName()); - auto candidateIndexMap = computeCandidateIndexMap(ctx.getPrefixId(), - scanProjName, - mergedReqs, - scanDef, - ctx.getHints()._fastIndexNullHandling, - hasEmptyInterval); + auto candidateIndexes = computeCandidateIndexes(ctx.getPrefixId(), + scanProjName, + mergedReqs, + scanDef, + ctx.getHints()._fastIndexNullHandling, + hasEmptyInterval); if (hasEmptyInterval) { return createEmptyValueScanNode(ctx); } + auto scanParams = computeScanParams(ctx.getPrefixId(), mergedReqs, scanProjName); ABT result = make<SargableNode>(std::move(mergedReqs), - std::move(candidateIndexMap), + std::move(candidateIndexes), + std::move(scanParams), IndexReqTarget::Complete, belowNode.getChild()); applyProjectionRenames(std::move(projectionRenames), result); @@ -710,46 +712,49 @@ static void convertFilterToSargableNode(ABT::reference_type node, return; } - auto candidateIndexMap = computeCandidateIndexMap(ctx.getPrefixId(), - scanProjName, - conversion->_reqMap, - scanDef, - ctx.getHints()._fastIndexNullHandling, - hasEmptyInterval); + auto candidateIndexes = computeCandidateIndexes(ctx.getPrefixId(), + scanProjName, + conversion->_reqMap, + scanDef, + ctx.getHints()._fastIndexNullHandling, + hasEmptyInterval); if (hasEmptyInterval) { addEmptyValueScanNode(ctx); - } else { - ABT sargableNode = make<SargableNode>(std::move(conversion->_reqMap), - std::move(candidateIndexMap), - IndexReqTarget::Complete, - filterNode.getChild()); + return; + } - if (conversion->_retainPredicate) { - const GroupIdType childGroupId = - filterNode.getChild().cast<MemoLogicalDelegatorNode>()->getGroupId(); - if 
(childGroupId == indexingAvailability.getScanGroupId()) { - // Add node directly. - addSargableChildNode(node, std::move(sargableNode), ctx); - } else { - // Look at the child group to find SargableNodes and attempt to combine. - const auto& logicalNodes = ctx.getMemo().getGroup(childGroupId)._logicalNodes; - for (const ABT& childNode : logicalNodes.getVector()) { - if (auto childSargableNode = childNode.cast<SargableNode>(); - childSargableNode != nullptr) { - const auto& result = mergeSargableNodes(indexingAvailability, - scanDef.getNonMultiKeyPathSet(), - *sargableNode.cast<SargableNode>(), - *childSargableNode, - ctx); - if (result) { - addSargableChildNode(node, std::move(*result), ctx); - } - } + auto scanParams = computeScanParams(ctx.getPrefixId(), conversion->_reqMap, scanProjName); + ABT sargableNode = make<SargableNode>(std::move(conversion->_reqMap), + std::move(candidateIndexes), + std::move(scanParams), + IndexReqTarget::Complete, + filterNode.getChild()); + if (!conversion->_retainPredicate) { + ctx.addNode(sargableNode, isSubstitution); + return; + } + + const GroupIdType childGroupId = + filterNode.getChild().cast<MemoLogicalDelegatorNode>()->getGroupId(); + if (childGroupId == indexingAvailability.getScanGroupId()) { + // Add node directly. + addSargableChildNode(node, std::move(sargableNode), ctx); + } else { + // Look at the child group to find SargableNodes and attempt to combine. 
+ const auto& logicalNodes = ctx.getMemo().getGroup(childGroupId)._logicalNodes; + for (const ABT& childNode : logicalNodes.getVector()) { + if (auto childSargableNode = childNode.cast<SargableNode>(); + childSargableNode != nullptr) { + const auto& result = mergeSargableNodes(indexingAvailability, + scanDef.getNonMultiKeyPathSet(), + *sargableNode.cast<SargableNode>(), + *childSargableNode, + ctx); + if (result) { + addSargableChildNode(node, std::move(*result), ctx); } } - } else { - ctx.addNode(sargableNode, isSubstitution); } } } @@ -895,22 +900,25 @@ struct SubstituteConvert<EvaluationNode> { } bool hasEmptyInterval = false; - auto candidateIndexMap = computeCandidateIndexMap(ctx.getPrefixId(), - scanProjName, - conversion->_reqMap, - scanDef, - ctx.getHints()._fastIndexNullHandling, - hasEmptyInterval); + auto candidateIndexes = computeCandidateIndexes(ctx.getPrefixId(), + scanProjName, + conversion->_reqMap, + scanDef, + ctx.getHints()._fastIndexNullHandling, + hasEmptyInterval); if (hasEmptyInterval) { addEmptyValueScanNode(ctx); - } else { - ABT newNode = make<SargableNode>(std::move(conversion->_reqMap), - std::move(candidateIndexMap), - IndexReqTarget::Complete, - evalNode.getChild()); - ctx.addNode(newNode, true /*substitute*/); + return; } + + auto scanParams = computeScanParams(ctx.getPrefixId(), conversion->_reqMap, scanProjName); + ABT newNode = make<SargableNode>(std::move(conversion->_reqMap), + std::move(candidateIndexes), + std::move(scanParams), + IndexReqTarget::Complete, + evalNode.getChild()); + ctx.addNode(newNode, true /*substitute*/); } }; @@ -1076,25 +1084,25 @@ struct ExploreConvert<SargableNode> { } bool hasEmptyLeftInterval = false; - auto leftCandidateIndexMap = computeCandidateIndexMap(ctx.getPrefixId(), - scanProjectionName, - leftReqs, - scanDef, - fastIndexNullHandling, - hasEmptyLeftInterval); - if (isIndex && leftCandidateIndexMap.empty()) { + auto leftCandidateIndexes = computeCandidateIndexes(ctx.getPrefixId(), + 
scanProjectionName, + leftReqs, + scanDef, + fastIndexNullHandling, + hasEmptyLeftInterval); + if (isIndex && leftCandidateIndexes.empty()) { // Reject rewrite. continue; } bool hasEmptyRightInterval = false; - auto rightCandidateIndexMap = computeCandidateIndexMap(ctx.getPrefixId(), - scanProjectionName, - rightReqs, - scanDef, - fastIndexNullHandling, - hasEmptyRightInterval); - if (isIndex && rightCandidateIndexMap.empty()) { + auto rightCandidateIndexes = computeCandidateIndexes(ctx.getPrefixId(), + scanProjectionName, + rightReqs, + scanDef, + fastIndexNullHandling, + hasEmptyRightInterval); + if (isIndex && rightCandidateIndexes.empty()) { // With empty candidate map, reject only if we cannot implement as Seek. continue; } @@ -1104,13 +1112,18 @@ struct ExploreConvert<SargableNode> { ABT scanDelegator = make<MemoLogicalDelegatorNode>(scanGroupId); ABT leftChild = make<SargableNode>(std::move(leftReqs), - std::move(leftCandidateIndexMap), + std::move(leftCandidateIndexes), + boost::none, IndexReqTarget::Index, scanDelegator); + + auto rightScanParams = + computeScanParams(ctx.getPrefixId(), rightReqs, scanProjectionName); ABT rightChild = rightReqs.empty() ? scanDelegator : make<SargableNode>(std::move(rightReqs), - std::move(rightCandidateIndexMap), + std::move(rightCandidateIndexes), + std::move(rightScanParams), isIndex ? 
IndexReqTarget::Index : IndexReqTarget::Seek, scanDelegator); @@ -1249,13 +1262,13 @@ struct ExploreReorder<EvaluationNode, RIDIntersectNode> { } }; -void LogicalRewriter::registerRewrite(const LogicalRewriteType rewriteType, RewriteFn fn) { - if (_activeRewriteSet.find(rewriteType) != _activeRewriteSet.cend()) { - _rewriteMap.emplace(rewriteType, fn); - } -} - void LogicalRewriter::initializeRewrites() { + const auto registerRewrite = [& rewriteMap = _rewriteMap](const LogicalRewriteType rewriteType, + RewriteFn fn) { + const bool inserted = rewriteMap.emplace(rewriteType, fn).second; + invariant(inserted); + }; + registerRewrite( LogicalRewriteType::FilterEvaluationReorder, &LogicalRewriter::bindAboveBelow<FilterNode, EvaluationNode, SubstituteReorder>); diff --git a/src/mongo/db/query/optimizer/cascades/logical_rewriter.h b/src/mongo/db/query/optimizer/cascades/logical_rewriter.h index 97684ec992d..eb595e26bd8 100644 --- a/src/mongo/db/query/optimizer/cascades/logical_rewriter.h +++ b/src/mongo/db/query/optimizer/cascades/logical_rewriter.h @@ -106,7 +106,6 @@ private: template <class Type, template <class> class R> void bindSingleNode(MemoLogicalNodeId nodeMemoId); - void registerRewrite(LogicalRewriteType rewriteType, RewriteFn fn); void initializeRewrites(); static RewriteSet _explorationSet; diff --git a/src/mongo/db/query/optimizer/cascades/memo.cpp b/src/mongo/db/query/optimizer/cascades/memo.cpp index e1a0f4b0783..d827d450191 100644 --- a/src/mongo/db/query/optimizer/cascades/memo.cpp +++ b/src/mongo/db/query/optimizer/cascades/memo.cpp @@ -658,43 +658,17 @@ void Memo::estimateCE(const GroupIdType groupId, const bool useHeuristicCE) { auto ceProp = properties::CardinalityEstimate(estimate); if (auto sargablePtr = nodeRef.cast<SargableNode>(); sargablePtr != nullptr) { - auto& partialSchemaKeyCEMap = ceProp.getPartialSchemaKeyCEMap(); - invariant(partialSchemaKeyCEMap.empty()); - - struct CEEntry { - PartialSchemaKey _key; - CEType _ce; - size_t _count = 
1; - }; - boost::optional<CEEntry> prevEntry; - - const auto addEntryFn = [&]() { - // We take a geometric average of the entries with the same key. - partialSchemaKeyCEMap.emplace(prevEntry->_key, - std::pow(prevEntry->_ce, 1.0 / prevEntry->_count)); - }; + auto& partialSchemaKeyCE = ceProp.getPartialSchemaKeyCE(); + invariant(partialSchemaKeyCE.empty()); for (const auto& [key, req] : sargablePtr->getReqMap()) { ABT singularReq = make<SargableNode>(PartialSchemaRequirements{{key, req}}, - CandidateIndexMap{}, + CandidateIndexes{}, + ScanParams{}, sargablePtr->getTarget(), sargablePtr->getChild()); const CEType singularEst = ceEstimator->deriveCE(*this, props, singularReq.ref()); - - if (prevEntry) { - if (prevEntry->_key == key) { - prevEntry->_ce *= singularEst; - prevEntry->_count++; - } else { - addEntryFn(); - prevEntry = {{key, singularEst}}; - }; - } else { - prevEntry = {{key, singularEst}}; - } - } - if (prevEntry) { - addEntryFn(); + partialSchemaKeyCE.emplace_back(key, singularEst); } } diff --git a/src/mongo/db/query/optimizer/explain.cpp b/src/mongo/db/query/optimizer/explain.cpp index 2d681ccbd6f..bb2973c31c3 100644 --- a/src/mongo/db/query/optimizer/explain.cpp +++ b/src/mongo/db/query/optimizer/explain.cpp @@ -559,7 +559,7 @@ public: } else if constexpr (version == ExplainVersion::V3) { printer.fieldName(name).print(flag); } else { - static_assert("Unknown version"); + MONGO_UNREACHABLE; } } @@ -602,7 +602,7 @@ public: } else if constexpr (version == ExplainVersion::V3) { printer.separator(" ").fieldName(name).print(child); } else { - static_assert("Unknown version"); + MONGO_UNREACHABLE; } } @@ -638,7 +638,7 @@ public: } printer.fieldName("fieldProjectionMap").print(local); } else { - static_assert("Unknown version"); + MONGO_UNREACHABLE; } } @@ -735,7 +735,7 @@ public: .fieldName("highBound") .print(highBoundPrinter); } else { - static_assert("Version not implemented"); + MONGO_UNREACHABLE; } } @@ -750,7 +750,7 @@ public: return 
intervalPrinter.print(intervalExpr); } - void printInterval(ExplainPrinter& printer, const MultiKeyIntervalRequirement& interval) { + void printInterval(ExplainPrinter& printer, const CompoundIntervalRequirement& interval) { if constexpr (version < ExplainVersion::V3) { bool first = true; for (const auto& entry : interval) { @@ -770,7 +770,7 @@ public: } printer.print(printers); } else { - static_assert("Version not implemented"); + MONGO_UNREACHABLE; } } @@ -817,7 +817,7 @@ public: printer.print(childResults); return printer; } else { - static_assert("Version not implemented"); + MONGO_UNREACHABLE; } } @@ -1005,17 +1005,48 @@ public: parent.fieldName("requirementsMap").print(printers); } + void printResidualRequirements(ExplainPrinter& parent, + const ResidualRequirements& residualReqs) { + std::vector<ExplainPrinter> printers; + for (const auto& [key, req, entryIndex] : residualReqs) { + ExplainPrinter local; + + local.fieldName("refProjection").print(key._projectionName).separator(", "); + ExplainPrinter pathPrinter = generate(key._path); + local.fieldName("path").separator("'").printSingleLevel(pathPrinter).separator("', "); + + if (req.hasBoundProjectionName()) { + local.fieldName("boundProjection") + .print(req.getBoundProjectionName()) + .separator(", "); + } + + local.fieldName("intervals"); + { + ExplainPrinter intervals = printIntervalExpr(req.getIntervals()); + local.printSingleLevel(intervals, "" /*singleLevelSpacer*/); + } + local.separator(", ").fieldName("entryIndex").print(entryIndex); + + printers.push_back(std::move(local)); + } + + parent.fieldName("residualReqs").print(printers); + } + ExplainPrinter transport(const SargableNode& node, ExplainPrinter childResult, ExplainPrinter bindResult, ExplainPrinter refsResult) { + const auto& scanParams = node.getScanParams(); + ExplainPrinter printer("Sargable"); maybePrintProps(printer, node); printer.separator(" [") .fieldName("target", ExplainVersion::V3) 
.print(IndexReqTargetEnum::toString[static_cast<int>(node.getTarget())]) .separator("]") - .setChildCount(5); + .setChildCount(scanParams ? 6 : 5); if constexpr (version < ExplainVersion::V3) { ExplainPrinter local; @@ -1024,119 +1055,109 @@ public: } else if constexpr (version == ExplainVersion::V3) { printPartialSchemaReqMap(printer, node.getReqMap()); } else { - static_assert("Unknown version"); + MONGO_UNREACHABLE; } - std::vector<ExplainPrinter> candidateIndexesPrinters; - size_t candidateIndex = 0; - for (const auto& [indexDefName, candidateIndexEntry] : node.getCandidateIndexMap()) { - candidateIndex++; - ExplainPrinter local; - local.fieldName("candidateId") - .print(candidateIndex) - .separator(", ") - .fieldName("indexDefName", ExplainVersion::V3) - .print(indexDefName) - .separator(", "); + { + std::vector<ExplainPrinter> candidateIndexesPrinters; + for (size_t index = 0; index < node.getCandidateIndexes().size(); index++) { + const CandidateIndexEntry& candidateIndexEntry = + node.getCandidateIndexes().at(index); - local.separator("{"); - printFieldProjectionMap(local, candidateIndexEntry._fieldProjectionMap); - local.separator("}, {"); + ExplainPrinter local; + local.fieldName("candidateId") + .print(index + 1) + .separator(", ") + .fieldName("indexDefName", ExplainVersion::V3) + .print(candidateIndexEntry._indexDefName) + .separator(", "); - { - std::set<size_t> orderedFields; - for (const size_t fieldId : candidateIndexEntry._fieldsToCollate) { - orderedFields.insert(fieldId); - } + local.separator("{"); + printFieldProjectionMap(local, candidateIndexEntry._fieldProjectionMap); + local.separator("}, {"); - if constexpr (version < ExplainVersion::V3) { - bool first = true; - for (const size_t fieldId : orderedFields) { - if (first) { - first = false; - } else { - local.print(", "); + { + std::set<size_t> orderedFields; + for (const size_t fieldId : candidateIndexEntry._fieldsToCollate) { + orderedFields.insert(fieldId); + } + + if constexpr (version < 
ExplainVersion::V3) { + bool first = true; + for (const size_t fieldId : orderedFields) { + if (first) { + first = false; + } else { + local.print(", "); + } + local.print(fieldId); + } + } else if constexpr (version == ExplainVersion::V3) { + std::vector<ExplainPrinter> printers; + for (const size_t fieldId : orderedFields) { + ExplainPrinter local1; + local1.print(fieldId); + printers.push_back(std::move(local1)); } - local.print(fieldId); + local.fieldName("fieldsToCollate").print(printers); + } else { + MONGO_UNREACHABLE; } - } else if constexpr (version == ExplainVersion::V3) { - std::vector<ExplainPrinter> printers; - for (const size_t fieldId : orderedFields) { - ExplainPrinter local1; - local1.print(fieldId); - printers.push_back(std::move(local1)); + } + + local.separator("}, ").fieldName("intervals", ExplainVersion::V3); + { + IntervalPrinter<CompoundIntervalReqExpr> intervalPrinter(*this); + ExplainPrinter intervals = + intervalPrinter.print(candidateIndexEntry._intervals); + local.printSingleLevel(intervals, "" /*singleLevelSpacer*/); + } + + if (const auto& residualReqs = candidateIndexEntry._residualRequirements; + !residualReqs.empty()) { + if constexpr (version < ExplainVersion::V3) { + ExplainPrinter residualReqMapPrinter; + printResidualRequirements(residualReqMapPrinter, residualReqs); + local.print(residualReqMapPrinter); + } else if (version == ExplainVersion::V3) { + printResidualRequirements(local, residualReqs); + } else { + MONGO_UNREACHABLE; } - local.fieldName("fieldsToCollate").print(printers); - } else { - static_assert("Unknown version"); } - } - local.separator("}, ").fieldName("intervals", ExplainVersion::V3); - { - IntervalPrinter<MultiKeyIntervalReqExpr> intervalPrinter(*this); - ExplainPrinter intervals = intervalPrinter.print(candidateIndexEntry._intervals); - local.printSingleLevel(intervals, "" /*singleLevelSpacer*/); + candidateIndexesPrinters.push_back(std::move(local)); } + ExplainPrinter candidateIndexesPrinter; + 
candidateIndexesPrinter.fieldName("candidateIndexes").print(candidateIndexesPrinters); + printer.printAppend(candidateIndexesPrinter); + } - if (!candidateIndexEntry._residualRequirements.empty()) { + if (scanParams) { + ExplainPrinter local; + local.separator("{"); + printFieldProjectionMap(local, scanParams->_fieldProjectionMap); + local.separator("}"); + + if (const auto& residualReqs = scanParams->_residualRequirements; + !residualReqs.empty()) { if constexpr (version < ExplainVersion::V3) { ExplainPrinter residualReqMapPrinter; - printPartialSchemaReqMap(residualReqMapPrinter, - candidateIndexEntry._residualRequirements); + printResidualRequirements(residualReqMapPrinter, residualReqs); local.print(residualReqMapPrinter); } else if (version == ExplainVersion::V3) { - printPartialSchemaReqMap(local, candidateIndexEntry._residualRequirements); + printResidualRequirements(local, residualReqs); } else { - static_assert("Unknown version"); - } - } - - if (!candidateIndexEntry._residualKeyMap.empty()) { - std::vector<ExplainPrinter> residualKeyMapPrinters; - for (const auto& [queryKey, residualKey] : candidateIndexEntry._residualKeyMap) { - ExplainPrinter local1; - - ExplainPrinter pathPrinter = generate(queryKey._path); - local1.fieldName("queryRefProjection") - .print(queryKey._projectionName) - .separator(", ") - .fieldName("queryPath") - .separator("'") - .printSingleLevel(pathPrinter) - .separator("', ") - .fieldName("residualRefProjection") - .print(residualKey._projectionName) - .separator(", "); - - ExplainPrinter pathPrinter1 = generate(residualKey._path); - local1.fieldName("residualPath") - .separator("'") - .printSingleLevel(pathPrinter1) - .separator("'"); - - residualKeyMapPrinters.push_back(std::move(local1)); + MONGO_UNREACHABLE; } - - local.fieldName("residualKeyMap").print(residualKeyMapPrinters); - - std::vector<ExplainPrinter> projNamePrinters; - for (const ProjectionName& projName : - candidateIndexEntry._residualRequirementsTempProjections) { - 
ExplainPrinter local1; - local1.print(projName); - projNamePrinters.push_back(std::move(local1)); - } - local.fieldName("tempProjections").print(projNamePrinters); } - candidateIndexesPrinters.push_back(std::move(local)); + ExplainPrinter scanParamsPrinter; + scanParamsPrinter.fieldName("scanParams").print(local); + printer.printAppend(scanParamsPrinter); } - ExplainPrinter candidateIndexesPrinter; - candidateIndexesPrinter.fieldName("candidateIndexes").print(candidateIndexesPrinters); - printer.printAppend(candidateIndexesPrinter) - .fieldName("bindings", ExplainVersion::V3) + printer.fieldName("bindings", ExplainVersion::V3) .print(bindResult) .fieldName("references", ExplainVersion::V3) .print(refsResult) @@ -1206,7 +1227,7 @@ public: } printer.fieldName("correlatedProjections").print(printers); } else { - static_assert("Version not implemented"); + MONGO_UNREACHABLE; } printer.separator("]") @@ -1243,7 +1264,7 @@ public: } printer.print(printers); } else { - static_assert("Version not implemented"); + MONGO_UNREACHABLE; } } @@ -1301,7 +1322,7 @@ public: } collationPrinter.print(printers); } else { - static_assert("Version not implemented"); + MONGO_UNREACHABLE; } printer.setChildCount(4) @@ -1583,9 +1604,10 @@ public: cePrinter.fieldName("ce").print(prop.getEstimate()); fieldPrinters.push_back(std::move(cePrinter)); - if (!prop.getPartialSchemaKeyCEMap().empty()) { + if (const auto& partialSchemaKeyCE = prop.getPartialSchemaKeyCE(); + !partialSchemaKeyCE.empty()) { std::vector<ExplainPrinter> reqPrinters; - for (const auto& [key, ce] : prop.getPartialSchemaKeyCEMap()) { + for (const auto& [key, ce] : partialSchemaKeyCE) { ExplainGeneratorTransporter<version> gen; ExplainPrinter pathPrinter = gen.generate(key._path); @@ -2027,7 +2049,7 @@ public: } printer.fieldName("projections").print(printers); } else { - static_assert("Unknown version"); + MONGO_UNREACHABLE; } } @@ -2072,7 +2094,7 @@ public: } else if constexpr (version == ExplainVersion::V3) { 
printer.fieldName("maxDepth", ExplainVersion::V3).print(path.getMaxDepth()); } else { - static_assert("Unknown version"); + MONGO_UNREACHABLE; } printer.separator("]") diff --git a/src/mongo/db/query/optimizer/index_bounds.cpp b/src/mongo/db/query/optimizer/index_bounds.cpp index 58209ff8e75..908595bc5de 100644 --- a/src/mongo/db/query/optimizer/index_bounds.cpp +++ b/src/mongo/db/query/optimizer/index_bounds.cpp @@ -117,10 +117,6 @@ bool PartialSchemaKey::operator==(const PartialSchemaKey& other) const { return _projectionName == other._projectionName && _path == other._path; } -bool PartialSchemaKey::emptyPath() const { - return _path.is<PathIdentity>(); -} - bool isIntervalReqFullyOpenDNF(const IntervalReqExpr::Node& n) { if (auto singular = IntervalReqExpr::getSingularDNF(n); singular && singular->isFullyOpen()) { return true; @@ -218,19 +214,42 @@ bool PartialSchemaKeyLessComparator::operator()(const PartialSchemaKey& k1, ResidualRequirement::ResidualRequirement(PartialSchemaKey key, PartialSchemaRequirement req, - CEType ce) + size_t entryIndex) + : _key(std::move(key)), _req(std::move(req)), _entryIndex(entryIndex) {} + +bool ResidualRequirement::operator==(const ResidualRequirement& other) const { + return _key == other._key && _req == other._req && _entryIndex == other._entryIndex; +} + +ResidualRequirementWithCE::ResidualRequirementWithCE(PartialSchemaKey key, + PartialSchemaRequirement req, + CEType ce) : _key(std::move(key)), _req(std::move(req)), _ce(ce) {} +CandidateIndexEntry::CandidateIndexEntry(std::string indexDefName) + : _indexDefName(std::move(indexDefName)), + _fieldProjectionMap(), + _intervals(CompoundIntervalReqExpr::makeSingularDNF()), + _residualRequirements(), + _fieldsToCollate(), + _intervalPrefixSize(0) {} + bool CandidateIndexEntry::operator==(const CandidateIndexEntry& other) const { - return _fieldProjectionMap == other._fieldProjectionMap && _intervals == other._intervals && + return _indexDefName == other._indexDefName && + 
_fieldProjectionMap == other._fieldProjectionMap && _intervals == other._intervals && _residualRequirements == other._residualRequirements && _fieldsToCollate == other._fieldsToCollate && _intervalPrefixSize == other._intervalPrefixSize; } +bool ScanParams::operator==(const ScanParams& other) const { + return _fieldProjectionMap == other._fieldProjectionMap && + _residualRequirements == other._residualRequirements; +} + IndexSpecification::IndexSpecification(std::string scanDefName, std::string indexDefName, - MultiKeyIntervalRequirement interval, + CompoundIntervalRequirement interval, bool reverseOrder) : _scanDefName(std::move(scanDefName)), _indexDefName(std::move(indexDefName)), @@ -250,7 +269,7 @@ const std::string& IndexSpecification::getIndexDefName() const { return _indexDefName; } -const MultiKeyIntervalRequirement& IndexSpecification::getInterval() const { +const CompoundIntervalRequirement& IndexSpecification::getInterval() const { return _interval; } diff --git a/src/mongo/db/query/optimizer/index_bounds.h b/src/mongo/db/query/optimizer/index_bounds.h index 79bffce2e1d..547a278609f 100644 --- a/src/mongo/db/query/optimizer/index_bounds.h +++ b/src/mongo/db/query/optimizer/index_bounds.h @@ -85,8 +85,6 @@ struct PartialSchemaKey { bool operator==(const PartialSchemaKey& other) const; - bool emptyPath() const; - // Referred, or input projection name. ProjectionName _projectionName; @@ -141,64 +139,83 @@ using PartialSchemaRequirements = std::multimap<PartialSchemaKey, PartialSchemaRequirement, PartialSchemaKeyLessComparator>; /** - * Used to track cardinality estimates per predicate inside a PartialSchemaRequirement. - * We currently have a single estimate per PartialSchemaKey for all matching entries in the primary - * map. - * TODO: SERVER-68092 Relax constraint described above. + * Used to track cardinality estimates per predicate inside a PartialSchemaRequirement. 
The order of + * estimates is the same as the order in the primary PartialSchemaRequirements map. */ -using PartialSchemaKeyCE = std::map<PartialSchemaKey, CEType, PartialSchemaKeyLessComparator>; -using ResidualKeyMap = std::map<PartialSchemaKey, PartialSchemaKey, PartialSchemaKeyLessComparator>; +using PartialSchemaKeyCE = std::vector<std::pair<PartialSchemaKey, CEType>>; using PartialSchemaKeySet = std::set<PartialSchemaKey, PartialSchemaKeyLessComparator>; // Requirements which are not satisfied directly by an IndexScan, PhysicalScan or Seek (e.g. using -// an index field, or scan field). They are intended to be sorted in their containing vector from -// most to least selective. +// an index field, or scan field). The index refers to the underlying entry in the +// PartialSchemaRequirement map. struct ResidualRequirement { - ResidualRequirement(PartialSchemaKey key, PartialSchemaRequirement req, CEType ce); + ResidualRequirement(PartialSchemaKey key, PartialSchemaRequirement req, size_t entryIndex); + + bool operator==(const ResidualRequirement& other) const; PartialSchemaKey _key; PartialSchemaRequirement _req; - CEType _ce; + size_t _entryIndex; }; using ResidualRequirements = std::vector<ResidualRequirement>; +struct ResidualRequirementWithCE { + ResidualRequirementWithCE(PartialSchemaKey key, PartialSchemaRequirement req, CEType ce); + + PartialSchemaKey _key; + PartialSchemaRequirement _req; + CEType _ce; +}; +using ResidualRequirementsWithCE = std::vector<ResidualRequirementWithCE>; + + // A sequence of intervals corresponding, one for each index key. -using MultiKeyIntervalRequirement = std::vector<IntervalRequirement>; +using CompoundIntervalRequirement = std::vector<IntervalRequirement>; -// Multi-key intervals represent unions and conjunctions of individual multi-key intervals. -using MultiKeyIntervalReqExpr = BoolExpr<MultiKeyIntervalRequirement>; +// Unions and conjunctions of individual compound intervals. 
+using CompoundIntervalReqExpr = BoolExpr<CompoundIntervalRequirement>; // Used to pre-compute candidate indexes for SargableNodes. struct CandidateIndexEntry { + CandidateIndexEntry(std::string indexDefName); + bool operator==(const CandidateIndexEntry& other) const; - FieldProjectionMap _fieldProjectionMap; - MultiKeyIntervalReqExpr::Node _intervals; + std::string _indexDefName; - PartialSchemaRequirements _residualRequirements; - // Projections needed to evaluate residual requirements. - ProjectionNameSet _residualRequirementsTempProjections; + FieldProjectionMap _fieldProjectionMap; + CompoundIntervalReqExpr::Node _intervals; - // Used for CE. Mapping for residual requirement key to query key. - ResidualKeyMap _residualKeyMap; + // Requirements which are not satisfied directly by the IndexScan. They are intended to be + // sorted in their containing vector from most to least selective. + ResidualRequirements _residualRequirements; // We have equalities on those index fields, and thus do not consider for collation // requirements. - // TODO: consider a bitset. opt::unordered_set<size_t> _fieldsToCollate; // Length of prefix of fields with applied intervals. size_t _intervalPrefixSize; }; -using CandidateIndexMap = std::map<std::string /*index name*/, CandidateIndexEntry>; +struct ScanParams { + bool operator==(const ScanParams& other) const; + + FieldProjectionMap _fieldProjectionMap; + + // Requirements which are not satisfied directly by a PhysicalScan or Seek. They are intended to + // be sorted in their containing vector from most to least selective. 
+ ResidualRequirements _residualRequirements; +}; + +using CandidateIndexes = std::vector<CandidateIndexEntry>; class IndexSpecification { public: IndexSpecification(std::string scanDefName, std::string indexDefName, - MultiKeyIntervalRequirement interval, + CompoundIntervalRequirement interval, bool reverseOrder); bool operator==(const IndexSpecification& other) const; @@ -206,7 +223,7 @@ public: const std::string& getScanDefName() const; const std::string& getIndexDefName() const; - const MultiKeyIntervalRequirement& getInterval() const; + const CompoundIntervalRequirement& getInterval() const; bool isReverseOrder() const; @@ -218,7 +235,7 @@ private: const std::string _indexDefName; // The index interval. - MultiKeyIntervalRequirement _interval; + CompoundIntervalRequirement _interval; // Do we reverse the index order. const bool _reverseOrder; diff --git a/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp b/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp index 62b5467c2fe..d5664d3cc2b 100644 --- a/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp +++ b/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp @@ -1120,10 +1120,15 @@ TEST(LogicalRewriter, FilterUnionUnionPushdown) { "| | [ptest]\n" "| | Source []\n" "| Sargable [Complete]\n" - "| | | | | requirementsMap: \n" - "| | | | | refProjection: ptest, path: 'PathGet [a] PathTraverse [1] " + "| | | | | | requirementsMap: \n" + "| | | | | | refProjection: ptest, path: 'PathGet [a] PathTraverse [1] " "PathIdentity []', intervals: {{{[Const [1], Const [1]]}}}\n" - "| | | | candidateIndexes: \n" + "| | | | | candidateIndexes: \n" + "| | | | scanParams: \n" + "| | | | {'a': evalTemp_0}\n" + "| | | | residualReqs: \n" + "| | | | refProjection: evalTemp_0, path: 'PathTraverse [1] " + "PathIdentity []', intervals: {{{[Const [1], Const [1]]}}}, entryIndex: 0\n" "| | | BindBlock:\n" "| | RefBlock: \n" "| | Variable [ptest]\n" @@ -1136,10 +1141,15 @@ 
TEST(LogicalRewriter, FilterUnionUnionPushdown) { "| | [ptest]\n" "| | Source []\n" "| Sargable [Complete]\n" - "| | | | | requirementsMap: \n" - "| | | | | refProjection: ptest, path: 'PathGet [a] PathTraverse [1] " + "| | | | | | requirementsMap: \n" + "| | | | | | refProjection: ptest, path: 'PathGet [a] PathTraverse [1] " "PathIdentity []', intervals: {{{[Const [1], Const [1]]}}}\n" - "| | | | candidateIndexes: \n" + "| | | | | candidateIndexes: \n" + "| | | | scanParams: \n" + "| | | | {'a': evalTemp_2}\n" + "| | | | residualReqs: \n" + "| | | | refProjection: evalTemp_2, path: 'PathTraverse [1] " + "PathIdentity []', intervals: {{{[Const [1], Const [1]]}}}, entryIndex: 0\n" "| | | BindBlock:\n" "| | RefBlock: \n" "| | Variable [ptest]\n" @@ -1148,11 +1158,15 @@ TEST(LogicalRewriter, FilterUnionUnionPushdown) { "| [ptest]\n" "| Source []\n" "Sargable [Complete]\n" - "| | | | requirementsMap: \n" - "| | | | refProjection: ptest, path: 'PathGet [a] PathTraverse [1] " - "PathIdentity " - "[]', intervals: {{{[Const [1], Const [1]]}}}\n" - "| | | candidateIndexes: \n" + "| | | | | requirementsMap: \n" + "| | | | | refProjection: ptest, path: 'PathGet [a] PathTraverse [1] " + "PathIdentity []', intervals: {{{[Const [1], Const [1]]}}}\n" + "| | | | candidateIndexes: \n" + "| | | scanParams: \n" + "| | | {'a': evalTemp_1}\n" + "| | | residualReqs: \n" + "| | | refProjection: evalTemp_1, path: 'PathTraverse [1] PathIdentity " + "[]', intervals: {{{[Const [1], Const [1]]}}}, entryIndex: 0\n" "| | BindBlock:\n" "| RefBlock: \n" "| Variable [ptest]\n" @@ -1254,10 +1268,12 @@ TEST(LogicalRewriter, UnionPreservesCommonLogicalProps) { " | logicalNodes: \n" " | logicalNodeId: 0\n" " | Sargable [Complete]\n" - " | | | | | requirementsMap: \n" - " | | | | | refProjection: ptest1, path: 'PathGet [a] " + " | | | | | | requirementsMap: \n" + " | | | | | | refProjection: ptest1, path: 'PathGet [a] " "PathIdentity []', boundProjection: a, intervals: {{{[Const [minKey], Const 
[maxKey]]}}}\n" - " | | | | candidateIndexes: \n" + " | | | | | candidateIndexes: \n" + " | | | | scanParams: \n" + " | | | | {'a': a}\n" " | | | BindBlock:\n" " | | | [a]\n" " | | | Source []\n" @@ -1314,10 +1330,12 @@ TEST(LogicalRewriter, UnionPreservesCommonLogicalProps) { " | logicalNodes: \n" " | logicalNodeId: 0\n" " | Sargable [Complete]\n" - " | | | | | requirementsMap: \n" - " | | | | | refProjection: ptest2, path: 'PathGet [a] " + " | | | | | | requirementsMap: \n" + " | | | | | | refProjection: ptest2, path: 'PathGet [a] " "PathIdentity []', boundProjection: a, intervals: {{{[Const [minKey], Const [maxKey]]}}}\n" - " | | | | candidateIndexes: \n" + " | | | | | candidateIndexes: \n" + " | | | | scanParams: \n" + " | | | | {'a': a}\n" " | | | BindBlock:\n" " | | | [a]\n" " | | | Source []\n" @@ -1456,12 +1474,19 @@ TEST(LogicalRewriter, SargableCE) { " | logicalNodes: \n" " | logicalNodeId: 0\n" " | Sargable [Complete]\n" - " | | | | | requirementsMap: \n" - " | | | | | refProjection: ptest, path: 'PathGet [a] PathIdentity " - "[]', intervals: {{{[Const [1], Const [1]]}}}\n" - " | | | | | refProjection: ptest, path: 'PathGet [b] PathIdentity " - "[]', intervals: {{{[Const [2], Const [2]]}}}\n" - " | | | | candidateIndexes: \n" + " | | | | | | requirementsMap: \n" + " | | | | | | refProjection: ptest, path: 'PathGet [a] " + "PathIdentity []', intervals: {{{[Const [1], Const [1]]}}}\n" + " | | | | | | refProjection: ptest, path: 'PathGet [b] " + "PathIdentity []', intervals: {{{[Const [2], Const [2]]}}}\n" + " | | | | | candidateIndexes: \n" + " | | | | scanParams: \n" + " | | | | {'a': evalTemp_2, 'b': evalTemp_3}\n" + " | | | | residualReqs: \n" + " | | | | refProjection: evalTemp_2, path: 'PathIdentity " + "[]', intervals: {{{[Const [1], Const [1]]}}}, entryIndex: 0\n" + " | | | | refProjection: evalTemp_3, path: 'PathIdentity " + "[]', intervals: {{{[Const [2], Const [2]]}}}, entryIndex: 1\n" " | | | BindBlock:\n" " | | RefBlock: \n" " | | Variable 
[ptest]\n" diff --git a/src/mongo/db/query/optimizer/node.cpp b/src/mongo/db/query/optimizer/node.cpp index b3d656930c3..3f0cc0d5203 100644 --- a/src/mongo/db/query/optimizer/node.cpp +++ b/src/mongo/db/query/optimizer/node.cpp @@ -327,14 +327,16 @@ static ProjectionNameVector createSargableReferences(const PartialSchemaRequirem } SargableNode::SargableNode(PartialSchemaRequirements reqMap, - CandidateIndexMap candidateIndexMap, + CandidateIndexes candidateIndexes, + boost::optional<ScanParams> scanParams, const IndexReqTarget target, ABT child) : Base(std::move(child), buildSimpleBinder(createSargableBindings(reqMap)), make<References>(createSargableReferences(reqMap))), _reqMap(std::move(reqMap)), - _candidateIndexMap(std::move(candidateIndexMap)), + _candidateIndexes(std::move(candidateIndexes)), + _scanParams(std::move(scanParams)), _target(target) { assertNodeSort(getChild()); uassert(6624085, "Empty requirements map", !_reqMap.empty()); @@ -360,16 +362,21 @@ SargableNode::SargableNode(PartialSchemaRequirements reqMap, } bool SargableNode::operator==(const SargableNode& other) const { - return _reqMap == other._reqMap && _candidateIndexMap == other._candidateIndexMap && - _target == other._target && getChild() == other.getChild(); + // Specifically not comparing the candidate indexes and ScanParams. Those are derivative of the + // requirements, and can have temp projection names. 
+ return _reqMap == other._reqMap && _target == other._target && getChild() == other.getChild(); } const PartialSchemaRequirements& SargableNode::getReqMap() const { return _reqMap; } -const CandidateIndexMap& SargableNode::getCandidateIndexMap() const { - return _candidateIndexMap; +const CandidateIndexes& SargableNode::getCandidateIndexes() const { + return _candidateIndexes; +} + +const boost::optional<ScanParams>& SargableNode::getScanParams() const { + return _scanParams; } IndexReqTarget SargableNode::getTarget() const { diff --git a/src/mongo/db/query/optimizer/node.h b/src/mongo/db/query/optimizer/node.h index 9a30db0f4e5..ded879d932a 100644 --- a/src/mongo/db/query/optimizer/node.h +++ b/src/mongo/db/query/optimizer/node.h @@ -427,7 +427,8 @@ class SargableNode final : public Operator<SargableNode, 3>, public Node, public public: SargableNode(PartialSchemaRequirements reqMap, - CandidateIndexMap candidateIndexMap, + CandidateIndexes candidateIndexes, + boost::optional<ScanParams> scanParams, IndexReqTarget target, ABT child); @@ -447,14 +448,16 @@ public: } const PartialSchemaRequirements& getReqMap() const; - const CandidateIndexMap& getCandidateIndexMap() const; + const CandidateIndexes& getCandidateIndexes() const; + const boost::optional<ScanParams>& getScanParams() const; IndexReqTarget getTarget() const; private: const PartialSchemaRequirements _reqMap; - CandidateIndexMap _candidateIndexMap; + CandidateIndexes _candidateIndexes; + boost::optional<ScanParams> _scanParams; // Performance optimization to limit number of groups. // Under what indexing requirements can this node be implemented. 
diff --git a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp index 4d6d91c912d..ac07379ea77 100644 --- a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp +++ b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp @@ -498,13 +498,13 @@ TEST(PhysRewriter, DuplicateFilter) { "| Variable [root]\n" "Filter []\n" "| EvalFilter []\n" - "| | Variable [evalTemp_0]\n" + "| | Variable [evalTemp_2]\n" "| PathTraverse [1]\n" "| PathCompare [Eq]\n" "| Const [0]\n" - "PhysicalScan [{'<root>': root, 'a': evalTemp_0}, c1]\n" + "PhysicalScan [{'<root>': root, 'a': evalTemp_2}, c1]\n" " BindBlock:\n" - " [evalTemp_0]\n" + " [evalTemp_2]\n" " Source []\n" " [root]\n" " Source []\n", @@ -564,13 +564,13 @@ TEST(PhysRewriter, FilterCollation) { "| Variable [pb]\n" "Filter []\n" "| EvalFilter []\n" - "| | Variable [evalTemp_0]\n" + "| | Variable [evalTemp_1]\n" "| PathTraverse [1]\n" "| PathCompare [Eq]\n" "| Const [1]\n" - "PhysicalScan [{'a': evalTemp_0, 'b': pb}, c1]\n" + "PhysicalScan [{'a': evalTemp_1, 'b': pb}, c1]\n" " BindBlock:\n" - " [evalTemp_0]\n" + " [evalTemp_1]\n" " Source []\n" " [pb]\n" " Source []\n", @@ -1255,7 +1255,7 @@ TEST(PhysRewriter, FilterIndexing4) { phaseManager.getHints()._disableHashJoinRIDIntersect = true; ASSERT_TRUE(phaseManager.optimize(optimized)); - ASSERT_BETWEEN(30, 50, phaseManager.getMemo().getStats()._physPlanExplorationCount); + ASSERT_BETWEEN(60, 75, phaseManager.getMemo().getStats()._physPlanExplorationCount); ASSERT_EXPLAIN_V2( "Root []\n" @@ -1265,29 +1265,29 @@ TEST(PhysRewriter, FilterIndexing4) { "| Variable [pa]\n" "Filter []\n" "| EvalFilter []\n" - "| | Variable [evalTemp_8]\n" + "| | Variable [evalTemp_14]\n" "| PathCompare [Gt]\n" "| Const [1]\n" "Filter []\n" "| EvalFilter []\n" - "| | Variable [evalTemp_7]\n" + "| | Variable [evalTemp_13]\n" "| PathCompare [Gt]\n" "| Const [1]\n" "Filter []\n" "| EvalFilter []\n" - "| 
| Variable [evalTemp_6]\n" + "| | Variable [evalTemp_12]\n" "| PathCompare [Gt]\n" "| Const [1]\n" - "IndexScan [{'<indexKey> 0': pa, '<indexKey> 1': evalTemp_6, '<indexKey> 2': evalTemp_7, " - "'<indexKey> 3': evalTemp_8}, scanDefName: c1, indexDefName: index1, interval: {(Const " + "IndexScan [{'<indexKey> 0': pa, '<indexKey> 1': evalTemp_12, '<indexKey> 2': evalTemp_13, " + "'<indexKey> 3': evalTemp_14}, scanDefName: c1, indexDefName: index1, interval: {(Const " "[1], Const [maxKey]], [Const [minKey], Const [maxKey]], [Const [minKey], Const [maxKey]], " "[Const [minKey], Const [maxKey]]}]\n" " BindBlock:\n" - " [evalTemp_6]\n" + " [evalTemp_12]\n" " Source []\n" - " [evalTemp_7]\n" + " [evalTemp_13]\n" " Source []\n" - " [evalTemp_8]\n" + " [evalTemp_14]\n" " Source []\n" " [pa]\n" " Source []\n", @@ -1367,12 +1367,19 @@ TEST(PhysRewriter, FilterIndexing5) { "| | Variable [pb]\n" "| PathCompare [Gt]\n" "| Const [0]\n" - "IndexScan [{'<indexKey> 0': pa, '<indexKey> 1': pb}, scanDefName: c1, indexDefName: " - "index1, interval: {(Const [0], Const [maxKey]], [Const [minKey], Const [maxKey]]}]\n" + "Evaluation []\n" + "| BindBlock:\n" + "| [pb]\n" + "| EvalPath []\n" + "| | Variable [evalTemp_0]\n" + "| PathIdentity []\n" + "IndexScan [{'<indexKey> 0': pa, '<indexKey> 1': evalTemp_0}, scanDefName: c1, " + "indexDefName: index1, interval: {(Const [0], Const [maxKey]], [Const [minKey], Const " + "[maxKey]]}]\n" " BindBlock:\n" - " [pa]\n" + " [evalTemp_0]\n" " Source []\n" - " [pb]\n" + " [pa]\n" " Source []\n", optimized); } @@ -1506,7 +1513,7 @@ TEST(PhysRewriter, FilterIndexingStress) { // Without the changes to restrict SargableNode split to which this test is tied, we would // be exploring 2^kFilterCount plans, one for each created group. 
- ASSERT_EQ(55, phaseManager.getMemo().getStats()._physPlanExplorationCount); + ASSERT_BETWEEN(60, 80, phaseManager.getMemo().getStats()._physPlanExplorationCount); const BSONObj& explainRoot = ExplainGenerator::explainBSONObj(optimized); ASSERT_BSON_PATH("\"Filter\"", explainRoot, "child.nodeType"); @@ -1690,7 +1697,7 @@ TEST(PhysRewriter, FilterIndexingMaxKey) { "| Variable [root]\n" "Filter []\n" "| EvalFilter []\n" - "| | Variable [evalTemp_1]\n" + "| | Variable [evalTemp_3]\n" "| PathTraverse [1]\n" "| PathComposeM []\n" "| | PathCompare [Lt]\n" @@ -1699,15 +1706,15 @@ TEST(PhysRewriter, FilterIndexingMaxKey) { "| Const [2]\n" "Filter []\n" "| EvalFilter []\n" - "| | Variable [evalTemp_0]\n" + "| | Variable [evalTemp_2]\n" "| PathTraverse [1]\n" "| PathCompare [Gt]\n" "| Const [1]\n" - "PhysicalScan [{'<root>': root, 'a': evalTemp_0, 'b': evalTemp_1}, c1]\n" + "PhysicalScan [{'<root>': root, 'a': evalTemp_2, 'b': evalTemp_3}, c1]\n" " BindBlock:\n" - " [evalTemp_0]\n" + " [evalTemp_2]\n" " Source []\n" - " [evalTemp_1]\n" + " [evalTemp_3]\n" " Source []\n" " [root]\n" " Source []\n", @@ -1767,47 +1774,46 @@ TEST(PhysRewriter, FilterReorder) { "| Variable [root]\n" "Filter []\n" "| EvalFilter []\n" - "| | Variable [evalTemp_0]\n" + "| | Variable [evalTemp_14]\n" "| PathTraverse [1]\n" "| PathCompare [Eq]\n" "| Const [0]\n" "Filter []\n" "| EvalFilter []\n" - "| | Variable [evalTemp_1]\n" + "| | Variable [evalTemp_15]\n" "| PathTraverse [1]\n" "| PathCompare [Eq]\n" "| Const [1]\n" "Filter []\n" "| EvalFilter []\n" - "| | Variable [evalTemp_2]\n" + "| | Variable [evalTemp_16]\n" "| PathTraverse [1]\n" "| PathCompare [Eq]\n" "| Const [2]\n" "Filter []\n" "| EvalFilter []\n" - "| | Variable [evalTemp_3]\n" + "| | Variable [evalTemp_17]\n" "| PathTraverse [1]\n" "| PathCompare [Eq]\n" "| Const [3]\n" "Filter []\n" "| EvalFilter []\n" - "| | Variable [evalTemp_4]\n" + "| | Variable [evalTemp_18]\n" "| PathTraverse [1]\n" "| PathCompare [Eq]\n" "| Const [4]\n" - 
"PhysicalScan [{'<root>': root, 'field_0': evalTemp_0, 'field_1': evalTemp_1, " - "'field_2': " - "evalTemp_2, 'field_3': evalTemp_3, 'field_4': evalTemp_4}, c1]\n" + "PhysicalScan [{'<root>': root, 'field_0': evalTemp_14, 'field_1': evalTemp_15, 'field_2': " + "evalTemp_16, 'field_3': evalTemp_17, 'field_4': evalTemp_18}, c1]\n" " BindBlock:\n" - " [evalTemp_0]\n" + " [evalTemp_14]\n" " Source []\n" - " [evalTemp_1]\n" + " [evalTemp_15]\n" " Source []\n" - " [evalTemp_2]\n" + " [evalTemp_16]\n" " Source []\n" - " [evalTemp_3]\n" + " [evalTemp_17]\n" " Source []\n" - " [evalTemp_4]\n" + " [evalTemp_18]\n" " Source []\n" " [root]\n" " Source []\n", @@ -2397,7 +2403,7 @@ TEST(PhysRewriter, CompoundIndex1) { ABT optimized = rootNode; ASSERT_TRUE(phaseManager.optimize(optimized)); - ASSERT_BETWEEN(25, 35, phaseManager.getMemo().getStats()._physPlanExplorationCount); + ASSERT_BETWEEN(35, 50, phaseManager.getMemo().getStats()._physPlanExplorationCount); const BSONObj& explainRoot = ExplainGenerator::explainBSONObj(optimized); ASSERT_BSON_PATH("\"BinaryJoin\"", explainRoot, "child.nodeType"); @@ -2485,7 +2491,7 @@ TEST(PhysRewriter, CompoundIndex2) { ABT optimized = rootNode; ASSERT_TRUE(phaseManager.optimize(optimized)); - ASSERT_BETWEEN(40, 60, phaseManager.getMemo().getStats()._physPlanExplorationCount); + ASSERT_BETWEEN(60, 80, phaseManager.getMemo().getStats()._physPlanExplorationCount); const BSONObj& explainRoot = ExplainGenerator::explainBSONObj(optimized); ASSERT_BSON_PATH("\"BinaryJoin\"", explainRoot, "child.nodeType"); @@ -2569,7 +2575,7 @@ TEST(PhysRewriter, CompoundIndex3) { ABT optimized = rootNode; ASSERT_TRUE(phaseManager.optimize(optimized)); - ASSERT_BETWEEN(40, 60, phaseManager.getMemo().getStats()._physPlanExplorationCount); + ASSERT_BETWEEN(50, 70, phaseManager.getMemo().getStats()._physPlanExplorationCount); // Demonstrate we have a merge join because we have point predicates. 
const BSONObj& explainRoot = ExplainGenerator::explainBSONObj(optimized); @@ -2672,7 +2678,7 @@ TEST(PhysRewriter, CompoundIndex4Negative) { ASSERT_BSON_PATH("\"BinaryJoin\"", explainRoot, "child.nodeType"); ASSERT_BSON_PATH("\"Filter\"", explainRoot, "child.rightChild.nodeType"); ASSERT_BSON_PATH("2", explainRoot, "child.rightChild.filter.path.value.value"); - ASSERT_BSON_PATH("\"Seek\"", explainRoot, "child.rightChild.child.child.child.nodeType"); + ASSERT_BSON_PATH("\"Seek\"", explainRoot, "child.rightChild.child.child.nodeType"); ASSERT_BSON_PATH("\"IndexScan\"", explainRoot, "child.leftChild.nodeType"); ASSERT_BSON_PATH("1", explainRoot, "child.leftChild.interval.0.lowBound.bound.value"); @@ -2725,7 +2731,7 @@ TEST(PhysRewriter, IndexBoundsIntersect) { ABT optimized = rootNode; ASSERT_TRUE(phaseManager.optimize(optimized)); - ASSERT_BETWEEN(10, 20, phaseManager.getMemo().getStats()._physPlanExplorationCount); + ASSERT_BETWEEN(20, 30, phaseManager.getMemo().getStats()._physPlanExplorationCount); // Demonstrate that the predicates >70 and <90 are NOT combined into the same interval (70, 90) // since the paths are multiKey. 
With the heuristic estimate we may get either interval in the @@ -3079,15 +3085,15 @@ TEST(PhysRewriter, IndexResidualReq) { "| Index, dedupRID\n" "Filter []\n" "| EvalFilter []\n" - "| | Variable [evalTemp_1]\n" + "| | Variable [evalTemp_2]\n" "| PathGet [c]\n" "| PathCompare [Gt]\n" "| Const [0]\n" - "IndexScan [{'<indexKey> 0': pa, '<indexKey> 1': evalTemp_1}, scanDefName: c1, " + "IndexScan [{'<indexKey> 0': pa, '<indexKey> 1': evalTemp_2}, scanDefName: c1, " "indexDefName: index1, interval: {(Const [0], Const [maxKey]], [Const [minKey], Const " "[maxKey]]}]\n" " BindBlock:\n" - " [evalTemp_1]\n" + " [evalTemp_2]\n" " Source []\n" " [pa]\n" " Source []\n", @@ -3154,7 +3160,7 @@ TEST(PhysRewriter, IndexResidualReq1) { ABT optimized = rootNode; phaseManager.getHints()._fastIndexNullHandling = true; ASSERT_TRUE(phaseManager.optimize(optimized)); - ASSERT_BETWEEN(25, 45, phaseManager.getMemo().getStats()._physPlanExplorationCount); + ASSERT_BETWEEN(50, 75, phaseManager.getMemo().getStats()._physPlanExplorationCount); // Prefer index1 over index2 and index3 in order to cover all fields. ASSERT_EXPLAIN_V2( @@ -3225,7 +3231,7 @@ TEST(PhysRewriter, IndexResidualReq2) { ABT optimized = rootNode; ASSERT_TRUE(phaseManager.optimize(optimized)); - ASSERT_EQ(7, phaseManager.getMemo().getStats()._physPlanExplorationCount); + ASSERT_BETWEEN(7, 10, phaseManager.getMemo().getStats()._physPlanExplorationCount); // We can cover "b" with the index and filter before we Seek. 
ASSERT_EXPLAIN_V2( @@ -3251,14 +3257,14 @@ TEST(PhysRewriter, IndexResidualReq2) { "| rid_0\n" "Filter []\n" "| EvalFilter []\n" - "| | Variable [evalTemp_4]\n" + "| | Variable [evalTemp_10]\n" "| PathCompare [Eq]\n" "| Const [0]\n" - "IndexScan [{'<indexKey> 2': evalTemp_4, '<rid>': rid_0}, scanDefName: c1, indexDefName: " + "IndexScan [{'<indexKey> 2': evalTemp_10, '<rid>': rid_0}, scanDefName: c1, indexDefName: " "index1, interval: {[Const [0], Const [0]], [Const [minKey], Const [maxKey]], [Const " "[minKey], Const [maxKey]]}]\n" " BindBlock:\n" - " [evalTemp_4]\n" + " [evalTemp_10]\n" " Source []\n" " [rid_0]\n" " Source []\n", @@ -3548,7 +3554,7 @@ TEST(PhysRewriter, ArrayConstantIndex) { ABT optimized = rootNode; ASSERT_TRUE(phaseManager.optimize(optimized)); - ASSERT_BETWEEN(8, 12, phaseManager.getMemo().getStats()._physPlanExplorationCount); + ASSERT_BETWEEN(10, 15, phaseManager.getMemo().getStats()._physPlanExplorationCount); // Demonstrate we get index bounds to handle the array constant, while we also retain the // original filter. 
We have index bound with the array itself unioned with bound using the first @@ -4386,7 +4392,7 @@ TEST(PhysRewriter, PartialIndex1) { "| | Const [true]\n" "| Filter []\n" "| | EvalFilter []\n" - "| | | Variable [evalTemp_2]\n" + "| | | Variable [evalTemp_4]\n" "| | PathTraverse [1]\n" "| | PathCompare [Eq]\n" "| | Const [2]\n" @@ -4394,9 +4400,9 @@ TEST(PhysRewriter, PartialIndex1) { "| | limitSkip:\n" "| | limit: 1\n" "| | skip: 0\n" - "| Seek [ridProjection: rid_0, {'<root>': root, 'b': evalTemp_2}, c1]\n" + "| Seek [ridProjection: rid_0, {'<root>': root, 'b': evalTemp_4}, c1]\n" "| | BindBlock:\n" - "| | [evalTemp_2]\n" + "| | [evalTemp_4]\n" "| | Source []\n" "| | [root]\n" "| | Source []\n" @@ -4543,21 +4549,21 @@ TEST(PhysRewriter, PartialIndexReject) { "| Variable [root]\n" "Filter []\n" "| EvalFilter []\n" - "| | Variable [evalTemp_1]\n" + "| | Variable [evalTemp_3]\n" "| PathTraverse [1]\n" "| PathCompare [Eq]\n" "| Const [2]\n" "Filter []\n" "| EvalFilter []\n" - "| | Variable [evalTemp_0]\n" + "| | Variable [evalTemp_2]\n" "| PathTraverse [1]\n" "| PathCompare [Eq]\n" "| Const [3]\n" - "PhysicalScan [{'<root>': root, 'a': evalTemp_0, 'b': evalTemp_1}, c1]\n" + "PhysicalScan [{'<root>': root, 'a': evalTemp_2, 'b': evalTemp_3}, c1]\n" " BindBlock:\n" - " [evalTemp_0]\n" + " [evalTemp_2]\n" " Source []\n" - " [evalTemp_1]\n" + " [evalTemp_3]\n" " Source []\n" " [root]\n" " Source []\n", @@ -4836,5 +4842,143 @@ TEST(PhysRewriter, JoinRewrite1) { optimized); } +TEST(PhysRewriter, RootInterval) { + using namespace properties; + + ABT scanNode = make<ScanNode>("root", "c1"); + + // We have a predicate applied directly over the root projection without field extraction. 
+ ABT filterNode = + make<FilterNode>(make<EvalFilter>(make<PathCompare>(Operations::Eq, Constant::int64(1)), + make<Variable>("root")), + std::move(scanNode)); + + ABT rootNode = + make<RootNode>(ProjectionRequirement{ProjectionNameVector{"root"}}, std::move(filterNode)); + + PrefixId prefixId; + OptPhaseManager phaseManager( + {OptPhaseManager::OptPhase::MemoSubstitutionPhase, + OptPhaseManager::OptPhase::MemoExplorationPhase, + OptPhaseManager::OptPhase::MemoImplementationPhase}, + prefixId, + {{{"c1", ScanDefinition{{}, {}}}}}, + {true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests}); + + ABT optimized = rootNode; + ASSERT_TRUE(phaseManager.optimize(optimized)); + ASSERT_EQ(2, phaseManager.getMemo().getStats()._physPlanExplorationCount); + + ASSERT_EXPLAIN_V2( + "Root []\n" + "| | projections: \n" + "| | root\n" + "| RefBlock: \n" + "| Variable [root]\n" + "Filter []\n" + "| EvalFilter []\n" + "| | Variable [root]\n" + "| PathCompare [Eq]\n" + "| Const [1]\n" + "PhysicalScan [{'<root>': root}, c1]\n" + " BindBlock:\n" + " [root]\n" + " Source []\n", + optimized); +} + +TEST(PhysRewriter, IndexSubfieldCovered) { + using namespace properties; + + ABT scanNode = make<ScanNode>("root", "c1"); + + ABT evalNode1 = make<EvaluationNode>( + "pa", + make<EvalPath>(make<PathGet>("a", make<PathIdentity>()), make<Variable>("root")), + std::move(scanNode)); + + ABT filterNode1 = + make<FilterNode>(make<EvalFilter>(make<PathCompare>(Operations::Eq, Constant::int64(1)), + make<Variable>("pa")), + std::move(evalNode1)); + + ABT evalNode2 = make<EvaluationNode>( + "pb", + make<EvalPath>(make<PathGet>("a", make<PathGet>("b", make<PathIdentity>())), + make<Variable>("root")), + std::move(filterNode1)); + + ABT filterNode2 = + make<FilterNode>(make<EvalFilter>(make<PathCompare>(Operations::Eq, Constant::int64(2)), + make<Variable>("pb")), + std::move(evalNode2)); + + ABT filterNode3 = make<FilterNode>( + make<EvalFilter>( + make<PathGet>( + "a", + 
make<PathTraverse>( + make<PathGet>("c", make<PathCompare>(Operations::Eq, Constant::int64(3))), + PathTraverse::kSingleLevel)), + make<Variable>("root")), + std::move(filterNode2)); + + ABT rootNode = make<RootNode>(ProjectionRequirement{ProjectionNameVector{"pa", "pb"}}, + std::move(filterNode3)); + + PrefixId prefixId; + OptPhaseManager phaseManager( + {OptPhaseManager::OptPhase::MemoSubstitutionPhase, + OptPhaseManager::OptPhase::MemoExplorationPhase, + OptPhaseManager::OptPhase::MemoImplementationPhase}, + prefixId, + {{{"c1", + ScanDefinition{ + {}, + {{"index1", + makeIndexDefinition("a", CollationOp::Ascending, false /*isMultiKey*/)}}}}}}, + {true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests}); + + ABT optimized = rootNode; + ASSERT_TRUE(phaseManager.optimize(optimized)); + ASSERT_BETWEEN(35, 50, phaseManager.getMemo().getStats()._physPlanExplorationCount); + + // Observe we have a covered plan. The filters for subfields "b" and "c" are expressed as + // residual predicates. Also observe the traverse for "a.c" is removed due to "a" being + // non-multikey. 
+ ASSERT_EXPLAIN_V2( + "Root []\n" + "| | projections: \n" + "| | pa\n" + "| | pb\n" + "| RefBlock: \n" + "| Variable [pa]\n" + "| Variable [pb]\n" + "Filter []\n" + "| EvalFilter []\n" + "| | Variable [pb]\n" + "| PathCompare [Eq]\n" + "| Const [2]\n" + "Evaluation []\n" + "| BindBlock:\n" + "| [pb]\n" + "| EvalPath []\n" + "| | Variable [pa]\n" + "| PathGet [b]\n" + "| PathIdentity []\n" + "Filter []\n" + "| EvalFilter []\n" + "| | Variable [pa]\n" + "| PathGet [c]\n" + "| PathCompare [Eq]\n" + "| Const [3]\n" + "IndexScan [{'<indexKey> 0': pa}, scanDefName: c1, indexDefName: index1, interval: {[Const " + "[1], Const [1]]}]\n" + " BindBlock:\n" + " [pa]\n" + " Source []\n", + optimized); +} + } // namespace } // namespace mongo::optimizer diff --git a/src/mongo/db/query/optimizer/props.cpp b/src/mongo/db/query/optimizer/props.cpp index cc8a99db388..ef4ff7a1bda 100644 --- a/src/mongo/db/query/optimizer/props.cpp +++ b/src/mongo/db/query/optimizer/props.cpp @@ -256,10 +256,10 @@ const ProjectionNameSet& ProjectionAvailability::getProjections() const { } CardinalityEstimate::CardinalityEstimate(const CEType estimate) - : _estimate(estimate), _partialSchemaKeyCEMap() {} + : _estimate(estimate), _partialSchemaKeyCE() {} bool CardinalityEstimate::operator==(const CardinalityEstimate& other) const { - return _estimate == other._estimate && _partialSchemaKeyCEMap == other._partialSchemaKeyCEMap; + return _estimate == other._estimate && _partialSchemaKeyCE == other._partialSchemaKeyCE; } CEType CardinalityEstimate::getEstimate() const { @@ -270,12 +270,12 @@ CEType& CardinalityEstimate::getEstimate() { return _estimate; } -const PartialSchemaKeyCE& CardinalityEstimate::getPartialSchemaKeyCEMap() const { - return _partialSchemaKeyCEMap; +const PartialSchemaKeyCE& CardinalityEstimate::getPartialSchemaKeyCE() const { + return _partialSchemaKeyCE; } -PartialSchemaKeyCE& CardinalityEstimate::getPartialSchemaKeyCEMap() { - return _partialSchemaKeyCEMap; +PartialSchemaKeyCE& 
CardinalityEstimate::getPartialSchemaKeyCE() { + return _partialSchemaKeyCE; } IndexingAvailability::IndexingAvailability(GroupIdType scanGroupId, diff --git a/src/mongo/db/query/optimizer/props.h b/src/mongo/db/query/optimizer/props.h index f77bf38d23c..dfac59a785b 100644 --- a/src/mongo/db/query/optimizer/props.h +++ b/src/mongo/db/query/optimizer/props.h @@ -89,26 +89,28 @@ using PhysProperty = algebra::PolyValue<CollationRequirement, using LogicalProps = opt::unordered_map<LogicalProperty::key_type, LogicalProperty>; using PhysProps = opt::unordered_map<PhysProperty::key_type, PhysProperty>; -template <typename T, typename... Args> +template <typename T, + std::enable_if_t<std::is_base_of_v<LogicalPropertyTag, T>, bool> = true, + typename... Args> inline auto makeProperty(Args&&... args) { - if constexpr (std::is_base_of_v<LogicalPropertyTag, T>) { - return LogicalProperty::make<T>(std::forward<Args>(args)...); - } else if constexpr (std::is_base_of_v<PhysPropertyTag, T>) { - return PhysProperty::make<T>(std::forward<Args>(args)...); - } else { - static_assert("Unknown property type"); - } + return LogicalProperty::make<T>(std::forward<Args>(args)...); +} + +template <typename T, + std::enable_if_t<std::is_base_of_v<PhysPropertyTag, T>, bool> = true, + typename... Args> +inline auto makeProperty(Args&&... 
args) { + return PhysProperty::make<T>(std::forward<Args>(args)...); } -template <class P> +template <class P, std::enable_if_t<std::is_base_of_v<LogicalPropertyTag, P>, bool> = true> static constexpr auto getPropertyKey() { - if constexpr (std::is_base_of_v<LogicalPropertyTag, P>) { - return LogicalProperty::template tagOf<P>(); - } else if constexpr (std::is_base_of_v<PhysPropertyTag, P>) { - return PhysProperty::template tagOf<P>(); - } else { - static_assert("Unknown property type"); - } + return LogicalProperty::template tagOf<P>(); +} + +template <class P, std::enable_if_t<std::is_base_of_v<PhysPropertyTag, P>, bool> = true> +static constexpr auto getPropertyKey() { + return PhysProperty::template tagOf<P>(); } template <class P, class C> @@ -376,14 +378,14 @@ public: CEType getEstimate() const; CEType& getEstimate(); - const PartialSchemaKeyCE& getPartialSchemaKeyCEMap() const; - PartialSchemaKeyCE& getPartialSchemaKeyCEMap(); + const PartialSchemaKeyCE& getPartialSchemaKeyCE() const; + PartialSchemaKeyCE& getPartialSchemaKeyCE(); private: CEType _estimate; // Used for SargableNodes. Provide additional per partial schema key CE. 
- PartialSchemaKeyCE _partialSchemaKeyCEMap; + PartialSchemaKeyCE _partialSchemaKeyCE; }; /** diff --git a/src/mongo/db/query/optimizer/utils/abt_hash.cpp b/src/mongo/db/query/optimizer/utils/abt_hash.cpp index acdc9f34bca..28520d50052 100644 --- a/src/mongo/db/query/optimizer/utils/abt_hash.cpp +++ b/src/mongo/db/query/optimizer/utils/abt_hash.cpp @@ -85,7 +85,7 @@ public: return 17; } - size_t computeHash(const MultiKeyIntervalRequirement& req) { + size_t computeHash(const CompoundIntervalRequirement& req) { size_t result = 19; for (const auto& interval : req) { updateHash(result, computeHash(interval)); @@ -131,28 +131,6 @@ static size_t computePartialSchemaReqHash(const PartialSchemaRequirements& reqMa return result; } -static size_t computeCandidateIndexMapHash(const CandidateIndexMap& map) { - size_t result = 17; - - IntervalHasher<MultiKeyIntervalReqExpr> intervalHasher; - for (const auto& [indexDefName, candidateIndexEntry] : map) { - updateHash(result, std::hash<std::string>()(indexDefName)); - - { - const auto& fieldProjectionMap = candidateIndexEntry._fieldProjectionMap; - updateHash(result, std::hash<ProjectionName>()(fieldProjectionMap._ridProjection)); - updateHash(result, std::hash<ProjectionName>()(fieldProjectionMap._rootProjection)); - for (const auto& fieldProjectionMapEntry : fieldProjectionMap._fieldProjections) { - updateHash(result, std::hash<FieldNameType>()(fieldProjectionMapEntry.first)); - updateHash(result, std::hash<ProjectionName>()(fieldProjectionMapEntry.second)); - } - } - updateHash(result, intervalHasher.compute(candidateIndexEntry._intervals)); - } - - return result; -} - /** * Hasher for ABT nodes. Used in conjunction with memo. */ @@ -202,8 +180,9 @@ public: size_t childResult, size_t /*bindResult*/, size_t /*refResult*/) { + // Specifically not hashing the candidate indexes and ScanParams. Those are derivative of + // the requirements, and can have temp projection names. 
return computeHashSeq<44>(computePartialSchemaReqHash(node.getReqMap()), - computeCandidateIndexMapHash(node.getCandidateIndexMap()), std::hash<IndexReqTarget>()(node.getTarget()), childResult); } diff --git a/src/mongo/db/query/optimizer/utils/interval_utils.cpp b/src/mongo/db/query/optimizer/utils/interval_utils.cpp index 7f3b29f9896..6d56b21e814 100644 --- a/src/mongo/db/query/optimizer/utils/interval_utils.cpp +++ b/src/mongo/db/query/optimizer/utils/interval_utils.cpp @@ -280,25 +280,25 @@ boost::optional<IntervalReqExpr::Node> intersectDNFIntervals( return IntervalReqExpr::make<IntervalReqExpr::Disjunction>(std::move(disjuncts)); } -bool combineMultiKeyIntervalsDNF(MultiKeyIntervalReqExpr::Node& targetIntervals, +bool combineCompoundIntervalsDNF(CompoundIntervalReqExpr::Node& targetIntervals, const IntervalReqExpr::Node& sourceIntervals, - const bool reverseSource) { - MultiKeyIntervalReqExpr::NodeVector newDisjunction; + bool reverseSource) { + CompoundIntervalReqExpr::NodeVector newDisjunction; for (const auto& sourceConjunction : sourceIntervals.cast<IntervalReqExpr::Disjunction>()->nodes()) { for (const auto& targetConjunction : - targetIntervals.cast<MultiKeyIntervalReqExpr::Disjunction>()->nodes()) { - MultiKeyIntervalReqExpr::NodeVector newConjunction; + targetIntervals.cast<CompoundIntervalReqExpr::Disjunction>()->nodes()) { + CompoundIntervalReqExpr::NodeVector newConjunction; for (const auto& sourceConjunct : sourceConjunction.cast<IntervalReqExpr::Conjunction>()->nodes()) { const auto& sourceInterval = sourceConjunct.cast<IntervalReqExpr::Atom>()->getExpr(); for (const auto& targetConjunct : - targetConjunction.cast<MultiKeyIntervalReqExpr::Conjunction>()->nodes()) { + targetConjunction.cast<CompoundIntervalReqExpr::Conjunction>()->nodes()) { const auto& targetInterval = - targetConjunct.cast<MultiKeyIntervalReqExpr::Atom>()->getExpr(); + targetConjunct.cast<CompoundIntervalReqExpr::Atom>()->getExpr(); if (!targetInterval.empty() && 
!targetInterval.back().isEquality() && !sourceInterval.isFullyOpen()) { // We do not have an equality prefix. Reject. @@ -314,18 +314,18 @@ bool combineMultiKeyIntervalsDNF(MultiKeyIntervalReqExpr::Node& targetIntervals, newInterval.push_back(sourceInterval); } newConjunction.emplace_back( - MultiKeyIntervalReqExpr::make<MultiKeyIntervalReqExpr::Atom>( + CompoundIntervalReqExpr::make<CompoundIntervalReqExpr::Atom>( std::move(newInterval))); } } newDisjunction.emplace_back( - MultiKeyIntervalReqExpr::make<MultiKeyIntervalReqExpr::Conjunction>( + CompoundIntervalReqExpr::make<CompoundIntervalReqExpr::Conjunction>( std::move(newConjunction))); } } - targetIntervals = MultiKeyIntervalReqExpr::make<MultiKeyIntervalReqExpr::Disjunction>( + targetIntervals = CompoundIntervalReqExpr::make<CompoundIntervalReqExpr::Disjunction>( std::move(newDisjunction)); return true; } diff --git a/src/mongo/db/query/optimizer/utils/interval_utils.h b/src/mongo/db/query/optimizer/utils/interval_utils.h index b47f3dbcaa0..397b9a264f8 100644 --- a/src/mongo/db/query/optimizer/utils/interval_utils.h +++ b/src/mongo/db/query/optimizer/utils/interval_utils.h @@ -57,14 +57,14 @@ boost::optional<IntervalReqExpr::Node> intersectDNFIntervals( * Combines a source interval over a single path with a target multi-component interval. The * multi-component interval is extended to contain an extra field. The resulting multi-component * interval defined the boundaries over the index component used by the index access execution - * operator. If we fail to combine, the target multi-key interval is left unchanged. + * operator. If we fail to combine, the target compound interval is left unchanged. * Currently we only support a single "equality prefix": 0+ equalities followed by at most * inequality, and trailing open intervals. * reverseSource flag indicates the sourceInterval corresponds to a descending index, so the bounds * are flipped before combining with the target. 
* TODO: support Recursive Index Navigation. */ -bool combineMultiKeyIntervalsDNF(MultiKeyIntervalReqExpr::Node& targetIntervals, +bool combineCompoundIntervalsDNF(CompoundIntervalReqExpr::Node& targetIntervals, const IntervalReqExpr::Node& sourceIntervals, bool reverseSource = false); diff --git a/src/mongo/db/query/optimizer/utils/utils.cpp b/src/mongo/db/query/optimizer/utils/utils.cpp index e4516a60618..0fb0bee1319 100644 --- a/src/mongo/db/query/optimizer/utils/utils.cpp +++ b/src/mongo/db/query/optimizer/utils/utils.cpp @@ -121,7 +121,7 @@ ProjectionNameSet extractReferencedColumns(const properties::PhysProps& properti return PropertiesAffectedColumnsExtractor::extract(properties); } -bool areMultiKeyIntervalsEqualities(const MultiKeyIntervalRequirement& intervals) { +bool areCompoundIntervalsEqualities(const CompoundIntervalRequirement& intervals) { for (const auto& interval : intervals) { if (!interval.isEquality()) { return false; @@ -718,29 +718,61 @@ bool checkPathContainsTraverse(const ABT& path) { } /** - * Erases Traverse elements from an index path. + * Fuses an index path and a query path to determine a residual path to apply over the index + * results. Checks if one index path is a prefix of another. Considers only Get, Traverse, and Id. + * Return the suffix that doesn't match. */ -class PathTraverseEraser { +class IndexPathFusor { public: - PathTraverseEraser() {} + struct ResultType { + boost::optional<ABT::reference_type> _prefix; + size_t _numTraversesSkipped = 0; + }; - void transport(ABT& n, const PathTraverse& /*node*/, ABT& childResult) { - n = std::move(childResult); - _erased = true; + /** + * 'n' - The complete index path being compared to, can be modified if needed. + * 'node' - Same as 'n' but cast to a specific type by the caller in order to invoke the + * correct operator. + * 'other' - The query that is checked if it is a prefix of the index. 
+ */ + ResultType operator()(const ABT& n, const PathGet& node, const ABT& other) { + if (auto otherGet = other.cast<PathGet>(); + otherGet != nullptr && otherGet->name() == node.name()) { + if (auto otherChildTraverse = otherGet->getPath().cast<PathTraverse>(); + otherChildTraverse != nullptr && !node.getPath().is<PathTraverse>()) { + // If a query path has a Traverse, but the index path doesn't, the query can + // still be evaluated by this index. Skip the Traverse node, and continue matching. + auto result = node.getPath().visit(*this, otherChildTraverse->getPath()); + result._numTraversesSkipped++; + return result; + } else { + return node.getPath().visit(*this, otherGet->getPath()); + } + } + return {}; } - template <typename T, typename... Ts> - void transport(const ABT& /*n*/, const T& /*node*/, Ts&&...) { - // Noop. + ResultType operator()(const ABT& n, const PathTraverse& node, const ABT& other) { + if (auto otherTraverse = other.cast<PathTraverse>(); + otherTraverse != nullptr && otherTraverse->getMaxDepth() == node.getMaxDepth()) { + return node.getPath().visit(*this, otherTraverse->getPath()); + } + return {}; } - bool eraseTraverse(ABT& path) { - algebra::transport<true>(path, *this); - return _erased; + ResultType operator()(const ABT& n, const PathIdentity& node, const ABT& other) { + return {other.ref()}; } -private: - bool _erased = false; + template <typename T, typename... Ts> + ResultType operator()(const ABT& /*n*/, const T& /*node*/, Ts&&...) 
{ + uasserted(6624152, "Unexpected node type"); + } + + static ResultType fuse(const ABT& node, const ABT& candidatePrefix) { + IndexPathFusor instance; + return candidatePrefix.visit(instance, node); + } }; boost::optional<PartialSchemaReqConversion> convertExprToPartialSchemaReq( @@ -757,7 +789,7 @@ boost::optional<PartialSchemaReqConversion> convertExprToPartialSchemaReq( } for (const auto& [key, req] : reqMap) { - if (key.emptyPath() && isIntervalReqFullyOpenDNF(req.getIntervals())) { + if (key._path.is<PathIdentity>() && isIntervalReqFullyOpenDNF(req.getIntervals())) { // We need to determine either path or interval (or both). return {}; } @@ -787,13 +819,24 @@ bool simplifyPartialSchemaReqPaths(const ProjectionName& scanProjName, PartialSchemaKey newKey = key; bool updateToNonMultiKey = false; - if (key._projectionName == scanProjName) { - ABT pathNoTraverse = newKey._path; - const bool erasedTraverse = PathTraverseEraser{}.eraseTraverse(pathNoTraverse); - if (erasedTraverse && - nonMultiKeyPaths.find(pathNoTraverse) != nonMultiKeyPaths.cend()) { + if (key._projectionName == scanProjName && checkPathContainsTraverse(newKey._path)) { + ABT bestPath = make<Blackhole>(); + size_t maxTraversesSkipped = 0; + for (const ABT& nonMultiKeyPath : nonMultiKeyPaths) { + // Attempt to fuse with each non-multikey path and observe how many traverses we can + // erase. + // TODO: we probably need a more efficient way to do this instead of iterating. 
+ if (const auto fused = IndexPathFusor::fuse(newKey._path, nonMultiKeyPath); + fused._prefix && fused._numTraversesSkipped > maxTraversesSkipped) { + maxTraversesSkipped = fused._numTraversesSkipped; + bestPath = nonMultiKeyPath; + PathAppender appender(std::move(*fused._prefix)); + appender.append(bestPath); + } + } + if (maxTraversesSkipped > 0) { updateToNonMultiKey = true; - newKey._path = std::move(pathNoTraverse); + newKey._path = std::move(bestPath); } } @@ -964,65 +1007,33 @@ size_t decodeIndexKeyName(const std::string& fieldName) { return key; } -/** - * Fuses an index path and a query path to determine a residual path to apply over the index - * results. Checks if one index path is a prefix of another. Considers only Get, Traverse, and Id. - * Return the suffix that doesn't match. - */ -class IndexPathFusor { -public: - using ResultType = boost::optional<ABT::reference_type>; - - /** - * 'n' - The complete index path being compared to, can be modified if needed. - * 'node' - Same as 'n' but cast to a specific type by the caller in order to invoke the - * correct operator. - * 'other' - The query that is checked if it is a prefix of the index. - */ - ResultType operator()(const ABT& n, const PathGet& node, const ABT& other) { - if (auto otherGet = other.cast<PathGet>(); - otherGet != nullptr && otherGet->name() == node.name()) { - return node.getPath().visit(*this, otherGet->getPath()); - } - return {}; - } - - ResultType operator()(const ABT& n, const PathTraverse& node, const ABT& other) { - if (auto otherTraverse = other.cast<PathTraverse>(); - otherTraverse != nullptr && otherTraverse->getMaxDepth() == node.getMaxDepth()) { - return node.getPath().visit(*this, otherTraverse->getPath()); - } - return {}; - } - - ResultType operator()(const ABT& n, const PathIdentity& node, const ABT& other) { - return {other.ref()}; - } - - template <typename T, typename... Ts> - ResultType operator()(const ABT& /*n*/, const T& /*node*/, Ts&&...) 
{ - uasserted(6624152, "Unexpected node type"); - } - - static ResultType fuse(const ABT& node, const ABT& candidatePrefix) { - IndexPathFusor instance; - return candidatePrefix.visit(instance, node); +const ProjectionName& getExistingOrTempProjForFieldName(PrefixId& prefixId, + const FieldNameType& fieldName, + FieldProjectionMap& fieldProjMap) { + auto it = fieldProjMap._fieldProjections.find(fieldName); + if (it != fieldProjMap._fieldProjections.cend()) { + return it->second; } -}; -CandidateIndexMap computeCandidateIndexMap(PrefixId& prefixId, - const ProjectionName& scanProjectionName, - const PartialSchemaRequirements& reqMap, - const ScanDefinition& scanDef, - const bool fastNullHandling, - bool& hasEmptyInterval) { - CandidateIndexMap result; - hasEmptyInterval = false; + ProjectionName tempProjName = prefixId.getNextId("evalTemp"); + const auto result = fieldProjMap._fieldProjections.emplace(fieldName, std::move(tempProjName)); + invariant(result.second); + return result.first->second; +} +CandidateIndexes computeCandidateIndexes(PrefixId& prefixId, + const ProjectionName& scanProjectionName, + const PartialSchemaRequirements& reqMap, + const ScanDefinition& scanDef, + const bool fastNullHandling, + bool& hasEmptyInterval) { // Contains one instance for each unmatched key. PartialSchemaKeySet unsatisfiedKeysInitial; for (const auto& [key, req] : reqMap) { - unsatisfiedKeysInitial.insert(key); + if (!unsatisfiedKeysInitial.insert(key).second) { + // We cannot satisfy two or more non-multikey path instances using an index. 
+ return {}; + } if (!fastNullHandling && req.hasBoundProjectionName() && checkMaybeHasNull(req.getIntervals())) { @@ -1032,140 +1043,150 @@ CandidateIndexMap computeCandidateIndexMap(PrefixId& prefixId, } } + CandidateIndexes result; + hasEmptyInterval = false; + for (const auto& [indexDefName, indexDef] : scanDef.getIndexDefs()) { - FieldProjectionMap indexProjectionMap; - auto intervals = MultiKeyIntervalReqExpr::makeSingularDNF(); // Singular empty interval. - PartialSchemaRequirements residualRequirements; - ProjectionNameSet residualRequirementsTempProjections; - ResidualKeyMap residualKeyMap; - opt::unordered_set<size_t> fieldsToCollate; - size_t intervalPrefixSize = 0; PartialSchemaKeySet unsatisfiedKeys = unsatisfiedKeysInitial; - - // True if the paths from partial schema requirements form a strict prefix of the index - // collation. - bool isPrefix = true; - // If we formed bounds using at least one requirement (as opposed to having only residual - // requirements). - bool hasExactMatch = false; + CandidateIndexEntry entry(indexDefName); const IndexCollationSpec& indexCollationSpec = indexDef.getCollationSpec(); for (size_t indexField = 0; indexField < indexCollationSpec.size(); indexField++) { const auto& indexCollationEntry = indexCollationSpec.at(indexField); - PartialSchemaKey indexKey{scanProjectionName, indexCollationEntry._path}; const bool reverse = indexCollationEntry._op == CollationOp::Descending; - if (reqMap.count(indexKey) == 1 && isPrefix) { - hasExactMatch = true; - unsatisfiedKeys.erase(indexKey); - const PartialSchemaRequirement& req = reqMap.find(indexKey)->second; - const auto& requiredInterval = req.getIntervals(); + PartialSchemaKey indexKey{scanProjectionName, indexCollationEntry._path}; + auto indexKeyIt = reqMap.find(indexKey); + if (indexKeyIt == reqMap.cend()) { + break; + } - if (combineMultiKeyIntervalsDNF(intervals, requiredInterval, reverse)) { - intervalPrefixSize++; - } else { - if (!combineMultiKeyIntervalsDNF( - 
intervals, IntervalReqExpr::makeSingularDNF(), reverse)) { - uasserted(6624155, "Cannot combine with an open interval"); - } + const PartialSchemaRequirement& req = indexKeyIt->second; + const auto& requiredInterval = req.getIntervals(); + if (!combineCompoundIntervalsDNF(entry._intervals, requiredInterval, reverse)) { + break; + } - // Move interval into residual requirements. - if (req.hasBoundProjectionName()) { - PartialSchemaKey residualKey{req.getBoundProjectionName(), - make<PathIdentity>()}; - residualRequirements.emplace( - residualKey, PartialSchemaRequirement{"", req.getIntervals()}); - residualKeyMap.emplace(std::move(residualKey), indexKey); - residualRequirementsTempProjections.insert(req.getBoundProjectionName()); - } else { - ProjectionName tempProj = prefixId.getNextId("evalTemp"); - PartialSchemaKey residualKey{tempProj, make<PathIdentity>()}; - residualRequirements.emplace(residualKey, req); - residualKeyMap.emplace(std::move(residualKey), indexKey); - residualRequirementsTempProjections.insert(tempProj); - - // Include bounds projection into index spec. - if (!indexProjectionMap._fieldProjections - .emplace(encodeIndexKeyName(indexField), std::move(tempProj)) - .second) { - uasserted(6624156, "Duplicate field name"); - } - } - } + unsatisfiedKeys.erase(indexKey); + entry._intervalPrefixSize++; + if (req.hasBoundProjectionName()) { // Include bounds projection into index spec. 
- if (req.hasBoundProjectionName() && - !indexProjectionMap._fieldProjections - .emplace(encodeIndexKeyName(indexField), req.getBoundProjectionName()) - .second) { - uasserted(6624157, "Duplicate field name"); - } + const bool inserted = + entry._fieldProjectionMap._fieldProjections + .emplace(encodeIndexKeyName(indexField), req.getBoundProjectionName()) + .second; + invariant(inserted); + } - if (auto singularInterval = IntervalReqExpr::getSingularDNF(requiredInterval); - !singularInterval || !singularInterval->isEquality()) { - // We only care about collation of for non-equality intervals. - // Equivalently, it is sufficient for singular intervals to be clustered. - fieldsToCollate.insert(indexField); - } - } else { - bool foundPathPrefix = false; - for (const auto& queryKey : unsatisfiedKeys) { - const auto fusedPath = - IndexPathFusor::fuse(queryKey._path, indexCollationEntry._path); - if (fusedPath) { - ProjectionName tempProj = prefixId.getNextId("evalTemp"); - PartialSchemaKey residualKey{tempProj, fusedPath.get()}; - - { - auto range = reqMap.equal_range(queryKey); - for (auto it = range.first; it != range.second; it++) { - residualRequirements.emplace(residualKey, it->second); - } - } - residualKeyMap.emplace(std::move(residualKey), queryKey); - residualRequirementsTempProjections.insert(tempProj); - - // Include bounds projection into index spec. - if (!indexProjectionMap._fieldProjections - .emplace(encodeIndexKeyName(indexField), std::move(tempProj)) - .second) { - uasserted(6624158, "Duplicate field name"); - } - if (!combineMultiKeyIntervalsDNF( - intervals, IntervalReqExpr::makeSingularDNF(), reverse)) { - uasserted(6624159, "Cannot combine with an open interval"); - } + if (auto singularInterval = IntervalReqExpr::getSingularDNF(requiredInterval); + !singularInterval || !singularInterval->isEquality()) { + // We only care about collation of for non-equality intervals. + // Equivalently, it is sufficient for singular intervals to be clustered. 
+ entry._fieldsToCollate.insert(indexField); + } + } - foundPathPrefix = true; - unsatisfiedKeys.erase(queryKey); - break; - } - } + for (auto queryKeyIt = unsatisfiedKeys.begin(); queryKeyIt != unsatisfiedKeys.end();) { + const auto& queryKey = *queryKeyIt; + bool satisfied = false; - if (!foundPathPrefix) { - isPrefix = false; - if (!combineMultiKeyIntervalsDNF( - intervals, IntervalReqExpr::makeSingularDNF(), reverse)) { - uasserted(6624160, "Cannot combine with an open interval"); - } + for (size_t indexField = 0; indexField < indexCollationSpec.size(); indexField++) { + const auto& indexCollationEntry = indexCollationSpec.at(indexField); + const auto fusedPath = + IndexPathFusor::fuse(queryKey._path, indexCollationEntry._path); + if (!fusedPath._prefix) { + continue; } + + auto it = reqMap.find(queryKey); + tassert( + 6624158, "QueryKey must exist in the requirements map", it != reqMap.cend()); + + const ProjectionName& tempProjName = getExistingOrTempProjForFieldName( + prefixId, encodeIndexKeyName(indexField), entry._fieldProjectionMap); + entry._residualRequirements.emplace_back( + PartialSchemaKey{tempProjName, std::move(*fusedPath._prefix)}, + it->second, + std::distance(reqMap.cbegin(), it)); + + satisfied = true; + break; + } + + if (satisfied) { + unsatisfiedKeys.erase(queryKeyIt++); + } else { + queryKeyIt++; } - } - if (!hasExactMatch) { - continue; } if (!unsatisfiedKeys.empty()) { continue; } - result.emplace(indexDefName, - CandidateIndexEntry{std::move(indexProjectionMap), - std::move(intervals), - std::move(residualRequirements), - std::move(residualRequirementsTempProjections), - std::move(residualKeyMap), - std::move(fieldsToCollate), - intervalPrefixSize}); + for (size_t indexField = entry._intervalPrefixSize; indexField < indexCollationSpec.size(); + indexField++) { + // Pad the remaining index fields with [MinKey, MaxKey] intervals. 
+ const bool reverse = indexCollationSpec.at(indexField)._op == CollationOp::Descending; + if (!combineCompoundIntervalsDNF( + entry._intervals, IntervalReqExpr::makeSingularDNF(), reverse)) { + uasserted(6624159, "Cannot combine with an open interval"); + } + } + + result.push_back(std::move(entry)); + } + + return result; +} + +boost::optional<ScanParams> computeScanParams(PrefixId& prefixId, + const PartialSchemaRequirements& reqMap, + const ProjectionName& rootProj) { + ScanParams result; + + size_t entryIndex = 0; + for (const auto& [key, req] : reqMap) { + if (key._projectionName != rootProj) { + // We are not sitting right above a ScanNode. + return {}; + } + + if (auto pathGet = key._path.cast<PathGet>(); pathGet != nullptr) { + const FieldNameType& fieldName = pathGet->name(); + + // Extract a new requirements path with removed simple paths. + // For example if we have a key Get "a" Traverse Compare = 0 we leave only + // Traverse Compare 0. + if (pathGet->getPath().is<PathIdentity>() && req.hasBoundProjectionName()) { + const auto [it, insertedInFPM] = + result._fieldProjectionMap._fieldProjections.emplace( + fieldName, req.getBoundProjectionName()); + + if (!insertedInFPM) { + result._residualRequirements.emplace_back( + PartialSchemaKey{it->second, make<PathIdentity>()}, + PartialSchemaRequirement{req.getBoundProjectionName(), req.getIntervals()}, + entryIndex); + } else if (!isIntervalReqFullyOpenDNF(req.getIntervals())) { + result._residualRequirements.emplace_back( + PartialSchemaKey{req.getBoundProjectionName(), make<PathIdentity>()}, + PartialSchemaRequirement{"", req.getIntervals()}, + entryIndex); + } + } else { + const ProjectionName& tempProjName = getExistingOrTempProjForFieldName( + prefixId, fieldName, result._fieldProjectionMap); + result._residualRequirements.emplace_back( + PartialSchemaKey{tempProjName, pathGet->getPath()}, req, entryIndex); + } + } else { + // Move other conditions into the residual map. 
+ result._fieldProjectionMap._rootProjection = rootProj; + result._residualRequirements.emplace_back(key, req, entryIndex); + } + + entryIndex++; } return result; @@ -1327,24 +1348,23 @@ void lowerPartialSchemaRequirement(const PartialSchemaKey& key, } } -void lowerPartialSchemaRequirements(const CEType baseCE, - const CEType scanGroupCE, - ResidualRequirements& requirements, +void lowerPartialSchemaRequirements(const CEType scanGroupCE, + std::vector<SelectivityType> indexPredSels, + ResidualRequirementsWithCE& requirements, ABT& physNode, NodeCEMap& nodeCEMap) { sortResidualRequirements(requirements); - std::vector<SelectivityType> residualSelectivities; for (const auto& [residualKey, residualReq, ce] : requirements) { - CEType residualCE = baseCE; - if (!residualSelectivities.empty()) { + CEType residualCE = scanGroupCE; + if (!indexPredSels.empty()) { // We are intentionally making a copy of the vector here, we are adding elements to it // below. - residualCE *= ce::conjExponentialBackoff(residualSelectivities); + residualCE *= ce::conjExponentialBackoff(indexPredSels); } if (scanGroupCE > 0.0) { // Compute the selectivity after we assign CE, which is the "input" to the cost. 
- residualSelectivities.push_back(ce / scanGroupCE); + indexPredSels.push_back(ce / scanGroupCE); } lowerPartialSchemaRequirement(residualKey, residualReq, physNode, [&](const ABT& node) { @@ -1353,72 +1373,34 @@ void lowerPartialSchemaRequirements(const CEType baseCE, } } -void computePhysicalScanParams(PrefixId& prefixId, - const PartialSchemaRequirements& reqMap, - const PartialSchemaKeyCE& partialSchemaKeyCEMap, - const ProjectionNameOrderPreservingSet& requiredProjections, - ResidualRequirements& residualRequirements, - ProjectionRenames& projectionRenames, - FieldProjectionMap& fieldProjectionMap, - bool& requiresRootProjection) { - for (const auto& [key, req] : reqMap) { - bool hasBoundProjection = req.hasBoundProjectionName(); - if (hasBoundProjection && !requiredProjections.find(req.getBoundProjectionName()).second) { - // Bound projection is not required, pretend we don't bind. - hasBoundProjection = false; +void sortResidualRequirements(ResidualRequirementsWithCE& residualReq) { + // Sort residual requirements by estimated cost. + // Assume it is more expensive to deliver a bound projection than to just filter. + + std::vector<std::pair<double, size_t>> costs; + for (size_t index = 0; index < residualReq.size(); index++) { + const auto& entry = residualReq.at(index); + + size_t multiplier = 0; + if (entry._req.hasBoundProjectionName()) { + multiplier++; } - if (!hasBoundProjection && isIntervalReqFullyOpenDNF(req.getIntervals())) { - // Redundant requirement. - continue; + if (!isIntervalReqFullyOpenDNF(entry._req.getIntervals())) { + multiplier++; } - const CEType keyCE = partialSchemaKeyCEMap.at(key); - if (auto pathGet = key._path.cast<PathGet>(); pathGet != nullptr) { - // Extract a new requirements path with removed simple paths. - // For example if we have a key Get "a" Traverse Compare = 0 we leave only - // Traverse Compare 0. 
- if (pathGet->getPath().is<PathIdentity>() && hasBoundProjection) { - const auto [it, inserted] = fieldProjectionMap._fieldProjections.emplace( - pathGet->name(), req.getBoundProjectionName()); - if (!inserted) { - projectionRenames.emplace(req.getBoundProjectionName(), it->second); - } - - if (!isIntervalReqFullyOpenDNF(req.getIntervals())) { - residualRequirements.emplace_back( - PartialSchemaKey{req.getBoundProjectionName(), make<PathIdentity>()}, - PartialSchemaRequirement{"", req.getIntervals()}, - keyCE); - } - } else { - ProjectionName tempProjName; - auto it = fieldProjectionMap._fieldProjections.find(pathGet->name()); - if (it == fieldProjectionMap._fieldProjections.cend()) { - tempProjName = prefixId.getNextId("evalTemp"); - fieldProjectionMap._fieldProjections.emplace(pathGet->name(), tempProjName); - } else { - tempProjName = it->second; - } + costs.emplace_back(entry._ce * multiplier, index); + } - residualRequirements.emplace_back( - PartialSchemaKey{std::move(tempProjName), pathGet->getPath()}, req, keyCE); - } - } else { - // Move other conditions into the residual map. - requiresRootProjection = true; - residualRequirements.emplace_back(key, req, keyCE); + std::sort(costs.begin(), costs.end()); + for (size_t index = 0; index < residualReq.size(); index++) { + const size_t targetIndex = costs.at(index).second; + if (index < targetIndex) { + std::swap(residualReq.at(index), residualReq.at(targetIndex)); } } } -void sortResidualRequirements(ResidualRequirements& residualReq) { - // Sort residual requirements by estimated CE. 
- std::sort( - residualReq.begin(), - residualReq.end(), - [](const ResidualRequirement& x, const ResidualRequirement& y) { return x._ce < y._ce; }); -} - void applyProjectionRenames(ProjectionRenames projectionRenames, ABT& node, const std::function<void(const ABT& node)>& visitor) { @@ -1429,6 +1411,42 @@ void applyProjectionRenames(ProjectionRenames projectionRenames, } } +void removeRedundantResidualPredicates(const ProjectionNameOrderPreservingSet& requiredProjections, + ResidualRequirements& residualReqs, + FieldProjectionMap& fieldProjectionMap) { + ProjectionNameSet residualTempProjections; + + // Remove unused residual requirements. + for (auto it = residualReqs.begin(); it != residualReqs.end();) { + auto& [key, req, ce] = *it; + + if (req.hasBoundProjectionName() && + !requiredProjections.find(req.getBoundProjectionName()).second) { + if (isIntervalReqFullyOpenDNF(req.getIntervals())) { + residualReqs.erase(it++); + continue; + } else { + req.setBoundProjectionName(""); + } + } + + residualTempProjections.insert(key._projectionName); + it++; + } + + // Remove unused projections from the field projection map. 
+ auto& fieldProjMap = fieldProjectionMap._fieldProjections; + for (auto it = fieldProjMap.begin(); it != fieldProjMap.end();) { + const ProjectionName& projName = it->second; + if (!requiredProjections.find(projName).second && + residualTempProjections.count(projName) == 0) { + fieldProjMap.erase(it++); + } else { + it++; + } + } +} + ABT lowerRIDIntersectGroupBy(PrefixId& prefixId, const ProjectionName& ridProjName, const CEType intersectedCE, @@ -1635,7 +1653,7 @@ public: _fpmStack.push_back(std::move(indexProjectionMap)); }; - ABT transport(const MultiKeyIntervalReqExpr::Atom& node) { + ABT transport(const CompoundIntervalReqExpr::Atom& node) { ABT physicalIndexScan = make<IndexScanNode>( _fpmStack.back(), IndexSpecification{_scanDefName, _indexDefName, node.getExpr(), _reverseOrder}); @@ -1670,7 +1688,7 @@ public: _fpmStack.push_back(std::move(childMap)); } - void prepare(const MultiKeyIntervalReqExpr::Conjunction& node) { + void prepare(const CompoundIntervalReqExpr::Conjunction& node) { prepare<true /*isConjunction*/>(node.nodes().size()); } @@ -1745,19 +1763,19 @@ public: return result; } - ABT transport(const MultiKeyIntervalReqExpr::Conjunction& node, ABTVector childResults) { + ABT transport(const CompoundIntervalReqExpr::Conjunction& node, ABTVector childResults) { return implement<true /*isIntersect*/>(std::move(childResults)); } - void prepare(const MultiKeyIntervalReqExpr::Disjunction& node) { + void prepare(const CompoundIntervalReqExpr::Disjunction& node) { prepare<false /*isConjunction*/>(node.nodes().size()); } - ABT transport(const MultiKeyIntervalReqExpr::Disjunction& node, ABTVector childResults) { + ABT transport(const CompoundIntervalReqExpr::Disjunction& node, ABTVector childResults) { return implement<false /*isIntersect*/>(std::move(childResults)); } - ABT lower(const MultiKeyIntervalReqExpr::Node& intervals) { + ABT lower(const CompoundIntervalReqExpr::Node& intervals) { return algebra::transport<false>(intervals, *this); } @@ -1779,7 
+1797,7 @@ ABT lowerIntervals(PrefixId& prefixId, FieldProjectionMap indexProjectionMap, const std::string& scanDefName, const std::string& indexDefName, - const MultiKeyIntervalReqExpr::Node& intervals, + const CompoundIntervalReqExpr::Node& intervals, const bool reverseOrder, const CEType indexCE, const CEType scanGroupCE, diff --git a/src/mongo/db/query/optimizer/utils/utils.h b/src/mongo/db/query/optimizer/utils/utils.h index 09a18cc8817..28690c50262 100644 --- a/src/mongo/db/query/optimizer/utils/utils.h +++ b/src/mongo/db/query/optimizer/utils/utils.h @@ -104,7 +104,7 @@ ProjectionNameSet extractReferencedColumns(const properties::PhysProps& properti /** * Returns true if all components of the compound interval are equalities. */ -bool areMultiKeyIntervalsEqualities(const MultiKeyIntervalRequirement& intervals); +bool areCompoundIntervalsEqualities(const CompoundIntervalRequirement& intervals); struct CollationSplitResult { bool _validSplit = false; @@ -211,19 +211,26 @@ std::string encodeIndexKeyName(size_t indexField); size_t decodeIndexKeyName(const std::string& fieldName); /** - * Compute a mapping [indexName -> CandidateIndexEntry] that describes intervals that could be + * Compute a list of candidate indexes. A CandidateIndexEntry describes intervals that could be * used for accessing each of the indexes in the map. The intervals themselves are derived from * 'reqMap'. * If the intersection of any of the interval requirements in 'reqMap' results in an empty - * interval, return an empty mappting and set 'hasEmptyInterval' to true. + * interval, return an empty mapping and set 'hasEmptyInterval' to true. * Otherwise return the computed mapping, and set 'hasEmptyInterval' to false. 
*/ -CandidateIndexMap computeCandidateIndexMap(PrefixId& prefixId, - const ProjectionName& scanProjectionName, - const PartialSchemaRequirements& reqMap, - const ScanDefinition& scanDef, - bool fastNullHandling, - bool& hasEmptyInterval); +CandidateIndexes computeCandidateIndexes(PrefixId& prefixId, + const ProjectionName& scanProjectionName, + const PartialSchemaRequirements& reqMap, + const ScanDefinition& scanDef, + bool fastNullHandling, + bool& hasEmptyInterval); + +/** + * Computes a set of residual predicates which will be applied on top of a Scan. + */ +boost::optional<ScanParams> computeScanParams(PrefixId& prefixId, + const PartialSchemaRequirements& reqMap, + const ProjectionName& rootProj); /** * Checks if we have an interval tree which has at least one atomic interval which may include Null @@ -241,28 +248,23 @@ void lowerPartialSchemaRequirement(const PartialSchemaKey& key, const std::function<void(const ABT& node)>& visitor = [](const ABT&) {}); -void lowerPartialSchemaRequirements(CEType baseCE, - CEType scanGroupCE, - ResidualRequirements& requirements, +void lowerPartialSchemaRequirements(CEType scanGroupCE, + std::vector<SelectivityType> indexPredSels, + ResidualRequirementsWithCE& requirements, ABT& physNode, NodeCEMap& nodeCEMap); -void computePhysicalScanParams(PrefixId& prefixId, - const PartialSchemaRequirements& reqMap, - const PartialSchemaKeyCE& partialSchemaKeyCEMap, - const ProjectionNameOrderPreservingSet& requiredProjections, - ResidualRequirements& residualRequirements, - ProjectionRenames& projectionRenames, - FieldProjectionMap& fieldProjectionMap, - bool& requiresRootProjection); - -void sortResidualRequirements(ResidualRequirements& residualReq); +void sortResidualRequirements(ResidualRequirementsWithCE& residualReq); void applyProjectionRenames(ProjectionRenames projectionRenames, ABT& node, const std::function<void(const ABT& node)>& visitor = [](const ABT&) { }); +void removeRedundantResidualPredicates(const 
ProjectionNameOrderPreservingSet& requiredProjections, + ResidualRequirements& residualReqs, + FieldProjectionMap& fieldProjectionMap); + /** * Implements an RID Intersect node using Union and GroupBy. */ @@ -311,7 +313,7 @@ ABT lowerIntervals(PrefixId& prefixId, FieldProjectionMap indexProjectionMap, const std::string& scanDefName, const std::string& indexDefName, - const MultiKeyIntervalReqExpr::Node& intervals, + const CompoundIntervalReqExpr::Node& intervals, bool reverseOrder, CEType indexCE, CEType scanGroupCE, |