author     Svilen Mihaylov <svilen.mihaylov@mongodb.com>  2022-08-26 23:41:07 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2022-08-27 00:19:25 +0000
commit     f5f8acab2eec0be83d94cb8126bccf2bd1522350 (patch)
tree       ddcfda87a271a280d4677253e0f918ef843576a7  /src/mongo/db/query
parent     8aef79c9e2e6d4e8110bb7ccdc5f6445198272f2 (diff)
download   mongo-f5f8acab2eec0be83d94cb8126bccf2bd1522350.tar.gz
SERVER-68965 Improvements to indexing in the presence of nested subfields
Diffstat (limited to 'src/mongo/db/query')
-rw-r--r--  src/mongo/db/query/ce/ce_test_utils.cpp  49
-rw-r--r--  src/mongo/db/query/ce/ce_test_utils.h  3
-rw-r--r--  src/mongo/db/query/optimizer/cascades/cost_derivation.cpp  4
-rw-r--r--  src/mongo/db/query/optimizer/cascades/implementers.cpp  106
-rw-r--r--  src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp  163
-rw-r--r--  src/mongo/db/query/optimizer/cascades/logical_rewriter.h  1
-rw-r--r--  src/mongo/db/query/optimizer/cascades/memo.cpp  36
-rw-r--r--  src/mongo/db/query/optimizer/explain.cpp  234
-rw-r--r--  src/mongo/db/query/optimizer/index_bounds.cpp  35
-rw-r--r--  src/mongo/db/query/optimizer/index_bounds.h  71
-rw-r--r--  src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp  71
-rw-r--r--  src/mongo/db/query/optimizer/node.cpp  19
-rw-r--r--  src/mongo/db/query/optimizer/node.h  9
-rw-r--r--  src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp  264
-rw-r--r--  src/mongo/db/query/optimizer/props.cpp  12
-rw-r--r--  src/mongo/db/query/optimizer/props.h  40
-rw-r--r--  src/mongo/db/query/optimizer/utils/abt_hash.cpp  27
-rw-r--r--  src/mongo/db/query/optimizer/utils/interval_utils.cpp  20
-rw-r--r--  src/mongo/db/query/optimizer/utils/interval_utils.h  4
-rw-r--r--  src/mongo/db/query/optimizer/utils/utils.cpp  538
-rw-r--r--  src/mongo/db/query/optimizer/utils/utils.h  48
21 files changed, 980 insertions, 774 deletions
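
At a glance, the patch reshapes the SargableNode bookkeeping: the per-index map keyed by index name becomes a vector of entries that each carry their own index definition name, residual requirements record the position of their originating entry in the requirements map instead of a separate residual-key map, and a new optional ScanParams describes the collection-scan/seek alternative. A condensed view of the reworked structures, abridged from the index_bounds.h hunk further down (member order and comments shortened):

    struct ResidualRequirement {
        PartialSchemaKey _key;
        PartialSchemaRequirement _req;
        size_t _entryIndex;  // position of the originating entry in the PartialSchemaRequirements map
    };
    using ResidualRequirements = std::vector<ResidualRequirement>;

    struct CandidateIndexEntry {
        std::string _indexDefName;  // the entry now names its index; formerly the map key
        FieldProjectionMap _fieldProjectionMap;
        CompoundIntervalReqExpr::Node _intervals;  // renamed from MultiKeyIntervalReqExpr
        ResidualRequirements _residualRequirements;
        opt::unordered_set<size_t> _fieldsToCollate;
        size_t _intervalPrefixSize;
    };
    using CandidateIndexes = std::vector<CandidateIndexEntry>;  // replaces CandidateIndexMap

    struct ScanParams {  // new: pre-computed PhysicalScan/Seek parameters
        FieldProjectionMap _fieldProjectionMap;
        ResidualRequirements _residualRequirements;
    };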
diff --git a/src/mongo/db/query/ce/ce_test_utils.cpp b/src/mongo/db/query/ce/ce_test_utils.cpp
index 3f32aa57a33..0ee44718b05 100644
--- a/src/mongo/db/query/ce/ce_test_utils.cpp
+++ b/src/mongo/db/query/ce/ce_test_utils.cpp
@@ -60,7 +60,29 @@ double CETester::getCE(const std::string& query, size_t optimizationLevel) const
}
double CETester::getCE(ABT& abt, size_t optimizationLevel) const {
- OptPhaseManager phaseManager = getPhaseManager(optimizationLevel);
+ // Needs to outlive the phase manager.
+ PrefixId prefixId;
+
+ // Mock metadata.
+ ScanDefinition sd({}, {}, {DistributionType::Centralized}, true, _collCard);
+ Metadata metadata({{_collName, sd}});
+
+ std::vector<OptPhaseManager::OptPhase> optPhaseChoices{
+ OptPhaseManager::OptPhase::MemoSubstitutionPhase,
+ OptPhaseManager::OptPhase::MemoExplorationPhase};
+ optimizationLevel = std::min(optimizationLevel, optPhaseChoices.size());
+ OptPhaseManager::PhaseSet optPhases;
+ for (size_t i = 0; i < optimizationLevel; ++i) {
+ optPhases.insert(optPhaseChoices[i]);
+ }
+
+ optimizer::OptPhaseManager phaseManager(optPhases,
+ prefixId,
+ true /*requireRID*/,
+ metadata,
+ std::make_unique<HeuristicCE>(),
+ std::make_unique<DefaultCosting>(),
+ DebugInfo::kDefaultForTests);
// Optimize.
ASSERT_TRUE(phaseManager.optimize(abt));
@@ -77,29 +99,4 @@ double CETester::getCE(ABT& abt, size_t optimizationLevel) const {
return ce;
}
-optimizer::OptPhaseManager CETester::getPhaseManager(size_t optimizationLevel) const {
- // Mock memo.
- ScanDefinition sd({}, {}, {DistributionType::Centralized}, true, _collCard);
- Metadata metadata({{_collName, sd}});
-
- std::vector<OptPhaseManager::OptPhase> optPhaseChoices{
- OptPhaseManager::OptPhase::MemoSubstitutionPhase,
- OptPhaseManager::OptPhase::MemoExplorationPhase};
- optimizationLevel = std::min(optimizationLevel, optPhaseChoices.size());
- OptPhaseManager::PhaseSet optPhases;
- for (size_t i = 0; i < optimizationLevel; ++i) {
- optPhases.insert(optPhaseChoices[i]);
- }
-
- // Construct placeholder PhaseManager. Notice that it also creates a Memo internally.
- PrefixId prefixId;
- return {optPhases,
- prefixId,
- true /*requireRID*/,
- metadata,
- std::make_unique<HeuristicCE>(),
- std::make_unique<DefaultCosting>(),
- DebugInfo::kDefaultForTests};
-}
-
} // namespace mongo::ce
diff --git a/src/mongo/db/query/ce/ce_test_utils.h b/src/mongo/db/query/ce/ce_test_utils.h
index 3d49886b1e3..b354e4ae6b1 100644
--- a/src/mongo/db/query/ce/ce_test_utils.h
+++ b/src/mongo/db/query/ce/ce_test_utils.h
@@ -109,9 +109,6 @@ protected:
// The number of records in the collection we are testing.
double _collCard;
-
-private:
- optimizer::OptPhaseManager getPhaseManager(size_t optimizationLevel) const;
};
} // namespace ce
diff --git a/src/mongo/db/query/optimizer/cascades/cost_derivation.cpp b/src/mongo/db/query/optimizer/cascades/cost_derivation.cpp
index 11df74ff8e5..d212c32c5a7 100644
--- a/src/mongo/db/query/optimizer/cascades/cost_derivation.cpp
+++ b/src/mongo/db/query/optimizer/cascades/cost_derivation.cpp
@@ -168,9 +168,9 @@ public:
double evalCost = childResult._cost;
if (!isTrivialExpr<EvalPath>(node.getProjection())) {
// Non-trivial projection.
- evalCost += kStartupCost + kEvalIncrementalCost * childResult._ce;
+ evalCost += kStartupCost + kEvalIncrementalCost * _cardinalityEstimate;
}
- return {evalCost, childResult._ce};
+ return {evalCost, _cardinalityEstimate};
}
CostAndCEInternal operator()(const ABT& /*n*/, const BinaryJoinNode& node) {
diff --git a/src/mongo/db/query/optimizer/cascades/implementers.cpp b/src/mongo/db/query/optimizer/cascades/implementers.cpp
index a5bc675695a..2a0c4cfaac0 100644
--- a/src/mongo/db/query/optimizer/cascades/implementers.cpp
+++ b/src/mongo/db/query/optimizer/cascades/implementers.cpp
@@ -348,7 +348,7 @@ public:
// via a physical scan even in the absence of indexes.
const IndexingRequirement& requirements = getPropertyConst<IndexingRequirement>(_physProps);
- const CandidateIndexMap& candidateIndexMap = node.getCandidateIndexMap();
+ const CandidateIndexes& candidateIndexes = node.getCandidateIndexes();
const IndexReqTarget indexReqTarget = requirements.getIndexReqTarget();
switch (indexReqTarget) {
case IndexReqTarget::Complete:
@@ -358,7 +358,7 @@ public:
break;
case IndexReqTarget::Index:
- if (candidateIndexMap.empty()) {
+ if (candidateIndexes.empty()) {
return;
}
[[fallthrough]];
@@ -372,10 +372,6 @@ public:
default:
MONGO_UNREACHABLE;
}
- const auto& satisfiedPartialIndexes =
- getPropertyConst<IndexingAvailability>(
- _memo.getGroup(requirements.getSatisfiedPartialIndexesGroupId())._logicalProperties)
- .getSatisfiedPartialIndexes();
const auto& requiredProjections =
getPropertyConst<ProjectionRequirement>(_physProps).getProjections();
@@ -398,10 +394,6 @@ public:
}
for (const auto& [key, req] : reqMap) {
- if (key.emptyPath()) {
- // We cannot satisfy without a field.
- return;
- }
if (key._projectionName != scanProjectionName) {
// We can only satisfy partial schema requirements using our root projection.
return;
@@ -434,7 +426,7 @@ public:
const auto& ceProperty = getPropertyConst<CardinalityEstimate>(_logicalProps);
const CEType currentGroupCE = ceProperty.getEstimate();
- const PartialSchemaKeyCE& partialSchemaKeyCEMap = ceProperty.getPartialSchemaKeyCEMap();
+ const PartialSchemaKeyCE& partialSchemaKeyCE = ceProperty.getPartialSchemaKeyCE();
if (indexReqTarget == IndexReqTarget::Index) {
ProjectionCollationSpec requiredCollation;
@@ -443,10 +435,18 @@ public:
getPropertyConst<CollationRequirement>(_physProps).getCollationSpec();
}
+ const auto& satisfiedPartialIndexes =
+ getPropertyConst<IndexingAvailability>(
+ _memo.getGroup(requirements.getSatisfiedPartialIndexesGroupId())
+ ._logicalProperties)
+ .getSatisfiedPartialIndexes();
+
// Consider all candidate indexes, and check if they satisfy the collation and
// distribution requirements.
- for (const auto& [indexDefName, candidateIndexEntry] : candidateIndexMap) {
+ for (const auto& candidateIndexEntry : node.getCandidateIndexes()) {
+ const auto& indexDefName = candidateIndexEntry._indexDefName;
const auto& indexDef = scanDef.getIndexDefs().at(indexDefName);
+
if (!indexDef.getPartialReqMap().empty() &&
(_hints._disableIndexes == DisableIndexOptions::DisablePartialOnly ||
satisfiedPartialIndexes.count(indexDefName) == 0)) {
@@ -481,45 +481,35 @@ public:
availableDirections._forward || availableDirections._backward);
auto indexProjectionMap = candidateIndexEntry._fieldProjectionMap;
- {
- // Remove unused projections from the field projection map.
- auto& fieldProjMap = indexProjectionMap._fieldProjections;
- for (auto it = fieldProjMap.begin(); it != fieldProjMap.end();) {
- const ProjectionName& projName = it->second;
- if (!requiredProjections.find(projName).second &&
- candidateIndexEntry._residualRequirementsTempProjections.count(
- projName) == 0) {
- fieldProjMap.erase(it++);
- } else {
- it++;
- }
- }
- }
+ auto residualReqs = candidateIndexEntry._residualRequirements;
+ removeRedundantResidualPredicates(
+ requiredProjections, residualReqs, indexProjectionMap);
CEType indexCE = currentGroupCE;
- ResidualRequirements residualRequirements;
- if (!candidateIndexEntry._residualRequirements.empty()) {
+ ResidualRequirementsWithCE residualReqsWithCE;
+ std::vector<SelectivityType> indexPredSels;
+ if (!residualReqs.empty()) {
PartialSchemaKeySet residualQueryKeySet;
- for (const auto& [residualKey, residualReq] :
- candidateIndexEntry._residualRequirements) {
- const auto& queryKey = candidateIndexEntry._residualKeyMap.at(residualKey);
- residualQueryKeySet.emplace(queryKey);
- const CEType ce = partialSchemaKeyCEMap.at(queryKey);
- residualRequirements.emplace_back(residualKey, residualReq, ce);
+ for (const auto& [residualKey, residualReq, entryIndex] : residualReqs) {
+ auto entryIt = reqMap.cbegin();
+ std::advance(entryIt, entryIndex);
+ residualQueryKeySet.emplace(entryIt->first);
+ residualReqsWithCE.emplace_back(
+ residualKey, residualReq, partialSchemaKeyCE.at(entryIndex).second);
}
if (scanGroupCE > 0.0) {
- std::vector<SelectivityType> indexPredSelectivities;
+ size_t entryIndex = 0;
for (const auto& [key, req] : reqMap) {
if (residualQueryKeySet.count(key) == 0) {
- const CEType ce = partialSchemaKeyCEMap.at(key);
- indexPredSelectivities.push_back(ce / scanGroupCE);
+ indexPredSels.push_back(partialSchemaKeyCE.at(entryIndex).second /
+ scanGroupCE);
}
+ entryIndex++;
}
- if (!indexPredSelectivities.empty()) {
- indexCE = scanGroupCE *
- ce::conjExponentialBackoff(std::move(indexPredSelectivities));
+ if (!indexPredSels.empty()) {
+ indexCE = scanGroupCE * ce::conjExponentialBackoff(indexPredSels);
}
}
}
@@ -529,9 +519,9 @@ public:
NodeCEMap nodeCEMap;
// TODO: consider pre-computing as part of the candidateIndexes structure.
- const auto singularInterval = MultiKeyIntervalReqExpr::getSingularDNF(intervals);
+ const auto singularInterval = CompoundIntervalReqExpr::getSingularDNF(intervals);
const bool needsUniqueStage =
- (!singularInterval || !areMultiKeyIntervalsEqualities(*singularInterval)) &&
+ (!singularInterval || !areCompoundIntervalsEqualities(*singularInterval)) &&
indexDef.isMultiKey() && requirements.getDedupRID();
indexProjectionMap._ridProjection =
@@ -558,7 +548,7 @@ public:
}
lowerPartialSchemaRequirements(
- indexCE, scanGroupCE, residualRequirements, physNode, nodeCEMap);
+ scanGroupCE, std::move(indexPredSels), residualReqsWithCE, physNode, nodeCEMap);
if (needsUniqueStage) {
// Insert unique stage if we need to, after the residual requirements.
@@ -571,6 +561,9 @@ public:
_queue, kDefaultPriority, std::move(physNode), {}, std::move(nodeCEMap));
}
} else {
+ const auto& scanParams = node.getScanParams();
+ tassert(6624102, "Empty scan params", scanParams);
+
bool canUseParallelScan = false;
if (!distributionsCompatible(indexReqTarget,
scanDef.getDistributionAndPaths(),
@@ -581,21 +574,14 @@ public:
return;
}
- FieldProjectionMap fieldProjectionMap;
+ FieldProjectionMap fieldProjectionMap = scanParams->_fieldProjectionMap;
+ ResidualRequirements residualReqs = scanParams->_residualRequirements;
+ removeRedundantResidualPredicates(
+ requiredProjections, residualReqs, fieldProjectionMap);
+
if (indexReqTarget == IndexReqTarget::Complete && needsRID) {
fieldProjectionMap._ridProjection = ridProjName;
}
-
- ProjectionRenames projectionRenames;
- ResidualRequirements residualRequirements;
- computePhysicalScanParams(_prefixId,
- reqMap,
- partialSchemaKeyCEMap,
- requiredProjections,
- residualRequirements,
- projectionRenames,
- fieldProjectionMap,
- requiresRootProjection);
if (requiresRootProjection) {
fieldProjectionMap._rootProjection = scanProjectionName;
}
@@ -622,12 +608,14 @@ public:
nodeCEMap.emplace(physNode.cast<Node>(), baseCE);
}
- applyProjectionRenames(std::move(projectionRenames), physNode, [&](const ABT& node) {
- nodeCEMap.emplace(node.cast<Node>(), baseCE);
- });
+ ResidualRequirementsWithCE residualReqsWithCE;
+ for (const auto& [residualKey, residualReq, entryIndex] : residualReqs) {
+ residualReqsWithCE.emplace_back(
+ residualKey, residualReq, partialSchemaKeyCE.at(entryIndex).second);
+ }
lowerPartialSchemaRequirements(
- baseCE, scanGroupCE, residualRequirements, physNode, nodeCEMap);
+ baseCE, {} /*indexPredSels*/, residualReqsWithCE, physNode, nodeCEMap);
optimizeChildrenNoAssert(
_queue, kDefaultPriority, std::move(physNode), {}, std::move(nodeCEMap));
}
diff --git a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
index e6903a63cde..24d970f4ea6 100644
--- a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
+++ b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
@@ -573,18 +573,20 @@ static boost::optional<ABT> mergeSargableNodes(
const ScanDefinition& scanDef =
ctx.getMetadata()._scanDefs.at(indexingAvailability.getScanDefName());
- auto candidateIndexMap = computeCandidateIndexMap(ctx.getPrefixId(),
- scanProjName,
- mergedReqs,
- scanDef,
- ctx.getHints()._fastIndexNullHandling,
- hasEmptyInterval);
+ auto candidateIndexes = computeCandidateIndexes(ctx.getPrefixId(),
+ scanProjName,
+ mergedReqs,
+ scanDef,
+ ctx.getHints()._fastIndexNullHandling,
+ hasEmptyInterval);
if (hasEmptyInterval) {
return createEmptyValueScanNode(ctx);
}
+ auto scanParams = computeScanParams(ctx.getPrefixId(), mergedReqs, scanProjName);
ABT result = make<SargableNode>(std::move(mergedReqs),
- std::move(candidateIndexMap),
+ std::move(candidateIndexes),
+ std::move(scanParams),
IndexReqTarget::Complete,
belowNode.getChild());
applyProjectionRenames(std::move(projectionRenames), result);
@@ -710,46 +712,49 @@ static void convertFilterToSargableNode(ABT::reference_type node,
return;
}
- auto candidateIndexMap = computeCandidateIndexMap(ctx.getPrefixId(),
- scanProjName,
- conversion->_reqMap,
- scanDef,
- ctx.getHints()._fastIndexNullHandling,
- hasEmptyInterval);
+ auto candidateIndexes = computeCandidateIndexes(ctx.getPrefixId(),
+ scanProjName,
+ conversion->_reqMap,
+ scanDef,
+ ctx.getHints()._fastIndexNullHandling,
+ hasEmptyInterval);
if (hasEmptyInterval) {
addEmptyValueScanNode(ctx);
- } else {
- ABT sargableNode = make<SargableNode>(std::move(conversion->_reqMap),
- std::move(candidateIndexMap),
- IndexReqTarget::Complete,
- filterNode.getChild());
+ return;
+ }
- if (conversion->_retainPredicate) {
- const GroupIdType childGroupId =
- filterNode.getChild().cast<MemoLogicalDelegatorNode>()->getGroupId();
- if (childGroupId == indexingAvailability.getScanGroupId()) {
- // Add node directly.
- addSargableChildNode(node, std::move(sargableNode), ctx);
- } else {
- // Look at the child group to find SargableNodes and attempt to combine.
- const auto& logicalNodes = ctx.getMemo().getGroup(childGroupId)._logicalNodes;
- for (const ABT& childNode : logicalNodes.getVector()) {
- if (auto childSargableNode = childNode.cast<SargableNode>();
- childSargableNode != nullptr) {
- const auto& result = mergeSargableNodes(indexingAvailability,
- scanDef.getNonMultiKeyPathSet(),
- *sargableNode.cast<SargableNode>(),
- *childSargableNode,
- ctx);
- if (result) {
- addSargableChildNode(node, std::move(*result), ctx);
- }
- }
+ auto scanParams = computeScanParams(ctx.getPrefixId(), conversion->_reqMap, scanProjName);
+ ABT sargableNode = make<SargableNode>(std::move(conversion->_reqMap),
+ std::move(candidateIndexes),
+ std::move(scanParams),
+ IndexReqTarget::Complete,
+ filterNode.getChild());
+ if (!conversion->_retainPredicate) {
+ ctx.addNode(sargableNode, isSubstitution);
+ return;
+ }
+
+ const GroupIdType childGroupId =
+ filterNode.getChild().cast<MemoLogicalDelegatorNode>()->getGroupId();
+ if (childGroupId == indexingAvailability.getScanGroupId()) {
+ // Add node directly.
+ addSargableChildNode(node, std::move(sargableNode), ctx);
+ } else {
+ // Look at the child group to find SargableNodes and attempt to combine.
+ const auto& logicalNodes = ctx.getMemo().getGroup(childGroupId)._logicalNodes;
+ for (const ABT& childNode : logicalNodes.getVector()) {
+ if (auto childSargableNode = childNode.cast<SargableNode>();
+ childSargableNode != nullptr) {
+ const auto& result = mergeSargableNodes(indexingAvailability,
+ scanDef.getNonMultiKeyPathSet(),
+ *sargableNode.cast<SargableNode>(),
+ *childSargableNode,
+ ctx);
+ if (result) {
+ addSargableChildNode(node, std::move(*result), ctx);
}
}
- } else {
- ctx.addNode(sargableNode, isSubstitution);
}
}
}
@@ -895,22 +900,25 @@ struct SubstituteConvert<EvaluationNode> {
}
bool hasEmptyInterval = false;
- auto candidateIndexMap = computeCandidateIndexMap(ctx.getPrefixId(),
- scanProjName,
- conversion->_reqMap,
- scanDef,
- ctx.getHints()._fastIndexNullHandling,
- hasEmptyInterval);
+ auto candidateIndexes = computeCandidateIndexes(ctx.getPrefixId(),
+ scanProjName,
+ conversion->_reqMap,
+ scanDef,
+ ctx.getHints()._fastIndexNullHandling,
+ hasEmptyInterval);
if (hasEmptyInterval) {
addEmptyValueScanNode(ctx);
- } else {
- ABT newNode = make<SargableNode>(std::move(conversion->_reqMap),
- std::move(candidateIndexMap),
- IndexReqTarget::Complete,
- evalNode.getChild());
- ctx.addNode(newNode, true /*substitute*/);
+ return;
}
+
+ auto scanParams = computeScanParams(ctx.getPrefixId(), conversion->_reqMap, scanProjName);
+ ABT newNode = make<SargableNode>(std::move(conversion->_reqMap),
+ std::move(candidateIndexes),
+ std::move(scanParams),
+ IndexReqTarget::Complete,
+ evalNode.getChild());
+ ctx.addNode(newNode, true /*substitute*/);
}
};
@@ -1076,25 +1084,25 @@ struct ExploreConvert<SargableNode> {
}
bool hasEmptyLeftInterval = false;
- auto leftCandidateIndexMap = computeCandidateIndexMap(ctx.getPrefixId(),
- scanProjectionName,
- leftReqs,
- scanDef,
- fastIndexNullHandling,
- hasEmptyLeftInterval);
- if (isIndex && leftCandidateIndexMap.empty()) {
+ auto leftCandidateIndexes = computeCandidateIndexes(ctx.getPrefixId(),
+ scanProjectionName,
+ leftReqs,
+ scanDef,
+ fastIndexNullHandling,
+ hasEmptyLeftInterval);
+ if (isIndex && leftCandidateIndexes.empty()) {
// Reject rewrite.
continue;
}
bool hasEmptyRightInterval = false;
- auto rightCandidateIndexMap = computeCandidateIndexMap(ctx.getPrefixId(),
- scanProjectionName,
- rightReqs,
- scanDef,
- fastIndexNullHandling,
- hasEmptyRightInterval);
- if (isIndex && rightCandidateIndexMap.empty()) {
+ auto rightCandidateIndexes = computeCandidateIndexes(ctx.getPrefixId(),
+ scanProjectionName,
+ rightReqs,
+ scanDef,
+ fastIndexNullHandling,
+ hasEmptyRightInterval);
+ if (isIndex && rightCandidateIndexes.empty()) {
// With empty candidate map, reject only if we cannot implement as Seek.
continue;
}
@@ -1104,13 +1112,18 @@ struct ExploreConvert<SargableNode> {
ABT scanDelegator = make<MemoLogicalDelegatorNode>(scanGroupId);
ABT leftChild = make<SargableNode>(std::move(leftReqs),
- std::move(leftCandidateIndexMap),
+ std::move(leftCandidateIndexes),
+ boost::none,
IndexReqTarget::Index,
scanDelegator);
+
+ auto rightScanParams =
+ computeScanParams(ctx.getPrefixId(), rightReqs, scanProjectionName);
ABT rightChild = rightReqs.empty()
? scanDelegator
: make<SargableNode>(std::move(rightReqs),
- std::move(rightCandidateIndexMap),
+ std::move(rightCandidateIndexes),
+ std::move(rightScanParams),
isIndex ? IndexReqTarget::Index : IndexReqTarget::Seek,
scanDelegator);
@@ -1249,13 +1262,13 @@ struct ExploreReorder<EvaluationNode, RIDIntersectNode> {
}
};
-void LogicalRewriter::registerRewrite(const LogicalRewriteType rewriteType, RewriteFn fn) {
- if (_activeRewriteSet.find(rewriteType) != _activeRewriteSet.cend()) {
- _rewriteMap.emplace(rewriteType, fn);
- }
-}
-
void LogicalRewriter::initializeRewrites() {
+ const auto registerRewrite = [& rewriteMap = _rewriteMap](const LogicalRewriteType rewriteType,
+ RewriteFn fn) {
+ const bool inserted = rewriteMap.emplace(rewriteType, fn).second;
+ invariant(inserted);
+ };
+
registerRewrite(
LogicalRewriteType::FilterEvaluationReorder,
&LogicalRewriter::bindAboveBelow<FilterNode, EvaluationNode, SubstituteReorder>);
diff --git a/src/mongo/db/query/optimizer/cascades/logical_rewriter.h b/src/mongo/db/query/optimizer/cascades/logical_rewriter.h
index 97684ec992d..eb595e26bd8 100644
--- a/src/mongo/db/query/optimizer/cascades/logical_rewriter.h
+++ b/src/mongo/db/query/optimizer/cascades/logical_rewriter.h
@@ -106,7 +106,6 @@ private:
template <class Type, template <class> class R>
void bindSingleNode(MemoLogicalNodeId nodeMemoId);
- void registerRewrite(LogicalRewriteType rewriteType, RewriteFn fn);
void initializeRewrites();
static RewriteSet _explorationSet;
diff --git a/src/mongo/db/query/optimizer/cascades/memo.cpp b/src/mongo/db/query/optimizer/cascades/memo.cpp
index e1a0f4b0783..d827d450191 100644
--- a/src/mongo/db/query/optimizer/cascades/memo.cpp
+++ b/src/mongo/db/query/optimizer/cascades/memo.cpp
@@ -658,43 +658,17 @@ void Memo::estimateCE(const GroupIdType groupId, const bool useHeuristicCE) {
auto ceProp = properties::CardinalityEstimate(estimate);
if (auto sargablePtr = nodeRef.cast<SargableNode>(); sargablePtr != nullptr) {
- auto& partialSchemaKeyCEMap = ceProp.getPartialSchemaKeyCEMap();
- invariant(partialSchemaKeyCEMap.empty());
-
- struct CEEntry {
- PartialSchemaKey _key;
- CEType _ce;
- size_t _count = 1;
- };
- boost::optional<CEEntry> prevEntry;
-
- const auto addEntryFn = [&]() {
- // We take a geometric average of the entries with the same key.
- partialSchemaKeyCEMap.emplace(prevEntry->_key,
- std::pow(prevEntry->_ce, 1.0 / prevEntry->_count));
- };
+ auto& partialSchemaKeyCE = ceProp.getPartialSchemaKeyCE();
+ invariant(partialSchemaKeyCE.empty());
for (const auto& [key, req] : sargablePtr->getReqMap()) {
ABT singularReq = make<SargableNode>(PartialSchemaRequirements{{key, req}},
- CandidateIndexMap{},
+ CandidateIndexes{},
+ ScanParams{},
sargablePtr->getTarget(),
sargablePtr->getChild());
const CEType singularEst = ceEstimator->deriveCE(*this, props, singularReq.ref());
-
- if (prevEntry) {
- if (prevEntry->_key == key) {
- prevEntry->_ce *= singularEst;
- prevEntry->_count++;
- } else {
- addEntryFn();
- prevEntry = {{key, singularEst}};
- };
- } else {
- prevEntry = {{key, singularEst}};
- }
- }
- if (prevEntry) {
- addEntryFn();
+ partialSchemaKeyCE.emplace_back(key, singularEst);
}
}
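
Since the estimates in PartialSchemaKeyCE are now positional (one pair per requirements-map entry, in map order) rather than keyed and geometrically averaged, consumers such as implementers.cpp look them up by entry index. A minimal sketch of that lookup pattern, assuming the types sketched above; the helper name is hypothetical:

    // Hypothetical helper: the CE vector is ordered like the PartialSchemaRequirements
    // map, so the entry index stored on a residual requirement addresses it directly.
    CEType ceForResidual(const PartialSchemaKeyCE& partialSchemaKeyCE,
                         const ResidualRequirement& residual) {
        return partialSchemaKeyCE.at(residual._entryIndex).second;
    }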
diff --git a/src/mongo/db/query/optimizer/explain.cpp b/src/mongo/db/query/optimizer/explain.cpp
index 2d681ccbd6f..bb2973c31c3 100644
--- a/src/mongo/db/query/optimizer/explain.cpp
+++ b/src/mongo/db/query/optimizer/explain.cpp
@@ -559,7 +559,7 @@ public:
} else if constexpr (version == ExplainVersion::V3) {
printer.fieldName(name).print(flag);
} else {
- static_assert("Unknown version");
+ MONGO_UNREACHABLE;
}
}
@@ -602,7 +602,7 @@ public:
} else if constexpr (version == ExplainVersion::V3) {
printer.separator(" ").fieldName(name).print(child);
} else {
- static_assert("Unknown version");
+ MONGO_UNREACHABLE;
}
}
@@ -638,7 +638,7 @@ public:
}
printer.fieldName("fieldProjectionMap").print(local);
} else {
- static_assert("Unknown version");
+ MONGO_UNREACHABLE;
}
}
@@ -735,7 +735,7 @@ public:
.fieldName("highBound")
.print(highBoundPrinter);
} else {
- static_assert("Version not implemented");
+ MONGO_UNREACHABLE;
}
}
@@ -750,7 +750,7 @@ public:
return intervalPrinter.print(intervalExpr);
}
- void printInterval(ExplainPrinter& printer, const MultiKeyIntervalRequirement& interval) {
+ void printInterval(ExplainPrinter& printer, const CompoundIntervalRequirement& interval) {
if constexpr (version < ExplainVersion::V3) {
bool first = true;
for (const auto& entry : interval) {
@@ -770,7 +770,7 @@ public:
}
printer.print(printers);
} else {
- static_assert("Version not implemented");
+ MONGO_UNREACHABLE;
}
}
@@ -817,7 +817,7 @@ public:
printer.print(childResults);
return printer;
} else {
- static_assert("Version not implemented");
+ MONGO_UNREACHABLE;
}
}
@@ -1005,17 +1005,48 @@ public:
parent.fieldName("requirementsMap").print(printers);
}
+ void printResidualRequirements(ExplainPrinter& parent,
+ const ResidualRequirements& residualReqs) {
+ std::vector<ExplainPrinter> printers;
+ for (const auto& [key, req, entryIndex] : residualReqs) {
+ ExplainPrinter local;
+
+ local.fieldName("refProjection").print(key._projectionName).separator(", ");
+ ExplainPrinter pathPrinter = generate(key._path);
+ local.fieldName("path").separator("'").printSingleLevel(pathPrinter).separator("', ");
+
+ if (req.hasBoundProjectionName()) {
+ local.fieldName("boundProjection")
+ .print(req.getBoundProjectionName())
+ .separator(", ");
+ }
+
+ local.fieldName("intervals");
+ {
+ ExplainPrinter intervals = printIntervalExpr(req.getIntervals());
+ local.printSingleLevel(intervals, "" /*singleLevelSpacer*/);
+ }
+ local.separator(", ").fieldName("entryIndex").print(entryIndex);
+
+ printers.push_back(std::move(local));
+ }
+
+ parent.fieldName("residualReqs").print(printers);
+ }
+
ExplainPrinter transport(const SargableNode& node,
ExplainPrinter childResult,
ExplainPrinter bindResult,
ExplainPrinter refsResult) {
+ const auto& scanParams = node.getScanParams();
+
ExplainPrinter printer("Sargable");
maybePrintProps(printer, node);
printer.separator(" [")
.fieldName("target", ExplainVersion::V3)
.print(IndexReqTargetEnum::toString[static_cast<int>(node.getTarget())])
.separator("]")
- .setChildCount(5);
+ .setChildCount(scanParams ? 6 : 5);
if constexpr (version < ExplainVersion::V3) {
ExplainPrinter local;
@@ -1024,119 +1055,109 @@ public:
} else if constexpr (version == ExplainVersion::V3) {
printPartialSchemaReqMap(printer, node.getReqMap());
} else {
- static_assert("Unknown version");
+ MONGO_UNREACHABLE;
}
- std::vector<ExplainPrinter> candidateIndexesPrinters;
- size_t candidateIndex = 0;
- for (const auto& [indexDefName, candidateIndexEntry] : node.getCandidateIndexMap()) {
- candidateIndex++;
- ExplainPrinter local;
- local.fieldName("candidateId")
- .print(candidateIndex)
- .separator(", ")
- .fieldName("indexDefName", ExplainVersion::V3)
- .print(indexDefName)
- .separator(", ");
+ {
+ std::vector<ExplainPrinter> candidateIndexesPrinters;
+ for (size_t index = 0; index < node.getCandidateIndexes().size(); index++) {
+ const CandidateIndexEntry& candidateIndexEntry =
+ node.getCandidateIndexes().at(index);
- local.separator("{");
- printFieldProjectionMap(local, candidateIndexEntry._fieldProjectionMap);
- local.separator("}, {");
+ ExplainPrinter local;
+ local.fieldName("candidateId")
+ .print(index + 1)
+ .separator(", ")
+ .fieldName("indexDefName", ExplainVersion::V3)
+ .print(candidateIndexEntry._indexDefName)
+ .separator(", ");
- {
- std::set<size_t> orderedFields;
- for (const size_t fieldId : candidateIndexEntry._fieldsToCollate) {
- orderedFields.insert(fieldId);
- }
+ local.separator("{");
+ printFieldProjectionMap(local, candidateIndexEntry._fieldProjectionMap);
+ local.separator("}, {");
- if constexpr (version < ExplainVersion::V3) {
- bool first = true;
- for (const size_t fieldId : orderedFields) {
- if (first) {
- first = false;
- } else {
- local.print(", ");
+ {
+ std::set<size_t> orderedFields;
+ for (const size_t fieldId : candidateIndexEntry._fieldsToCollate) {
+ orderedFields.insert(fieldId);
+ }
+
+ if constexpr (version < ExplainVersion::V3) {
+ bool first = true;
+ for (const size_t fieldId : orderedFields) {
+ if (first) {
+ first = false;
+ } else {
+ local.print(", ");
+ }
+ local.print(fieldId);
+ }
+ } else if constexpr (version == ExplainVersion::V3) {
+ std::vector<ExplainPrinter> printers;
+ for (const size_t fieldId : orderedFields) {
+ ExplainPrinter local1;
+ local1.print(fieldId);
+ printers.push_back(std::move(local1));
}
- local.print(fieldId);
+ local.fieldName("fieldsToCollate").print(printers);
+ } else {
+ MONGO_UNREACHABLE;
}
- } else if constexpr (version == ExplainVersion::V3) {
- std::vector<ExplainPrinter> printers;
- for (const size_t fieldId : orderedFields) {
- ExplainPrinter local1;
- local1.print(fieldId);
- printers.push_back(std::move(local1));
+ }
+
+ local.separator("}, ").fieldName("intervals", ExplainVersion::V3);
+ {
+ IntervalPrinter<CompoundIntervalReqExpr> intervalPrinter(*this);
+ ExplainPrinter intervals =
+ intervalPrinter.print(candidateIndexEntry._intervals);
+ local.printSingleLevel(intervals, "" /*singleLevelSpacer*/);
+ }
+
+ if (const auto& residualReqs = candidateIndexEntry._residualRequirements;
+ !residualReqs.empty()) {
+ if constexpr (version < ExplainVersion::V3) {
+ ExplainPrinter residualReqMapPrinter;
+ printResidualRequirements(residualReqMapPrinter, residualReqs);
+ local.print(residualReqMapPrinter);
+ } else if (version == ExplainVersion::V3) {
+ printResidualRequirements(local, residualReqs);
+ } else {
+ MONGO_UNREACHABLE;
}
- local.fieldName("fieldsToCollate").print(printers);
- } else {
- static_assert("Unknown version");
}
- }
- local.separator("}, ").fieldName("intervals", ExplainVersion::V3);
- {
- IntervalPrinter<MultiKeyIntervalReqExpr> intervalPrinter(*this);
- ExplainPrinter intervals = intervalPrinter.print(candidateIndexEntry._intervals);
- local.printSingleLevel(intervals, "" /*singleLevelSpacer*/);
+ candidateIndexesPrinters.push_back(std::move(local));
}
+ ExplainPrinter candidateIndexesPrinter;
+ candidateIndexesPrinter.fieldName("candidateIndexes").print(candidateIndexesPrinters);
+ printer.printAppend(candidateIndexesPrinter);
+ }
- if (!candidateIndexEntry._residualRequirements.empty()) {
+ if (scanParams) {
+ ExplainPrinter local;
+ local.separator("{");
+ printFieldProjectionMap(local, scanParams->_fieldProjectionMap);
+ local.separator("}");
+
+ if (const auto& residualReqs = scanParams->_residualRequirements;
+ !residualReqs.empty()) {
if constexpr (version < ExplainVersion::V3) {
ExplainPrinter residualReqMapPrinter;
- printPartialSchemaReqMap(residualReqMapPrinter,
- candidateIndexEntry._residualRequirements);
+ printResidualRequirements(residualReqMapPrinter, residualReqs);
local.print(residualReqMapPrinter);
} else if (version == ExplainVersion::V3) {
- printPartialSchemaReqMap(local, candidateIndexEntry._residualRequirements);
+ printResidualRequirements(local, residualReqs);
} else {
- static_assert("Unknown version");
- }
- }
-
- if (!candidateIndexEntry._residualKeyMap.empty()) {
- std::vector<ExplainPrinter> residualKeyMapPrinters;
- for (const auto& [queryKey, residualKey] : candidateIndexEntry._residualKeyMap) {
- ExplainPrinter local1;
-
- ExplainPrinter pathPrinter = generate(queryKey._path);
- local1.fieldName("queryRefProjection")
- .print(queryKey._projectionName)
- .separator(", ")
- .fieldName("queryPath")
- .separator("'")
- .printSingleLevel(pathPrinter)
- .separator("', ")
- .fieldName("residualRefProjection")
- .print(residualKey._projectionName)
- .separator(", ");
-
- ExplainPrinter pathPrinter1 = generate(residualKey._path);
- local1.fieldName("residualPath")
- .separator("'")
- .printSingleLevel(pathPrinter1)
- .separator("'");
-
- residualKeyMapPrinters.push_back(std::move(local1));
+ MONGO_UNREACHABLE;
}
-
- local.fieldName("residualKeyMap").print(residualKeyMapPrinters);
-
- std::vector<ExplainPrinter> projNamePrinters;
- for (const ProjectionName& projName :
- candidateIndexEntry._residualRequirementsTempProjections) {
- ExplainPrinter local1;
- local1.print(projName);
- projNamePrinters.push_back(std::move(local1));
- }
- local.fieldName("tempProjections").print(projNamePrinters);
}
- candidateIndexesPrinters.push_back(std::move(local));
+ ExplainPrinter scanParamsPrinter;
+ scanParamsPrinter.fieldName("scanParams").print(local);
+ printer.printAppend(scanParamsPrinter);
}
- ExplainPrinter candidateIndexesPrinter;
- candidateIndexesPrinter.fieldName("candidateIndexes").print(candidateIndexesPrinters);
- printer.printAppend(candidateIndexesPrinter)
- .fieldName("bindings", ExplainVersion::V3)
+ printer.fieldName("bindings", ExplainVersion::V3)
.print(bindResult)
.fieldName("references", ExplainVersion::V3)
.print(refsResult)
@@ -1206,7 +1227,7 @@ public:
}
printer.fieldName("correlatedProjections").print(printers);
} else {
- static_assert("Version not implemented");
+ MONGO_UNREACHABLE;
}
printer.separator("]")
@@ -1243,7 +1264,7 @@ public:
}
printer.print(printers);
} else {
- static_assert("Version not implemented");
+ MONGO_UNREACHABLE;
}
}
@@ -1301,7 +1322,7 @@ public:
}
collationPrinter.print(printers);
} else {
- static_assert("Version not implemented");
+ MONGO_UNREACHABLE;
}
printer.setChildCount(4)
@@ -1583,9 +1604,10 @@ public:
cePrinter.fieldName("ce").print(prop.getEstimate());
fieldPrinters.push_back(std::move(cePrinter));
- if (!prop.getPartialSchemaKeyCEMap().empty()) {
+ if (const auto& partialSchemaKeyCE = prop.getPartialSchemaKeyCE();
+ !partialSchemaKeyCE.empty()) {
std::vector<ExplainPrinter> reqPrinters;
- for (const auto& [key, ce] : prop.getPartialSchemaKeyCEMap()) {
+ for (const auto& [key, ce] : partialSchemaKeyCE) {
ExplainGeneratorTransporter<version> gen;
ExplainPrinter pathPrinter = gen.generate(key._path);
@@ -2027,7 +2049,7 @@ public:
}
printer.fieldName("projections").print(printers);
} else {
- static_assert("Unknown version");
+ MONGO_UNREACHABLE;
}
}
@@ -2072,7 +2094,7 @@ public:
} else if constexpr (version == ExplainVersion::V3) {
printer.fieldName("maxDepth", ExplainVersion::V3).print(path.getMaxDepth());
} else {
- static_assert("Unknown version");
+ MONGO_UNREACHABLE;
}
printer.separator("]")
diff --git a/src/mongo/db/query/optimizer/index_bounds.cpp b/src/mongo/db/query/optimizer/index_bounds.cpp
index 58209ff8e75..908595bc5de 100644
--- a/src/mongo/db/query/optimizer/index_bounds.cpp
+++ b/src/mongo/db/query/optimizer/index_bounds.cpp
@@ -117,10 +117,6 @@ bool PartialSchemaKey::operator==(const PartialSchemaKey& other) const {
return _projectionName == other._projectionName && _path == other._path;
}
-bool PartialSchemaKey::emptyPath() const {
- return _path.is<PathIdentity>();
-}
-
bool isIntervalReqFullyOpenDNF(const IntervalReqExpr::Node& n) {
if (auto singular = IntervalReqExpr::getSingularDNF(n); singular && singular->isFullyOpen()) {
return true;
@@ -218,19 +214,42 @@ bool PartialSchemaKeyLessComparator::operator()(const PartialSchemaKey& k1,
ResidualRequirement::ResidualRequirement(PartialSchemaKey key,
PartialSchemaRequirement req,
- CEType ce)
+ size_t entryIndex)
+ : _key(std::move(key)), _req(std::move(req)), _entryIndex(entryIndex) {}
+
+bool ResidualRequirement::operator==(const ResidualRequirement& other) const {
+ return _key == other._key && _req == other._req && _entryIndex == other._entryIndex;
+}
+
+ResidualRequirementWithCE::ResidualRequirementWithCE(PartialSchemaKey key,
+ PartialSchemaRequirement req,
+ CEType ce)
: _key(std::move(key)), _req(std::move(req)), _ce(ce) {}
+CandidateIndexEntry::CandidateIndexEntry(std::string indexDefName)
+ : _indexDefName(std::move(indexDefName)),
+ _fieldProjectionMap(),
+ _intervals(CompoundIntervalReqExpr::makeSingularDNF()),
+ _residualRequirements(),
+ _fieldsToCollate(),
+ _intervalPrefixSize(0) {}
+
bool CandidateIndexEntry::operator==(const CandidateIndexEntry& other) const {
- return _fieldProjectionMap == other._fieldProjectionMap && _intervals == other._intervals &&
+ return _indexDefName == other._indexDefName &&
+ _fieldProjectionMap == other._fieldProjectionMap && _intervals == other._intervals &&
_residualRequirements == other._residualRequirements &&
_fieldsToCollate == other._fieldsToCollate &&
_intervalPrefixSize == other._intervalPrefixSize;
}
+bool ScanParams::operator==(const ScanParams& other) const {
+ return _fieldProjectionMap == other._fieldProjectionMap &&
+ _residualRequirements == other._residualRequirements;
+}
+
IndexSpecification::IndexSpecification(std::string scanDefName,
std::string indexDefName,
- MultiKeyIntervalRequirement interval,
+ CompoundIntervalRequirement interval,
bool reverseOrder)
: _scanDefName(std::move(scanDefName)),
_indexDefName(std::move(indexDefName)),
@@ -250,7 +269,7 @@ const std::string& IndexSpecification::getIndexDefName() const {
return _indexDefName;
}
-const MultiKeyIntervalRequirement& IndexSpecification::getInterval() const {
+const CompoundIntervalRequirement& IndexSpecification::getInterval() const {
return _interval;
}
diff --git a/src/mongo/db/query/optimizer/index_bounds.h b/src/mongo/db/query/optimizer/index_bounds.h
index 79bffce2e1d..547a278609f 100644
--- a/src/mongo/db/query/optimizer/index_bounds.h
+++ b/src/mongo/db/query/optimizer/index_bounds.h
@@ -85,8 +85,6 @@ struct PartialSchemaKey {
bool operator==(const PartialSchemaKey& other) const;
- bool emptyPath() const;
-
// Referred, or input projection name.
ProjectionName _projectionName;
@@ -141,64 +139,83 @@ using PartialSchemaRequirements =
std::multimap<PartialSchemaKey, PartialSchemaRequirement, PartialSchemaKeyLessComparator>;
/**
- * Used to track cardinality estimates per predicate inside a PartialSchemaRequirement.
- * We currently have a single estimate per PartialSchemaKey for all matching entries in the primary
- * map.
- * TODO: SERVER-68092 Relax constraint described above.
+ * Used to track cardinality estimates per predicate inside a PartialSchemaRequirement. The order of
+ * estimates is the same as the order in the primary PartialSchemaRequirements map.
*/
-using PartialSchemaKeyCE = std::map<PartialSchemaKey, CEType, PartialSchemaKeyLessComparator>;
-using ResidualKeyMap = std::map<PartialSchemaKey, PartialSchemaKey, PartialSchemaKeyLessComparator>;
+using PartialSchemaKeyCE = std::vector<std::pair<PartialSchemaKey, CEType>>;
using PartialSchemaKeySet = std::set<PartialSchemaKey, PartialSchemaKeyLessComparator>;
// Requirements which are not satisfied directly by an IndexScan, PhysicalScan or Seek (e.g. using
-// an index field, or scan field). They are intended to be sorted in their containing vector from
-// most to least selective.
+// an index field, or scan field). The index refers to the underlying entry in the
+// PartialSchemaRequirement map.
struct ResidualRequirement {
- ResidualRequirement(PartialSchemaKey key, PartialSchemaRequirement req, CEType ce);
+ ResidualRequirement(PartialSchemaKey key, PartialSchemaRequirement req, size_t entryIndex);
+
+ bool operator==(const ResidualRequirement& other) const;
PartialSchemaKey _key;
PartialSchemaRequirement _req;
- CEType _ce;
+ size_t _entryIndex;
};
using ResidualRequirements = std::vector<ResidualRequirement>;
+struct ResidualRequirementWithCE {
+ ResidualRequirementWithCE(PartialSchemaKey key, PartialSchemaRequirement req, CEType ce);
+
+ PartialSchemaKey _key;
+ PartialSchemaRequirement _req;
+ CEType _ce;
+};
+using ResidualRequirementsWithCE = std::vector<ResidualRequirementWithCE>;
+
+
// A sequence of intervals corresponding, one for each index key.
-using MultiKeyIntervalRequirement = std::vector<IntervalRequirement>;
+using CompoundIntervalRequirement = std::vector<IntervalRequirement>;
-// Multi-key intervals represent unions and conjunctions of individual multi-key intervals.
-using MultiKeyIntervalReqExpr = BoolExpr<MultiKeyIntervalRequirement>;
+// Unions and conjunctions of individual compound intervals.
+using CompoundIntervalReqExpr = BoolExpr<CompoundIntervalRequirement>;
// Used to pre-compute candidate indexes for SargableNodes.
struct CandidateIndexEntry {
+ CandidateIndexEntry(std::string indexDefName);
+
bool operator==(const CandidateIndexEntry& other) const;
- FieldProjectionMap _fieldProjectionMap;
- MultiKeyIntervalReqExpr::Node _intervals;
+ std::string _indexDefName;
- PartialSchemaRequirements _residualRequirements;
- // Projections needed to evaluate residual requirements.
- ProjectionNameSet _residualRequirementsTempProjections;
+ FieldProjectionMap _fieldProjectionMap;
+ CompoundIntervalReqExpr::Node _intervals;
- // Used for CE. Mapping for residual requirement key to query key.
- ResidualKeyMap _residualKeyMap;
+ // Requirements which are not satisfied directly by the IndexScan. They are intended to be
+ // sorted in their containing vector from most to least selective.
+ ResidualRequirements _residualRequirements;
// We have equalities on those index fields, and thus do not consider for collation
// requirements.
- // TODO: consider a bitset.
opt::unordered_set<size_t> _fieldsToCollate;
// Length of prefix of fields with applied intervals.
size_t _intervalPrefixSize;
};
-using CandidateIndexMap = std::map<std::string /*index name*/, CandidateIndexEntry>;
+struct ScanParams {
+ bool operator==(const ScanParams& other) const;
+
+ FieldProjectionMap _fieldProjectionMap;
+
+ // Requirements which are not satisfied directly by a PhysicalScan or Seek. They are intended to
+ // be sorted in their containing vector from most to least selective.
+ ResidualRequirements _residualRequirements;
+};
+
+using CandidateIndexes = std::vector<CandidateIndexEntry>;
class IndexSpecification {
public:
IndexSpecification(std::string scanDefName,
std::string indexDefName,
- MultiKeyIntervalRequirement interval,
+ CompoundIntervalRequirement interval,
bool reverseOrder);
bool operator==(const IndexSpecification& other) const;
@@ -206,7 +223,7 @@ public:
const std::string& getScanDefName() const;
const std::string& getIndexDefName() const;
- const MultiKeyIntervalRequirement& getInterval() const;
+ const CompoundIntervalRequirement& getInterval() const;
bool isReverseOrder() const;
@@ -218,7 +235,7 @@ private:
const std::string _indexDefName;
// The index interval.
- MultiKeyIntervalRequirement _interval;
+ CompoundIntervalRequirement _interval;
// Do we reverse the index order.
const bool _reverseOrder;
diff --git a/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp b/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp
index 62b5467c2fe..d5664d3cc2b 100644
--- a/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp
+++ b/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp
@@ -1120,10 +1120,15 @@ TEST(LogicalRewriter, FilterUnionUnionPushdown) {
"| | [ptest]\n"
"| | Source []\n"
"| Sargable [Complete]\n"
- "| | | | | requirementsMap: \n"
- "| | | | | refProjection: ptest, path: 'PathGet [a] PathTraverse [1] "
+ "| | | | | | requirementsMap: \n"
+ "| | | | | | refProjection: ptest, path: 'PathGet [a] PathTraverse [1] "
"PathIdentity []', intervals: {{{[Const [1], Const [1]]}}}\n"
- "| | | | candidateIndexes: \n"
+ "| | | | | candidateIndexes: \n"
+ "| | | | scanParams: \n"
+ "| | | | {'a': evalTemp_0}\n"
+ "| | | | residualReqs: \n"
+ "| | | | refProjection: evalTemp_0, path: 'PathTraverse [1] "
+ "PathIdentity []', intervals: {{{[Const [1], Const [1]]}}}, entryIndex: 0\n"
"| | | BindBlock:\n"
"| | RefBlock: \n"
"| | Variable [ptest]\n"
@@ -1136,10 +1141,15 @@ TEST(LogicalRewriter, FilterUnionUnionPushdown) {
"| | [ptest]\n"
"| | Source []\n"
"| Sargable [Complete]\n"
- "| | | | | requirementsMap: \n"
- "| | | | | refProjection: ptest, path: 'PathGet [a] PathTraverse [1] "
+ "| | | | | | requirementsMap: \n"
+ "| | | | | | refProjection: ptest, path: 'PathGet [a] PathTraverse [1] "
"PathIdentity []', intervals: {{{[Const [1], Const [1]]}}}\n"
- "| | | | candidateIndexes: \n"
+ "| | | | | candidateIndexes: \n"
+ "| | | | scanParams: \n"
+ "| | | | {'a': evalTemp_2}\n"
+ "| | | | residualReqs: \n"
+ "| | | | refProjection: evalTemp_2, path: 'PathTraverse [1] "
+ "PathIdentity []', intervals: {{{[Const [1], Const [1]]}}}, entryIndex: 0\n"
"| | | BindBlock:\n"
"| | RefBlock: \n"
"| | Variable [ptest]\n"
@@ -1148,11 +1158,15 @@ TEST(LogicalRewriter, FilterUnionUnionPushdown) {
"| [ptest]\n"
"| Source []\n"
"Sargable [Complete]\n"
- "| | | | requirementsMap: \n"
- "| | | | refProjection: ptest, path: 'PathGet [a] PathTraverse [1] "
- "PathIdentity "
- "[]', intervals: {{{[Const [1], Const [1]]}}}\n"
- "| | | candidateIndexes: \n"
+ "| | | | | requirementsMap: \n"
+ "| | | | | refProjection: ptest, path: 'PathGet [a] PathTraverse [1] "
+ "PathIdentity []', intervals: {{{[Const [1], Const [1]]}}}\n"
+ "| | | | candidateIndexes: \n"
+ "| | | scanParams: \n"
+ "| | | {'a': evalTemp_1}\n"
+ "| | | residualReqs: \n"
+ "| | | refProjection: evalTemp_1, path: 'PathTraverse [1] PathIdentity "
+ "[]', intervals: {{{[Const [1], Const [1]]}}}, entryIndex: 0\n"
"| | BindBlock:\n"
"| RefBlock: \n"
"| Variable [ptest]\n"
@@ -1254,10 +1268,12 @@ TEST(LogicalRewriter, UnionPreservesCommonLogicalProps) {
" | logicalNodes: \n"
" | logicalNodeId: 0\n"
" | Sargable [Complete]\n"
- " | | | | | requirementsMap: \n"
- " | | | | | refProjection: ptest1, path: 'PathGet [a] "
+ " | | | | | | requirementsMap: \n"
+ " | | | | | | refProjection: ptest1, path: 'PathGet [a] "
"PathIdentity []', boundProjection: a, intervals: {{{[Const [minKey], Const [maxKey]]}}}\n"
- " | | | | candidateIndexes: \n"
+ " | | | | | candidateIndexes: \n"
+ " | | | | scanParams: \n"
+ " | | | | {'a': a}\n"
" | | | BindBlock:\n"
" | | | [a]\n"
" | | | Source []\n"
@@ -1314,10 +1330,12 @@ TEST(LogicalRewriter, UnionPreservesCommonLogicalProps) {
" | logicalNodes: \n"
" | logicalNodeId: 0\n"
" | Sargable [Complete]\n"
- " | | | | | requirementsMap: \n"
- " | | | | | refProjection: ptest2, path: 'PathGet [a] "
+ " | | | | | | requirementsMap: \n"
+ " | | | | | | refProjection: ptest2, path: 'PathGet [a] "
"PathIdentity []', boundProjection: a, intervals: {{{[Const [minKey], Const [maxKey]]}}}\n"
- " | | | | candidateIndexes: \n"
+ " | | | | | candidateIndexes: \n"
+ " | | | | scanParams: \n"
+ " | | | | {'a': a}\n"
" | | | BindBlock:\n"
" | | | [a]\n"
" | | | Source []\n"
@@ -1456,12 +1474,19 @@ TEST(LogicalRewriter, SargableCE) {
" | logicalNodes: \n"
" | logicalNodeId: 0\n"
" | Sargable [Complete]\n"
- " | | | | | requirementsMap: \n"
- " | | | | | refProjection: ptest, path: 'PathGet [a] PathIdentity "
- "[]', intervals: {{{[Const [1], Const [1]]}}}\n"
- " | | | | | refProjection: ptest, path: 'PathGet [b] PathIdentity "
- "[]', intervals: {{{[Const [2], Const [2]]}}}\n"
- " | | | | candidateIndexes: \n"
+ " | | | | | | requirementsMap: \n"
+ " | | | | | | refProjection: ptest, path: 'PathGet [a] "
+ "PathIdentity []', intervals: {{{[Const [1], Const [1]]}}}\n"
+ " | | | | | | refProjection: ptest, path: 'PathGet [b] "
+ "PathIdentity []', intervals: {{{[Const [2], Const [2]]}}}\n"
+ " | | | | | candidateIndexes: \n"
+ " | | | | scanParams: \n"
+ " | | | | {'a': evalTemp_2, 'b': evalTemp_3}\n"
+ " | | | | residualReqs: \n"
+ " | | | | refProjection: evalTemp_2, path: 'PathIdentity "
+ "[]', intervals: {{{[Const [1], Const [1]]}}}, entryIndex: 0\n"
+ " | | | | refProjection: evalTemp_3, path: 'PathIdentity "
+ "[]', intervals: {{{[Const [2], Const [2]]}}}, entryIndex: 1\n"
" | | | BindBlock:\n"
" | | RefBlock: \n"
" | | Variable [ptest]\n"
diff --git a/src/mongo/db/query/optimizer/node.cpp b/src/mongo/db/query/optimizer/node.cpp
index b3d656930c3..3f0cc0d5203 100644
--- a/src/mongo/db/query/optimizer/node.cpp
+++ b/src/mongo/db/query/optimizer/node.cpp
@@ -327,14 +327,16 @@ static ProjectionNameVector createSargableReferences(const PartialSchemaRequirem
}
SargableNode::SargableNode(PartialSchemaRequirements reqMap,
- CandidateIndexMap candidateIndexMap,
+ CandidateIndexes candidateIndexes,
+ boost::optional<ScanParams> scanParams,
const IndexReqTarget target,
ABT child)
: Base(std::move(child),
buildSimpleBinder(createSargableBindings(reqMap)),
make<References>(createSargableReferences(reqMap))),
_reqMap(std::move(reqMap)),
- _candidateIndexMap(std::move(candidateIndexMap)),
+ _candidateIndexes(std::move(candidateIndexes)),
+ _scanParams(std::move(scanParams)),
_target(target) {
assertNodeSort(getChild());
uassert(6624085, "Empty requirements map", !_reqMap.empty());
@@ -360,16 +362,21 @@ SargableNode::SargableNode(PartialSchemaRequirements reqMap,
}
bool SargableNode::operator==(const SargableNode& other) const {
- return _reqMap == other._reqMap && _candidateIndexMap == other._candidateIndexMap &&
- _target == other._target && getChild() == other.getChild();
+ // Specifically not comparing the candidate indexes and ScanParams. Those are derivative of the
+ // requirements, and can have temp projection names.
+ return _reqMap == other._reqMap && _target == other._target && getChild() == other.getChild();
}
const PartialSchemaRequirements& SargableNode::getReqMap() const {
return _reqMap;
}
-const CandidateIndexMap& SargableNode::getCandidateIndexMap() const {
- return _candidateIndexMap;
+const CandidateIndexes& SargableNode::getCandidateIndexes() const {
+ return _candidateIndexes;
+}
+
+const boost::optional<ScanParams>& SargableNode::getScanParams() const {
+ return _scanParams;
}
IndexReqTarget SargableNode::getTarget() const {
diff --git a/src/mongo/db/query/optimizer/node.h b/src/mongo/db/query/optimizer/node.h
index 9a30db0f4e5..ded879d932a 100644
--- a/src/mongo/db/query/optimizer/node.h
+++ b/src/mongo/db/query/optimizer/node.h
@@ -427,7 +427,8 @@ class SargableNode final : public Operator<SargableNode, 3>, public Node, public
public:
SargableNode(PartialSchemaRequirements reqMap,
- CandidateIndexMap candidateIndexMap,
+ CandidateIndexes candidateIndexes,
+ boost::optional<ScanParams> scanParams,
IndexReqTarget target,
ABT child);
@@ -447,14 +448,16 @@ public:
}
const PartialSchemaRequirements& getReqMap() const;
- const CandidateIndexMap& getCandidateIndexMap() const;
+ const CandidateIndexes& getCandidateIndexes() const;
+ const boost::optional<ScanParams>& getScanParams() const;
IndexReqTarget getTarget() const;
private:
const PartialSchemaRequirements _reqMap;
- CandidateIndexMap _candidateIndexMap;
+ CandidateIndexes _candidateIndexes;
+ boost::optional<ScanParams> _scanParams;
// Performance optimization to limit number of groups.
// Under what indexing requirements can this node be implemented.
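
For reference, construction sites now pass the candidate indexes and the optional scan parameters separately; a minimal construction sketch under the new signature (argument values illustrative, mirroring the boost::none usage in logical_rewriter.cpp):

    // Illustrative only: a SargableNode with no pre-computed candidate indexes
    // and no scan params (e.g. an index-targeted left child during a split).
    ABT sargable = make<SargableNode>(std::move(reqMap),
                                      CandidateIndexes{},
                                      boost::none /*scanParams*/,
                                      IndexReqTarget::Index,
                                      std::move(childNode));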
diff --git a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
index 4d6d91c912d..ac07379ea77 100644
--- a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
+++ b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
@@ -498,13 +498,13 @@ TEST(PhysRewriter, DuplicateFilter) {
"| Variable [root]\n"
"Filter []\n"
"| EvalFilter []\n"
- "| | Variable [evalTemp_0]\n"
+ "| | Variable [evalTemp_2]\n"
"| PathTraverse [1]\n"
"| PathCompare [Eq]\n"
"| Const [0]\n"
- "PhysicalScan [{'<root>': root, 'a': evalTemp_0}, c1]\n"
+ "PhysicalScan [{'<root>': root, 'a': evalTemp_2}, c1]\n"
" BindBlock:\n"
- " [evalTemp_0]\n"
+ " [evalTemp_2]\n"
" Source []\n"
" [root]\n"
" Source []\n",
@@ -564,13 +564,13 @@ TEST(PhysRewriter, FilterCollation) {
"| Variable [pb]\n"
"Filter []\n"
"| EvalFilter []\n"
- "| | Variable [evalTemp_0]\n"
+ "| | Variable [evalTemp_1]\n"
"| PathTraverse [1]\n"
"| PathCompare [Eq]\n"
"| Const [1]\n"
- "PhysicalScan [{'a': evalTemp_0, 'b': pb}, c1]\n"
+ "PhysicalScan [{'a': evalTemp_1, 'b': pb}, c1]\n"
" BindBlock:\n"
- " [evalTemp_0]\n"
+ " [evalTemp_1]\n"
" Source []\n"
" [pb]\n"
" Source []\n",
@@ -1255,7 +1255,7 @@ TEST(PhysRewriter, FilterIndexing4) {
phaseManager.getHints()._disableHashJoinRIDIntersect = true;
ASSERT_TRUE(phaseManager.optimize(optimized));
- ASSERT_BETWEEN(30, 50, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+ ASSERT_BETWEEN(60, 75, phaseManager.getMemo().getStats()._physPlanExplorationCount);
ASSERT_EXPLAIN_V2(
"Root []\n"
@@ -1265,29 +1265,29 @@ TEST(PhysRewriter, FilterIndexing4) {
"| Variable [pa]\n"
"Filter []\n"
"| EvalFilter []\n"
- "| | Variable [evalTemp_8]\n"
+ "| | Variable [evalTemp_14]\n"
"| PathCompare [Gt]\n"
"| Const [1]\n"
"Filter []\n"
"| EvalFilter []\n"
- "| | Variable [evalTemp_7]\n"
+ "| | Variable [evalTemp_13]\n"
"| PathCompare [Gt]\n"
"| Const [1]\n"
"Filter []\n"
"| EvalFilter []\n"
- "| | Variable [evalTemp_6]\n"
+ "| | Variable [evalTemp_12]\n"
"| PathCompare [Gt]\n"
"| Const [1]\n"
- "IndexScan [{'<indexKey> 0': pa, '<indexKey> 1': evalTemp_6, '<indexKey> 2': evalTemp_7, "
- "'<indexKey> 3': evalTemp_8}, scanDefName: c1, indexDefName: index1, interval: {(Const "
+ "IndexScan [{'<indexKey> 0': pa, '<indexKey> 1': evalTemp_12, '<indexKey> 2': evalTemp_13, "
+ "'<indexKey> 3': evalTemp_14}, scanDefName: c1, indexDefName: index1, interval: {(Const "
"[1], Const [maxKey]], [Const [minKey], Const [maxKey]], [Const [minKey], Const [maxKey]], "
"[Const [minKey], Const [maxKey]]}]\n"
" BindBlock:\n"
- " [evalTemp_6]\n"
+ " [evalTemp_12]\n"
" Source []\n"
- " [evalTemp_7]\n"
+ " [evalTemp_13]\n"
" Source []\n"
- " [evalTemp_8]\n"
+ " [evalTemp_14]\n"
" Source []\n"
" [pa]\n"
" Source []\n",
@@ -1367,12 +1367,19 @@ TEST(PhysRewriter, FilterIndexing5) {
"| | Variable [pb]\n"
"| PathCompare [Gt]\n"
"| Const [0]\n"
- "IndexScan [{'<indexKey> 0': pa, '<indexKey> 1': pb}, scanDefName: c1, indexDefName: "
- "index1, interval: {(Const [0], Const [maxKey]], [Const [minKey], Const [maxKey]]}]\n"
+ "Evaluation []\n"
+ "| BindBlock:\n"
+ "| [pb]\n"
+ "| EvalPath []\n"
+ "| | Variable [evalTemp_0]\n"
+ "| PathIdentity []\n"
+ "IndexScan [{'<indexKey> 0': pa, '<indexKey> 1': evalTemp_0}, scanDefName: c1, "
+ "indexDefName: index1, interval: {(Const [0], Const [maxKey]], [Const [minKey], Const "
+ "[maxKey]]}]\n"
" BindBlock:\n"
- " [pa]\n"
+ " [evalTemp_0]\n"
" Source []\n"
- " [pb]\n"
+ " [pa]\n"
" Source []\n",
optimized);
}
@@ -1506,7 +1513,7 @@ TEST(PhysRewriter, FilterIndexingStress) {
// Without the changes to restrict SargableNode split to which this test is tied, we would
// be exploring 2^kFilterCount plans, one for each created group.
- ASSERT_EQ(55, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+ ASSERT_BETWEEN(60, 80, phaseManager.getMemo().getStats()._physPlanExplorationCount);
const BSONObj& explainRoot = ExplainGenerator::explainBSONObj(optimized);
ASSERT_BSON_PATH("\"Filter\"", explainRoot, "child.nodeType");
@@ -1690,7 +1697,7 @@ TEST(PhysRewriter, FilterIndexingMaxKey) {
"| Variable [root]\n"
"Filter []\n"
"| EvalFilter []\n"
- "| | Variable [evalTemp_1]\n"
+ "| | Variable [evalTemp_3]\n"
"| PathTraverse [1]\n"
"| PathComposeM []\n"
"| | PathCompare [Lt]\n"
@@ -1699,15 +1706,15 @@ TEST(PhysRewriter, FilterIndexingMaxKey) {
"| Const [2]\n"
"Filter []\n"
"| EvalFilter []\n"
- "| | Variable [evalTemp_0]\n"
+ "| | Variable [evalTemp_2]\n"
"| PathTraverse [1]\n"
"| PathCompare [Gt]\n"
"| Const [1]\n"
- "PhysicalScan [{'<root>': root, 'a': evalTemp_0, 'b': evalTemp_1}, c1]\n"
+ "PhysicalScan [{'<root>': root, 'a': evalTemp_2, 'b': evalTemp_3}, c1]\n"
" BindBlock:\n"
- " [evalTemp_0]\n"
+ " [evalTemp_2]\n"
" Source []\n"
- " [evalTemp_1]\n"
+ " [evalTemp_3]\n"
" Source []\n"
" [root]\n"
" Source []\n",
@@ -1767,47 +1774,46 @@ TEST(PhysRewriter, FilterReorder) {
"| Variable [root]\n"
"Filter []\n"
"| EvalFilter []\n"
- "| | Variable [evalTemp_0]\n"
+ "| | Variable [evalTemp_14]\n"
"| PathTraverse [1]\n"
"| PathCompare [Eq]\n"
"| Const [0]\n"
"Filter []\n"
"| EvalFilter []\n"
- "| | Variable [evalTemp_1]\n"
+ "| | Variable [evalTemp_15]\n"
"| PathTraverse [1]\n"
"| PathCompare [Eq]\n"
"| Const [1]\n"
"Filter []\n"
"| EvalFilter []\n"
- "| | Variable [evalTemp_2]\n"
+ "| | Variable [evalTemp_16]\n"
"| PathTraverse [1]\n"
"| PathCompare [Eq]\n"
"| Const [2]\n"
"Filter []\n"
"| EvalFilter []\n"
- "| | Variable [evalTemp_3]\n"
+ "| | Variable [evalTemp_17]\n"
"| PathTraverse [1]\n"
"| PathCompare [Eq]\n"
"| Const [3]\n"
"Filter []\n"
"| EvalFilter []\n"
- "| | Variable [evalTemp_4]\n"
+ "| | Variable [evalTemp_18]\n"
"| PathTraverse [1]\n"
"| PathCompare [Eq]\n"
"| Const [4]\n"
- "PhysicalScan [{'<root>': root, 'field_0': evalTemp_0, 'field_1': evalTemp_1, "
- "'field_2': "
- "evalTemp_2, 'field_3': evalTemp_3, 'field_4': evalTemp_4}, c1]\n"
+ "PhysicalScan [{'<root>': root, 'field_0': evalTemp_14, 'field_1': evalTemp_15, 'field_2': "
+ "evalTemp_16, 'field_3': evalTemp_17, 'field_4': evalTemp_18}, c1]\n"
" BindBlock:\n"
- " [evalTemp_0]\n"
+ " [evalTemp_14]\n"
" Source []\n"
- " [evalTemp_1]\n"
+ " [evalTemp_15]\n"
" Source []\n"
- " [evalTemp_2]\n"
+ " [evalTemp_16]\n"
" Source []\n"
- " [evalTemp_3]\n"
+ " [evalTemp_17]\n"
" Source []\n"
- " [evalTemp_4]\n"
+ " [evalTemp_18]\n"
" Source []\n"
" [root]\n"
" Source []\n",
@@ -2397,7 +2403,7 @@ TEST(PhysRewriter, CompoundIndex1) {
ABT optimized = rootNode;
ASSERT_TRUE(phaseManager.optimize(optimized));
- ASSERT_BETWEEN(25, 35, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+ ASSERT_BETWEEN(35, 50, phaseManager.getMemo().getStats()._physPlanExplorationCount);
const BSONObj& explainRoot = ExplainGenerator::explainBSONObj(optimized);
ASSERT_BSON_PATH("\"BinaryJoin\"", explainRoot, "child.nodeType");
@@ -2485,7 +2491,7 @@ TEST(PhysRewriter, CompoundIndex2) {
ABT optimized = rootNode;
ASSERT_TRUE(phaseManager.optimize(optimized));
- ASSERT_BETWEEN(40, 60, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+ ASSERT_BETWEEN(60, 80, phaseManager.getMemo().getStats()._physPlanExplorationCount);
const BSONObj& explainRoot = ExplainGenerator::explainBSONObj(optimized);
ASSERT_BSON_PATH("\"BinaryJoin\"", explainRoot, "child.nodeType");
@@ -2569,7 +2575,7 @@ TEST(PhysRewriter, CompoundIndex3) {
ABT optimized = rootNode;
ASSERT_TRUE(phaseManager.optimize(optimized));
- ASSERT_BETWEEN(40, 60, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+ ASSERT_BETWEEN(50, 70, phaseManager.getMemo().getStats()._physPlanExplorationCount);
// Demonstrate we have a merge join because we have point predicates.
const BSONObj& explainRoot = ExplainGenerator::explainBSONObj(optimized);
@@ -2672,7 +2678,7 @@ TEST(PhysRewriter, CompoundIndex4Negative) {
ASSERT_BSON_PATH("\"BinaryJoin\"", explainRoot, "child.nodeType");
ASSERT_BSON_PATH("\"Filter\"", explainRoot, "child.rightChild.nodeType");
ASSERT_BSON_PATH("2", explainRoot, "child.rightChild.filter.path.value.value");
- ASSERT_BSON_PATH("\"Seek\"", explainRoot, "child.rightChild.child.child.child.nodeType");
+ ASSERT_BSON_PATH("\"Seek\"", explainRoot, "child.rightChild.child.child.nodeType");
ASSERT_BSON_PATH("\"IndexScan\"", explainRoot, "child.leftChild.nodeType");
ASSERT_BSON_PATH("1", explainRoot, "child.leftChild.interval.0.lowBound.bound.value");
@@ -2725,7 +2731,7 @@ TEST(PhysRewriter, IndexBoundsIntersect) {
ABT optimized = rootNode;
ASSERT_TRUE(phaseManager.optimize(optimized));
- ASSERT_BETWEEN(10, 20, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+ ASSERT_BETWEEN(20, 30, phaseManager.getMemo().getStats()._physPlanExplorationCount);
// Demonstrate that the predicates >70 and <90 are NOT combined into the same interval (70, 90)
// since the paths are multiKey. With the heuristic estimate we may get either interval in the
@@ -3079,15 +3085,15 @@ TEST(PhysRewriter, IndexResidualReq) {
"| Index, dedupRID\n"
"Filter []\n"
"| EvalFilter []\n"
- "| | Variable [evalTemp_1]\n"
+ "| | Variable [evalTemp_2]\n"
"| PathGet [c]\n"
"| PathCompare [Gt]\n"
"| Const [0]\n"
- "IndexScan [{'<indexKey> 0': pa, '<indexKey> 1': evalTemp_1}, scanDefName: c1, "
+ "IndexScan [{'<indexKey> 0': pa, '<indexKey> 1': evalTemp_2}, scanDefName: c1, "
"indexDefName: index1, interval: {(Const [0], Const [maxKey]], [Const [minKey], Const "
"[maxKey]]}]\n"
" BindBlock:\n"
- " [evalTemp_1]\n"
+ " [evalTemp_2]\n"
" Source []\n"
" [pa]\n"
" Source []\n",
@@ -3154,7 +3160,7 @@ TEST(PhysRewriter, IndexResidualReq1) {
ABT optimized = rootNode;
phaseManager.getHints()._fastIndexNullHandling = true;
ASSERT_TRUE(phaseManager.optimize(optimized));
- ASSERT_BETWEEN(25, 45, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+ ASSERT_BETWEEN(50, 75, phaseManager.getMemo().getStats()._physPlanExplorationCount);
// Prefer index1 over index2 and index3 in order to cover all fields.
ASSERT_EXPLAIN_V2(
@@ -3225,7 +3231,7 @@ TEST(PhysRewriter, IndexResidualReq2) {
ABT optimized = rootNode;
ASSERT_TRUE(phaseManager.optimize(optimized));
- ASSERT_EQ(7, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+ ASSERT_BETWEEN(7, 10, phaseManager.getMemo().getStats()._physPlanExplorationCount);
// We can cover "b" with the index and filter before we Seek.
ASSERT_EXPLAIN_V2(
@@ -3251,14 +3257,14 @@ TEST(PhysRewriter, IndexResidualReq2) {
"| rid_0\n"
"Filter []\n"
"| EvalFilter []\n"
- "| | Variable [evalTemp_4]\n"
+ "| | Variable [evalTemp_10]\n"
"| PathCompare [Eq]\n"
"| Const [0]\n"
- "IndexScan [{'<indexKey> 2': evalTemp_4, '<rid>': rid_0}, scanDefName: c1, indexDefName: "
+ "IndexScan [{'<indexKey> 2': evalTemp_10, '<rid>': rid_0}, scanDefName: c1, indexDefName: "
"index1, interval: {[Const [0], Const [0]], [Const [minKey], Const [maxKey]], [Const "
"[minKey], Const [maxKey]]}]\n"
" BindBlock:\n"
- " [evalTemp_4]\n"
+ " [evalTemp_10]\n"
" Source []\n"
" [rid_0]\n"
" Source []\n",
@@ -3548,7 +3554,7 @@ TEST(PhysRewriter, ArrayConstantIndex) {
ABT optimized = rootNode;
ASSERT_TRUE(phaseManager.optimize(optimized));
- ASSERT_BETWEEN(8, 12, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+ ASSERT_BETWEEN(10, 15, phaseManager.getMemo().getStats()._physPlanExplorationCount);
// Demonstrate we get index bounds to handle the array constant, while we also retain the
// original filter. We have index bound with the array itself unioned with bound using the first
@@ -4386,7 +4392,7 @@ TEST(PhysRewriter, PartialIndex1) {
"| | Const [true]\n"
"| Filter []\n"
"| | EvalFilter []\n"
- "| | | Variable [evalTemp_2]\n"
+ "| | | Variable [evalTemp_4]\n"
"| | PathTraverse [1]\n"
"| | PathCompare [Eq]\n"
"| | Const [2]\n"
@@ -4394,9 +4400,9 @@ TEST(PhysRewriter, PartialIndex1) {
"| | limitSkip:\n"
"| | limit: 1\n"
"| | skip: 0\n"
- "| Seek [ridProjection: rid_0, {'<root>': root, 'b': evalTemp_2}, c1]\n"
+ "| Seek [ridProjection: rid_0, {'<root>': root, 'b': evalTemp_4}, c1]\n"
"| | BindBlock:\n"
- "| | [evalTemp_2]\n"
+ "| | [evalTemp_4]\n"
"| | Source []\n"
"| | [root]\n"
"| | Source []\n"
@@ -4543,21 +4549,21 @@ TEST(PhysRewriter, PartialIndexReject) {
"| Variable [root]\n"
"Filter []\n"
"| EvalFilter []\n"
- "| | Variable [evalTemp_1]\n"
+ "| | Variable [evalTemp_3]\n"
"| PathTraverse [1]\n"
"| PathCompare [Eq]\n"
"| Const [2]\n"
"Filter []\n"
"| EvalFilter []\n"
- "| | Variable [evalTemp_0]\n"
+ "| | Variable [evalTemp_2]\n"
"| PathTraverse [1]\n"
"| PathCompare [Eq]\n"
"| Const [3]\n"
- "PhysicalScan [{'<root>': root, 'a': evalTemp_0, 'b': evalTemp_1}, c1]\n"
+ "PhysicalScan [{'<root>': root, 'a': evalTemp_2, 'b': evalTemp_3}, c1]\n"
" BindBlock:\n"
- " [evalTemp_0]\n"
+ " [evalTemp_2]\n"
" Source []\n"
- " [evalTemp_1]\n"
+ " [evalTemp_3]\n"
" Source []\n"
" [root]\n"
" Source []\n",
@@ -4836,5 +4842,143 @@ TEST(PhysRewriter, JoinRewrite1) {
optimized);
}
+TEST(PhysRewriter, RootInterval) {
+ using namespace properties;
+
+ ABT scanNode = make<ScanNode>("root", "c1");
+
+ // We have a predicate applied directly over the root projection without field extraction.
+ ABT filterNode =
+ make<FilterNode>(make<EvalFilter>(make<PathCompare>(Operations::Eq, Constant::int64(1)),
+ make<Variable>("root")),
+ std::move(scanNode));
+
+ ABT rootNode =
+ make<RootNode>(ProjectionRequirement{ProjectionNameVector{"root"}}, std::move(filterNode));
+
+ PrefixId prefixId;
+ OptPhaseManager phaseManager(
+ {OptPhaseManager::OptPhase::MemoSubstitutionPhase,
+ OptPhaseManager::OptPhase::MemoExplorationPhase,
+ OptPhaseManager::OptPhase::MemoImplementationPhase},
+ prefixId,
+ {{{"c1", ScanDefinition{{}, {}}}}},
+ {true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
+
+ ABT optimized = rootNode;
+ ASSERT_TRUE(phaseManager.optimize(optimized));
+ ASSERT_EQ(2, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+
+ ASSERT_EXPLAIN_V2(
+ "Root []\n"
+ "| | projections: \n"
+ "| | root\n"
+ "| RefBlock: \n"
+ "| Variable [root]\n"
+ "Filter []\n"
+ "| EvalFilter []\n"
+ "| | Variable [root]\n"
+ "| PathCompare [Eq]\n"
+ "| Const [1]\n"
+ "PhysicalScan [{'<root>': root}, c1]\n"
+ " BindBlock:\n"
+ " [root]\n"
+ " Source []\n",
+ optimized);
+}
+
+TEST(PhysRewriter, IndexSubfieldCovered) {
+ using namespace properties;
+
+ ABT scanNode = make<ScanNode>("root", "c1");
+
+ ABT evalNode1 = make<EvaluationNode>(
+ "pa",
+ make<EvalPath>(make<PathGet>("a", make<PathIdentity>()), make<Variable>("root")),
+ std::move(scanNode));
+
+ ABT filterNode1 =
+ make<FilterNode>(make<EvalFilter>(make<PathCompare>(Operations::Eq, Constant::int64(1)),
+ make<Variable>("pa")),
+ std::move(evalNode1));
+
+ ABT evalNode2 = make<EvaluationNode>(
+ "pb",
+ make<EvalPath>(make<PathGet>("a", make<PathGet>("b", make<PathIdentity>())),
+ make<Variable>("root")),
+ std::move(filterNode1));
+
+ ABT filterNode2 =
+ make<FilterNode>(make<EvalFilter>(make<PathCompare>(Operations::Eq, Constant::int64(2)),
+ make<Variable>("pb")),
+ std::move(evalNode2));
+
+ ABT filterNode3 = make<FilterNode>(
+ make<EvalFilter>(
+ make<PathGet>(
+ "a",
+ make<PathTraverse>(
+ make<PathGet>("c", make<PathCompare>(Operations::Eq, Constant::int64(3))),
+ PathTraverse::kSingleLevel)),
+ make<Variable>("root")),
+ std::move(filterNode2));
+
+ ABT rootNode = make<RootNode>(ProjectionRequirement{ProjectionNameVector{"pa", "pb"}},
+ std::move(filterNode3));
+
+ PrefixId prefixId;
+ OptPhaseManager phaseManager(
+ {OptPhaseManager::OptPhase::MemoSubstitutionPhase,
+ OptPhaseManager::OptPhase::MemoExplorationPhase,
+ OptPhaseManager::OptPhase::MemoImplementationPhase},
+ prefixId,
+ {{{"c1",
+ ScanDefinition{
+ {},
+ {{"index1",
+ makeIndexDefinition("a", CollationOp::Ascending, false /*isMultiKey*/)}}}}}},
+ {true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
+
+ ABT optimized = rootNode;
+ ASSERT_TRUE(phaseManager.optimize(optimized));
+ ASSERT_BETWEEN(35, 50, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+
+ // Observe we have a covered plan. The filters for subfields "b" and "c" are expressed as
+ // residual predicates. Also observe the traverse for "a.c" is removed due to "a" being
+ // non-multikey.
+ ASSERT_EXPLAIN_V2(
+ "Root []\n"
+ "| | projections: \n"
+ "| | pa\n"
+ "| | pb\n"
+ "| RefBlock: \n"
+ "| Variable [pa]\n"
+ "| Variable [pb]\n"
+ "Filter []\n"
+ "| EvalFilter []\n"
+ "| | Variable [pb]\n"
+ "| PathCompare [Eq]\n"
+ "| Const [2]\n"
+ "Evaluation []\n"
+ "| BindBlock:\n"
+ "| [pb]\n"
+ "| EvalPath []\n"
+ "| | Variable [pa]\n"
+ "| PathGet [b]\n"
+ "| PathIdentity []\n"
+ "Filter []\n"
+ "| EvalFilter []\n"
+ "| | Variable [pa]\n"
+ "| PathGet [c]\n"
+ "| PathCompare [Eq]\n"
+ "| Const [3]\n"
+ "IndexScan [{'<indexKey> 0': pa}, scanDefName: c1, indexDefName: index1, interval: {[Const "
+ "[1], Const [1]]}]\n"
+ " BindBlock:\n"
+ " [pa]\n"
+ " Source []\n",
+ optimized);
+}
+
} // namespace
} // namespace mongo::optimizer
diff --git a/src/mongo/db/query/optimizer/props.cpp b/src/mongo/db/query/optimizer/props.cpp
index cc8a99db388..ef4ff7a1bda 100644
--- a/src/mongo/db/query/optimizer/props.cpp
+++ b/src/mongo/db/query/optimizer/props.cpp
@@ -256,10 +256,10 @@ const ProjectionNameSet& ProjectionAvailability::getProjections() const {
}
CardinalityEstimate::CardinalityEstimate(const CEType estimate)
- : _estimate(estimate), _partialSchemaKeyCEMap() {}
+ : _estimate(estimate), _partialSchemaKeyCE() {}
bool CardinalityEstimate::operator==(const CardinalityEstimate& other) const {
- return _estimate == other._estimate && _partialSchemaKeyCEMap == other._partialSchemaKeyCEMap;
+ return _estimate == other._estimate && _partialSchemaKeyCE == other._partialSchemaKeyCE;
}
CEType CardinalityEstimate::getEstimate() const {
@@ -270,12 +270,12 @@ CEType& CardinalityEstimate::getEstimate() {
return _estimate;
}
-const PartialSchemaKeyCE& CardinalityEstimate::getPartialSchemaKeyCEMap() const {
- return _partialSchemaKeyCEMap;
+const PartialSchemaKeyCE& CardinalityEstimate::getPartialSchemaKeyCE() const {
+ return _partialSchemaKeyCE;
}
-PartialSchemaKeyCE& CardinalityEstimate::getPartialSchemaKeyCEMap() {
- return _partialSchemaKeyCEMap;
+PartialSchemaKeyCE& CardinalityEstimate::getPartialSchemaKeyCE() {
+ return _partialSchemaKeyCE;
}
IndexingAvailability::IndexingAvailability(GroupIdType scanGroupId,
diff --git a/src/mongo/db/query/optimizer/props.h b/src/mongo/db/query/optimizer/props.h
index f77bf38d23c..dfac59a785b 100644
--- a/src/mongo/db/query/optimizer/props.h
+++ b/src/mongo/db/query/optimizer/props.h
@@ -89,26 +89,28 @@ using PhysProperty = algebra::PolyValue<CollationRequirement,
using LogicalProps = opt::unordered_map<LogicalProperty::key_type, LogicalProperty>;
using PhysProps = opt::unordered_map<PhysProperty::key_type, PhysProperty>;
-template <typename T, typename... Args>
+template <typename T,
+ std::enable_if_t<std::is_base_of_v<LogicalPropertyTag, T>, bool> = true,
+ typename... Args>
inline auto makeProperty(Args&&... args) {
- if constexpr (std::is_base_of_v<LogicalPropertyTag, T>) {
- return LogicalProperty::make<T>(std::forward<Args>(args)...);
- } else if constexpr (std::is_base_of_v<PhysPropertyTag, T>) {
- return PhysProperty::make<T>(std::forward<Args>(args)...);
- } else {
- static_assert("Unknown property type");
- }
+ return LogicalProperty::make<T>(std::forward<Args>(args)...);
+}
+
+template <typename T,
+ std::enable_if_t<std::is_base_of_v<PhysPropertyTag, T>, bool> = true,
+ typename... Args>
+inline auto makeProperty(Args&&... args) {
+ return PhysProperty::make<T>(std::forward<Args>(args)...);
}
-template <class P>
+template <class P, std::enable_if_t<std::is_base_of_v<LogicalPropertyTag, P>, bool> = true>
static constexpr auto getPropertyKey() {
- if constexpr (std::is_base_of_v<LogicalPropertyTag, P>) {
- return LogicalProperty::template tagOf<P>();
- } else if constexpr (std::is_base_of_v<PhysPropertyTag, P>) {
- return PhysProperty::template tagOf<P>();
- } else {
- static_assert("Unknown property type");
- }
+ return LogicalProperty::template tagOf<P>();
+}
+
+template <class P, std::enable_if_t<std::is_base_of_v<PhysPropertyTag, P>, bool> = true>
+static constexpr auto getPropertyKey() {
+ return PhysProperty::template tagOf<P>();
}
template <class P, class C>
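
The rewrite above replaces a single if-constexpr dispatcher, whose final else branch used static_assert("Unknown property type") (a non-null string literal converts to true, so the assertion could never fire), with two SFINAE-constrained overloads: passing a type that is neither a logical nor a physical property now simply fails overload resolution. A minimal standalone sketch of the same pattern, using hypothetical tag and property types rather than the optimizer's real ones:

#include <iostream>
#include <type_traits>

// Hypothetical tags and properties standing in for LogicalPropertyTag / PhysPropertyTag.
struct LogicalTag {};
struct PhysicalTag {};
struct CollationProp : LogicalTag {};
struct DistributionProp : PhysicalTag {};

// Overload enabled only for logical properties.
template <typename T, std::enable_if_t<std::is_base_of_v<LogicalTag, T>, bool> = true>
const char* propertyKind() {
    return "logical";
}

// Overload enabled only for physical properties.
template <typename T, std::enable_if_t<std::is_base_of_v<PhysicalTag, T>, bool> = true>
const char* propertyKind() {
    return "physical";
}

int main() {
    std::cout << propertyKind<CollationProp>() << "\n";     // prints "logical"
    std::cout << propertyKind<DistributionProp>() << "\n";  // prints "physical"
    // propertyKind<int>();  // would not compile: no viable overload
    return 0;
}
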
@@ -376,14 +378,14 @@ public:
CEType getEstimate() const;
CEType& getEstimate();
- const PartialSchemaKeyCE& getPartialSchemaKeyCEMap() const;
- PartialSchemaKeyCE& getPartialSchemaKeyCEMap();
+ const PartialSchemaKeyCE& getPartialSchemaKeyCE() const;
+ PartialSchemaKeyCE& getPartialSchemaKeyCE();
private:
CEType _estimate;
// Used for SargableNodes. Provides additional CE per partial schema key.
- PartialSchemaKeyCE _partialSchemaKeyCEMap;
+ PartialSchemaKeyCE _partialSchemaKeyCE;
};
/**
diff --git a/src/mongo/db/query/optimizer/utils/abt_hash.cpp b/src/mongo/db/query/optimizer/utils/abt_hash.cpp
index acdc9f34bca..28520d50052 100644
--- a/src/mongo/db/query/optimizer/utils/abt_hash.cpp
+++ b/src/mongo/db/query/optimizer/utils/abt_hash.cpp
@@ -85,7 +85,7 @@ public:
return 17;
}
- size_t computeHash(const MultiKeyIntervalRequirement& req) {
+ size_t computeHash(const CompoundIntervalRequirement& req) {
size_t result = 19;
for (const auto& interval : req) {
updateHash(result, computeHash(interval));
@@ -131,28 +131,6 @@ static size_t computePartialSchemaReqHash(const PartialSchemaRequirements& reqMa
return result;
}
-static size_t computeCandidateIndexMapHash(const CandidateIndexMap& map) {
- size_t result = 17;
-
- IntervalHasher<MultiKeyIntervalReqExpr> intervalHasher;
- for (const auto& [indexDefName, candidateIndexEntry] : map) {
- updateHash(result, std::hash<std::string>()(indexDefName));
-
- {
- const auto& fieldProjectionMap = candidateIndexEntry._fieldProjectionMap;
- updateHash(result, std::hash<ProjectionName>()(fieldProjectionMap._ridProjection));
- updateHash(result, std::hash<ProjectionName>()(fieldProjectionMap._rootProjection));
- for (const auto& fieldProjectionMapEntry : fieldProjectionMap._fieldProjections) {
- updateHash(result, std::hash<FieldNameType>()(fieldProjectionMapEntry.first));
- updateHash(result, std::hash<ProjectionName>()(fieldProjectionMapEntry.second));
- }
- }
- updateHash(result, intervalHasher.compute(candidateIndexEntry._intervals));
- }
-
- return result;
-}
-
/**
* Hasher for ABT nodes. Used in conjunction with memo.
*/
@@ -202,8 +180,9 @@ public:
size_t childResult,
size_t /*bindResult*/,
size_t /*refResult*/) {
+ // Specifically not hashing the candidate indexes and ScanParams. Those are derivative of
+ // the requirements, and can have temp projection names.
return computeHashSeq<44>(computePartialSchemaReqHash(node.getReqMap()),
- computeCandidateIndexMapHash(node.getCandidateIndexMap()),
std::hash<IndexReqTarget>()(node.getTarget()),
childResult);
}
diff --git a/src/mongo/db/query/optimizer/utils/interval_utils.cpp b/src/mongo/db/query/optimizer/utils/interval_utils.cpp
index 7f3b29f9896..6d56b21e814 100644
--- a/src/mongo/db/query/optimizer/utils/interval_utils.cpp
+++ b/src/mongo/db/query/optimizer/utils/interval_utils.cpp
@@ -280,25 +280,25 @@ boost::optional<IntervalReqExpr::Node> intersectDNFIntervals(
return IntervalReqExpr::make<IntervalReqExpr::Disjunction>(std::move(disjuncts));
}
-bool combineMultiKeyIntervalsDNF(MultiKeyIntervalReqExpr::Node& targetIntervals,
+bool combineCompoundIntervalsDNF(CompoundIntervalReqExpr::Node& targetIntervals,
const IntervalReqExpr::Node& sourceIntervals,
- const bool reverseSource) {
- MultiKeyIntervalReqExpr::NodeVector newDisjunction;
+ bool reverseSource) {
+ CompoundIntervalReqExpr::NodeVector newDisjunction;
for (const auto& sourceConjunction :
sourceIntervals.cast<IntervalReqExpr::Disjunction>()->nodes()) {
for (const auto& targetConjunction :
- targetIntervals.cast<MultiKeyIntervalReqExpr::Disjunction>()->nodes()) {
- MultiKeyIntervalReqExpr::NodeVector newConjunction;
+ targetIntervals.cast<CompoundIntervalReqExpr::Disjunction>()->nodes()) {
+ CompoundIntervalReqExpr::NodeVector newConjunction;
for (const auto& sourceConjunct :
sourceConjunction.cast<IntervalReqExpr::Conjunction>()->nodes()) {
const auto& sourceInterval =
sourceConjunct.cast<IntervalReqExpr::Atom>()->getExpr();
for (const auto& targetConjunct :
- targetConjunction.cast<MultiKeyIntervalReqExpr::Conjunction>()->nodes()) {
+ targetConjunction.cast<CompoundIntervalReqExpr::Conjunction>()->nodes()) {
const auto& targetInterval =
- targetConjunct.cast<MultiKeyIntervalReqExpr::Atom>()->getExpr();
+ targetConjunct.cast<CompoundIntervalReqExpr::Atom>()->getExpr();
if (!targetInterval.empty() && !targetInterval.back().isEquality() &&
!sourceInterval.isFullyOpen()) {
// We do not have an equality prefix. Reject.
@@ -314,18 +314,18 @@ bool combineMultiKeyIntervalsDNF(MultiKeyIntervalReqExpr::Node& targetIntervals,
newInterval.push_back(sourceInterval);
}
newConjunction.emplace_back(
- MultiKeyIntervalReqExpr::make<MultiKeyIntervalReqExpr::Atom>(
+ CompoundIntervalReqExpr::make<CompoundIntervalReqExpr::Atom>(
std::move(newInterval)));
}
}
newDisjunction.emplace_back(
- MultiKeyIntervalReqExpr::make<MultiKeyIntervalReqExpr::Conjunction>(
+ CompoundIntervalReqExpr::make<CompoundIntervalReqExpr::Conjunction>(
std::move(newConjunction)));
}
}
- targetIntervals = MultiKeyIntervalReqExpr::make<MultiKeyIntervalReqExpr::Disjunction>(
+ targetIntervals = CompoundIntervalReqExpr::make<CompoundIntervalReqExpr::Disjunction>(
std::move(newDisjunction));
return true;
}
diff --git a/src/mongo/db/query/optimizer/utils/interval_utils.h b/src/mongo/db/query/optimizer/utils/interval_utils.h
index b47f3dbcaa0..397b9a264f8 100644
--- a/src/mongo/db/query/optimizer/utils/interval_utils.h
+++ b/src/mongo/db/query/optimizer/utils/interval_utils.h
@@ -57,14 +57,14 @@ boost::optional<IntervalReqExpr::Node> intersectDNFIntervals(
* Combines a source interval over a single path with a target multi-component interval. The
* multi-component interval is extended to contain an extra field. The resulting multi-component
* interval defines the boundaries over the index component used by the index access execution
- * operator. If we fail to combine, the target multi-key interval is left unchanged.
+ * operator. If we fail to combine, the target compound interval is left unchanged.
* Currently we only support a single "equality prefix": 0+ equalities followed by at most one
* inequality, and trailing open intervals.
* reverseSource flag indicates the sourceInterval corresponds to a descending index, so the bounds
* are flipped before combining with the target.
* TODO: support Recursive Index Navigation.
*/
-bool combineMultiKeyIntervalsDNF(MultiKeyIntervalReqExpr::Node& targetIntervals,
+bool combineCompoundIntervalsDNF(CompoundIntervalReqExpr::Node& targetIntervals,
const IntervalReqExpr::Node& sourceIntervals,
bool reverseSource = false);
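
The single "equality prefix" restriction described above can be illustrated outside the optimizer: a compound interval keeps absorbing per-field bounds only while every bound so far is an equality; once an inequality has been used, any further field must be fully open or the combination is rejected. A rough standalone sketch of that check, with Interval as a hypothetical stand-in for the real per-field interval type:

#include <iostream>
#include <vector>

// Hypothetical stand-in for a per-field interval requirement.
struct Interval {
    bool equality;   // e.g. [5, 5]
    bool fullyOpen;  // e.g. [MinKey, MaxKey]
};

// Returns true if 'next' may be appended to 'compound' while preserving a single
// equality prefix: equalities, then at most one inequality, then only open bounds.
bool canAppend(const std::vector<Interval>& compound, const Interval& next) {
    if (compound.empty() || compound.back().equality) {
        return true;  // still inside the equality prefix
    }
    return next.fullyOpen;  // an inequality was already used; only open bounds may follow
}

int main() {
    std::vector<Interval> compound{{true, false}, {false, false}};  // a == 1, b > 2
    std::cout << canAppend(compound, {false, false}) << "\n";       // 0: c < 3 is rejected
    std::cout << canAppend(compound, {false, true}) << "\n";        // 1: a fully open bound is accepted
    return 0;
}
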
diff --git a/src/mongo/db/query/optimizer/utils/utils.cpp b/src/mongo/db/query/optimizer/utils/utils.cpp
index e4516a60618..0fb0bee1319 100644
--- a/src/mongo/db/query/optimizer/utils/utils.cpp
+++ b/src/mongo/db/query/optimizer/utils/utils.cpp
@@ -121,7 +121,7 @@ ProjectionNameSet extractReferencedColumns(const properties::PhysProps& properti
return PropertiesAffectedColumnsExtractor::extract(properties);
}
-bool areMultiKeyIntervalsEqualities(const MultiKeyIntervalRequirement& intervals) {
+bool areCompoundIntervalsEqualities(const CompoundIntervalRequirement& intervals) {
for (const auto& interval : intervals) {
if (!interval.isEquality()) {
return false;
@@ -718,29 +718,61 @@ bool checkPathContainsTraverse(const ABT& path) {
}
/**
- * Erases Traverse elements from an index path.
+ * Fuses an index path and a query path to determine a residual path to apply over the index
+ * results. Checks whether the index path is a prefix of the query path, considering only Get,
+ * Traverse, and Id elements. Returns the query path suffix that remains unmatched.
*/
-class PathTraverseEraser {
+class IndexPathFusor {
public:
- PathTraverseEraser() {}
+ struct ResultType {
+ boost::optional<ABT::reference_type> _prefix;
+ size_t _numTraversesSkipped = 0;
+ };
- void transport(ABT& n, const PathTraverse& /*node*/, ABT& childResult) {
- n = std::move(childResult);
- _erased = true;
+ /**
+ * 'n' - The complete index path being compared to, can be modified if needed.
+ * 'node' - Same as 'n' but cast to a specific type by the caller in order to invoke the
+ * correct operator.
+ * 'other' - The query path, against which the index path is matched as a prefix.
+ */
+ ResultType operator()(const ABT& n, const PathGet& node, const ABT& other) {
+ if (auto otherGet = other.cast<PathGet>();
+ otherGet != nullptr && otherGet->name() == node.name()) {
+ if (auto otherChildTraverse = otherGet->getPath().cast<PathTraverse>();
+ otherChildTraverse != nullptr && !node.getPath().is<PathTraverse>()) {
+ // If a query path has a Traverse, but the index path doesn't, the query can
+ // still be evaluated by this index. Skip the Traverse node, and continue matching.
+ auto result = node.getPath().visit(*this, otherChildTraverse->getPath());
+ result._numTraversesSkipped++;
+ return result;
+ } else {
+ return node.getPath().visit(*this, otherGet->getPath());
+ }
+ }
+ return {};
}
- template <typename T, typename... Ts>
- void transport(const ABT& /*n*/, const T& /*node*/, Ts&&...) {
- // Noop.
+ ResultType operator()(const ABT& n, const PathTraverse& node, const ABT& other) {
+ if (auto otherTraverse = other.cast<PathTraverse>();
+ otherTraverse != nullptr && otherTraverse->getMaxDepth() == node.getMaxDepth()) {
+ return node.getPath().visit(*this, otherTraverse->getPath());
+ }
+ return {};
}
- bool eraseTraverse(ABT& path) {
- algebra::transport<true>(path, *this);
- return _erased;
+ ResultType operator()(const ABT& n, const PathIdentity& node, const ABT& other) {
+ return {other.ref()};
}
-private:
- bool _erased = false;
+ template <typename T, typename... Ts>
+ ResultType operator()(const ABT& /*n*/, const T& /*node*/, Ts&&...) {
+ uasserted(6624152, "Unexpected node type");
+ }
+
+ static ResultType fuse(const ABT& node, const ABT& candidatePrefix) {
+ IndexPathFusor instance;
+ return candidatePrefix.visit(instance, node);
+ }
};
boost::optional<PartialSchemaReqConversion> convertExprToPartialSchemaReq(
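
The fusor walks the index path and the query path together: the index path must match as a prefix, and a Traverse present only in the query may be skipped when the (non-multikey) index path omits it; the count of skipped traverses is what simplifyPartialSchemaReqPaths uses to pick the best non-multikey path. A much simplified sketch of the same idea over a flat component list instead of ABT paths (all names below are illustrative, not the optimizer's API):

#include <iostream>
#include <optional>
#include <string>
#include <vector>

// Illustrative flat path model with an implicit trailing PathIdentity.
struct Component {
    enum Kind { Get, Traverse } kind;
    std::string field;  // only meaningful for Get
};
using Path = std::vector<Component>;

struct FuseResult {
    size_t querySuffixStart = 0;  // where the unmatched query suffix begins
    size_t traversesSkipped = 0;  // query traverses absent from the index path
};

// Checks whether 'indexPath' is a prefix of 'queryPath', allowing the query to
// carry extra Traverse components that a non-multikey index path omits.
std::optional<FuseResult> fuse(const Path& queryPath, const Path& indexPath) {
    FuseResult result;
    size_t qi = 0;
    for (const Component& ic : indexPath) {
        // Skip query-only traverses when the index expects a Get at this position.
        while (qi < queryPath.size() && queryPath[qi].kind == Component::Traverse &&
               ic.kind == Component::Get) {
            ++qi;
            ++result.traversesSkipped;
        }
        if (qi >= queryPath.size() || queryPath[qi].kind != ic.kind ||
            (ic.kind == Component::Get && queryPath[qi].field != ic.field)) {
            return std::nullopt;  // the index path is not a prefix of the query path
        }
        ++qi;
    }
    result.querySuffixStart = qi;
    return result;
}

int main() {
    Path query{{Component::Get, "a"}, {Component::Traverse, ""}, {Component::Get, "b"}};
    Path index{{Component::Get, "a"}, {Component::Get, "b"}};
    if (auto r = fuse(query, index)) {
        std::cout << "skipped " << r->traversesSkipped << " traverse(s), suffix starts at "
                  << r->querySuffixStart << "\n";  // skipped 1 traverse(s), suffix starts at 3
    }
    return 0;
}
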
@@ -757,7 +789,7 @@ boost::optional<PartialSchemaReqConversion> convertExprToPartialSchemaReq(
}
for (const auto& [key, req] : reqMap) {
- if (key.emptyPath() && isIntervalReqFullyOpenDNF(req.getIntervals())) {
+ if (key._path.is<PathIdentity>() && isIntervalReqFullyOpenDNF(req.getIntervals())) {
// We need to determine either path or interval (or both).
return {};
}
@@ -787,13 +819,24 @@ bool simplifyPartialSchemaReqPaths(const ProjectionName& scanProjName,
PartialSchemaKey newKey = key;
bool updateToNonMultiKey = false;
- if (key._projectionName == scanProjName) {
- ABT pathNoTraverse = newKey._path;
- const bool erasedTraverse = PathTraverseEraser{}.eraseTraverse(pathNoTraverse);
- if (erasedTraverse &&
- nonMultiKeyPaths.find(pathNoTraverse) != nonMultiKeyPaths.cend()) {
+ if (key._projectionName == scanProjName && checkPathContainsTraverse(newKey._path)) {
+ ABT bestPath = make<Blackhole>();
+ size_t maxTraversesSkipped = 0;
+ for (const ABT& nonMultiKeyPath : nonMultiKeyPaths) {
+ // Attempt to fuse with each non-multikey path and observe how many traverses we can
+ // erase.
+ // TODO: we probably need a more efficient way to do this instead of iterating.
+ if (const auto fused = IndexPathFusor::fuse(newKey._path, nonMultiKeyPath);
+ fused._prefix && fused._numTraversesSkipped > maxTraversesSkipped) {
+ maxTraversesSkipped = fused._numTraversesSkipped;
+ bestPath = nonMultiKeyPath;
+ PathAppender appender(std::move(*fused._prefix));
+ appender.append(bestPath);
+ }
+ }
+ if (maxTraversesSkipped > 0) {
updateToNonMultiKey = true;
- newKey._path = std::move(pathNoTraverse);
+ newKey._path = std::move(bestPath);
}
}
@@ -964,65 +1007,33 @@ size_t decodeIndexKeyName(const std::string& fieldName) {
return key;
}
-/**
- * Fuses an index path and a query path to determine a residual path to apply over the index
- * results. Checks if one index path is a prefix of another. Considers only Get, Traverse, and Id.
- * Return the suffix that doesn't match.
- */
-class IndexPathFusor {
-public:
- using ResultType = boost::optional<ABT::reference_type>;
-
- /**
- * 'n' - The complete index path being compared to, can be modified if needed.
- * 'node' - Same as 'n' but cast to a specific type by the caller in order to invoke the
- * correct operator.
- * 'other' - The query that is checked if it is a prefix of the index.
- */
- ResultType operator()(const ABT& n, const PathGet& node, const ABT& other) {
- if (auto otherGet = other.cast<PathGet>();
- otherGet != nullptr && otherGet->name() == node.name()) {
- return node.getPath().visit(*this, otherGet->getPath());
- }
- return {};
- }
-
- ResultType operator()(const ABT& n, const PathTraverse& node, const ABT& other) {
- if (auto otherTraverse = other.cast<PathTraverse>();
- otherTraverse != nullptr && otherTraverse->getMaxDepth() == node.getMaxDepth()) {
- return node.getPath().visit(*this, otherTraverse->getPath());
- }
- return {};
- }
-
- ResultType operator()(const ABT& n, const PathIdentity& node, const ABT& other) {
- return {other.ref()};
- }
-
- template <typename T, typename... Ts>
- ResultType operator()(const ABT& /*n*/, const T& /*node*/, Ts&&...) {
- uasserted(6624152, "Unexpected node type");
- }
-
- static ResultType fuse(const ABT& node, const ABT& candidatePrefix) {
- IndexPathFusor instance;
- return candidatePrefix.visit(instance, node);
+const ProjectionName& getExistingOrTempProjForFieldName(PrefixId& prefixId,
+ const FieldNameType& fieldName,
+ FieldProjectionMap& fieldProjMap) {
+ auto it = fieldProjMap._fieldProjections.find(fieldName);
+ if (it != fieldProjMap._fieldProjections.cend()) {
+ return it->second;
}
-};
-CandidateIndexMap computeCandidateIndexMap(PrefixId& prefixId,
- const ProjectionName& scanProjectionName,
- const PartialSchemaRequirements& reqMap,
- const ScanDefinition& scanDef,
- const bool fastNullHandling,
- bool& hasEmptyInterval) {
- CandidateIndexMap result;
- hasEmptyInterval = false;
+ ProjectionName tempProjName = prefixId.getNextId("evalTemp");
+ const auto result = fieldProjMap._fieldProjections.emplace(fieldName, std::move(tempProjName));
+ invariant(result.second);
+ return result.first->second;
+}
+CandidateIndexes computeCandidateIndexes(PrefixId& prefixId,
+ const ProjectionName& scanProjectionName,
+ const PartialSchemaRequirements& reqMap,
+ const ScanDefinition& scanDef,
+ const bool fastNullHandling,
+ bool& hasEmptyInterval) {
// Contains one instance for each unmatched key.
PartialSchemaKeySet unsatisfiedKeysInitial;
for (const auto& [key, req] : reqMap) {
- unsatisfiedKeysInitial.insert(key);
+ if (!unsatisfiedKeysInitial.insert(key).second) {
+ // We cannot satisfy two or more non-multikey path instances using an index.
+ return {};
+ }
if (!fastNullHandling && req.hasBoundProjectionName() &&
checkMaybeHasNull(req.getIntervals())) {
@@ -1032,140 +1043,150 @@ CandidateIndexMap computeCandidateIndexMap(PrefixId& prefixId,
}
}
+ CandidateIndexes result;
+ hasEmptyInterval = false;
+
for (const auto& [indexDefName, indexDef] : scanDef.getIndexDefs()) {
- FieldProjectionMap indexProjectionMap;
- auto intervals = MultiKeyIntervalReqExpr::makeSingularDNF(); // Singular empty interval.
- PartialSchemaRequirements residualRequirements;
- ProjectionNameSet residualRequirementsTempProjections;
- ResidualKeyMap residualKeyMap;
- opt::unordered_set<size_t> fieldsToCollate;
- size_t intervalPrefixSize = 0;
PartialSchemaKeySet unsatisfiedKeys = unsatisfiedKeysInitial;
-
- // True if the paths from partial schema requirements form a strict prefix of the index
- // collation.
- bool isPrefix = true;
- // If we formed bounds using at least one requirement (as opposed to having only residual
- // requirements).
- bool hasExactMatch = false;
+ CandidateIndexEntry entry(indexDefName);
const IndexCollationSpec& indexCollationSpec = indexDef.getCollationSpec();
for (size_t indexField = 0; indexField < indexCollationSpec.size(); indexField++) {
const auto& indexCollationEntry = indexCollationSpec.at(indexField);
- PartialSchemaKey indexKey{scanProjectionName, indexCollationEntry._path};
const bool reverse = indexCollationEntry._op == CollationOp::Descending;
- if (reqMap.count(indexKey) == 1 && isPrefix) {
- hasExactMatch = true;
- unsatisfiedKeys.erase(indexKey);
- const PartialSchemaRequirement& req = reqMap.find(indexKey)->second;
- const auto& requiredInterval = req.getIntervals();
+ PartialSchemaKey indexKey{scanProjectionName, indexCollationEntry._path};
+ auto indexKeyIt = reqMap.find(indexKey);
+ if (indexKeyIt == reqMap.cend()) {
+ break;
+ }
- if (combineMultiKeyIntervalsDNF(intervals, requiredInterval, reverse)) {
- intervalPrefixSize++;
- } else {
- if (!combineMultiKeyIntervalsDNF(
- intervals, IntervalReqExpr::makeSingularDNF(), reverse)) {
- uasserted(6624155, "Cannot combine with an open interval");
- }
+ const PartialSchemaRequirement& req = indexKeyIt->second;
+ const auto& requiredInterval = req.getIntervals();
+ if (!combineCompoundIntervalsDNF(entry._intervals, requiredInterval, reverse)) {
+ break;
+ }
- // Move interval into residual requirements.
- if (req.hasBoundProjectionName()) {
- PartialSchemaKey residualKey{req.getBoundProjectionName(),
- make<PathIdentity>()};
- residualRequirements.emplace(
- residualKey, PartialSchemaRequirement{"", req.getIntervals()});
- residualKeyMap.emplace(std::move(residualKey), indexKey);
- residualRequirementsTempProjections.insert(req.getBoundProjectionName());
- } else {
- ProjectionName tempProj = prefixId.getNextId("evalTemp");
- PartialSchemaKey residualKey{tempProj, make<PathIdentity>()};
- residualRequirements.emplace(residualKey, req);
- residualKeyMap.emplace(std::move(residualKey), indexKey);
- residualRequirementsTempProjections.insert(tempProj);
-
- // Include bounds projection into index spec.
- if (!indexProjectionMap._fieldProjections
- .emplace(encodeIndexKeyName(indexField), std::move(tempProj))
- .second) {
- uasserted(6624156, "Duplicate field name");
- }
- }
- }
+ unsatisfiedKeys.erase(indexKey);
+ entry._intervalPrefixSize++;
+ if (req.hasBoundProjectionName()) {
// Include bounds projection into index spec.
- if (req.hasBoundProjectionName() &&
- !indexProjectionMap._fieldProjections
- .emplace(encodeIndexKeyName(indexField), req.getBoundProjectionName())
- .second) {
- uasserted(6624157, "Duplicate field name");
- }
+ const bool inserted =
+ entry._fieldProjectionMap._fieldProjections
+ .emplace(encodeIndexKeyName(indexField), req.getBoundProjectionName())
+ .second;
+ invariant(inserted);
+ }
- if (auto singularInterval = IntervalReqExpr::getSingularDNF(requiredInterval);
- !singularInterval || !singularInterval->isEquality()) {
- // We only care about collation of for non-equality intervals.
- // Equivalently, it is sufficient for singular intervals to be clustered.
- fieldsToCollate.insert(indexField);
- }
- } else {
- bool foundPathPrefix = false;
- for (const auto& queryKey : unsatisfiedKeys) {
- const auto fusedPath =
- IndexPathFusor::fuse(queryKey._path, indexCollationEntry._path);
- if (fusedPath) {
- ProjectionName tempProj = prefixId.getNextId("evalTemp");
- PartialSchemaKey residualKey{tempProj, fusedPath.get()};
-
- {
- auto range = reqMap.equal_range(queryKey);
- for (auto it = range.first; it != range.second; it++) {
- residualRequirements.emplace(residualKey, it->second);
- }
- }
- residualKeyMap.emplace(std::move(residualKey), queryKey);
- residualRequirementsTempProjections.insert(tempProj);
-
- // Include bounds projection into index spec.
- if (!indexProjectionMap._fieldProjections
- .emplace(encodeIndexKeyName(indexField), std::move(tempProj))
- .second) {
- uasserted(6624158, "Duplicate field name");
- }
- if (!combineMultiKeyIntervalsDNF(
- intervals, IntervalReqExpr::makeSingularDNF(), reverse)) {
- uasserted(6624159, "Cannot combine with an open interval");
- }
+ if (auto singularInterval = IntervalReqExpr::getSingularDNF(requiredInterval);
+ !singularInterval || !singularInterval->isEquality()) {
+ // We only care about collation for non-equality intervals.
+ // Equivalently, it is sufficient for singular intervals to be clustered.
+ entry._fieldsToCollate.insert(indexField);
+ }
+ }
- foundPathPrefix = true;
- unsatisfiedKeys.erase(queryKey);
- break;
- }
- }
+ for (auto queryKeyIt = unsatisfiedKeys.begin(); queryKeyIt != unsatisfiedKeys.end();) {
+ const auto& queryKey = *queryKeyIt;
+ bool satisfied = false;
- if (!foundPathPrefix) {
- isPrefix = false;
- if (!combineMultiKeyIntervalsDNF(
- intervals, IntervalReqExpr::makeSingularDNF(), reverse)) {
- uasserted(6624160, "Cannot combine with an open interval");
- }
+ for (size_t indexField = 0; indexField < indexCollationSpec.size(); indexField++) {
+ const auto& indexCollationEntry = indexCollationSpec.at(indexField);
+ const auto fusedPath =
+ IndexPathFusor::fuse(queryKey._path, indexCollationEntry._path);
+ if (!fusedPath._prefix) {
+ continue;
}
+
+ auto it = reqMap.find(queryKey);
+ tassert(
+ 6624158, "QueryKey must exist in the requirements map", it != reqMap.cend());
+
+ const ProjectionName& tempProjName = getExistingOrTempProjForFieldName(
+ prefixId, encodeIndexKeyName(indexField), entry._fieldProjectionMap);
+ entry._residualRequirements.emplace_back(
+ PartialSchemaKey{tempProjName, std::move(*fusedPath._prefix)},
+ it->second,
+ std::distance(reqMap.cbegin(), it));
+
+ satisfied = true;
+ break;
+ }
+
+ if (satisfied) {
+ unsatisfiedKeys.erase(queryKeyIt++);
+ } else {
+ queryKeyIt++;
}
- }
- if (!hasExactMatch) {
- continue;
}
if (!unsatisfiedKeys.empty()) {
continue;
}
- result.emplace(indexDefName,
- CandidateIndexEntry{std::move(indexProjectionMap),
- std::move(intervals),
- std::move(residualRequirements),
- std::move(residualRequirementsTempProjections),
- std::move(residualKeyMap),
- std::move(fieldsToCollate),
- intervalPrefixSize});
+ for (size_t indexField = entry._intervalPrefixSize; indexField < indexCollationSpec.size();
+ indexField++) {
+ // Pad the remaining index fields with [MinKey, MaxKey] intervals.
+ const bool reverse = indexCollationSpec.at(indexField)._op == CollationOp::Descending;
+ if (!combineCompoundIntervalsDNF(
+ entry._intervals, IntervalReqExpr::makeSingularDNF(), reverse)) {
+ uasserted(6624159, "Cannot combine with an open interval");
+ }
+ }
+
+ result.push_back(std::move(entry));
+ }
+
+ return result;
+}
+
+boost::optional<ScanParams> computeScanParams(PrefixId& prefixId,
+ const PartialSchemaRequirements& reqMap,
+ const ProjectionName& rootProj) {
+ ScanParams result;
+
+ size_t entryIndex = 0;
+ for (const auto& [key, req] : reqMap) {
+ if (key._projectionName != rootProj) {
+ // We are not sitting right above a ScanNode.
+ return {};
+ }
+
+ if (auto pathGet = key._path.cast<PathGet>(); pathGet != nullptr) {
+ const FieldNameType& fieldName = pathGet->name();
+
+ // Extract a new requirements path with the simple prefix removed. For example, if we
+ // have a key Get "a" Traverse Compare = 0, we leave only Traverse Compare = 0.
+ if (pathGet->getPath().is<PathIdentity>() && req.hasBoundProjectionName()) {
+ const auto [it, insertedInFPM] =
+ result._fieldProjectionMap._fieldProjections.emplace(
+ fieldName, req.getBoundProjectionName());
+
+ if (!insertedInFPM) {
+ result._residualRequirements.emplace_back(
+ PartialSchemaKey{it->second, make<PathIdentity>()},
+ PartialSchemaRequirement{req.getBoundProjectionName(), req.getIntervals()},
+ entryIndex);
+ } else if (!isIntervalReqFullyOpenDNF(req.getIntervals())) {
+ result._residualRequirements.emplace_back(
+ PartialSchemaKey{req.getBoundProjectionName(), make<PathIdentity>()},
+ PartialSchemaRequirement{"", req.getIntervals()},
+ entryIndex);
+ }
+ } else {
+ const ProjectionName& tempProjName = getExistingOrTempProjForFieldName(
+ prefixId, fieldName, result._fieldProjectionMap);
+ result._residualRequirements.emplace_back(
+ PartialSchemaKey{tempProjName, pathGet->getPath()}, req, entryIndex);
+ }
+ } else {
+ // Move other conditions into the residual map.
+ result._fieldProjectionMap._rootProjection = rootProj;
+ result._residualRequirements.emplace_back(key, req, entryIndex);
+ }
+
+ entryIndex++;
}
return result;
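
Both computeCandidateIndexes and computeScanParams route field bindings through getExistingOrTempProjForFieldName, so a field referenced by several requirements is bound to exactly one projection: either one already present in the field projection map, or a freshly minted "evalTemp" name. A small self-contained sketch of that reuse-or-mint pattern, using plain std::map and std::string instead of the optimizer's types:

#include <iostream>
#include <map>
#include <string>

// Hypothetical stand-ins for PrefixId and FieldProjectionMap.
struct PrefixId {
    int counter = 0;
    std::string getNextId(const std::string& prefix) {
        return prefix + "_" + std::to_string(counter++);
    }
};
using FieldProjections = std::map<std::string, std::string>;

// Reuse the projection already bound to 'fieldName', or bind a new temp projection.
const std::string& getExistingOrTempProj(PrefixId& prefixId,
                                          const std::string& fieldName,
                                          FieldProjections& fieldProjections) {
    if (auto it = fieldProjections.find(fieldName); it != fieldProjections.end()) {
        return it->second;
    }
    return fieldProjections.emplace(fieldName, prefixId.getNextId("evalTemp")).first->second;
}

int main() {
    PrefixId prefixId;
    FieldProjections projections;
    std::cout << getExistingOrTempProj(prefixId, "a", projections) << "\n";  // evalTemp_0
    std::cout << getExistingOrTempProj(prefixId, "a", projections) << "\n";  // evalTemp_0 (reused)
    std::cout << getExistingOrTempProj(prefixId, "b", projections) << "\n";  // evalTemp_1
    return 0;
}
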
@@ -1327,24 +1348,23 @@ void lowerPartialSchemaRequirement(const PartialSchemaKey& key,
}
}
-void lowerPartialSchemaRequirements(const CEType baseCE,
- const CEType scanGroupCE,
- ResidualRequirements& requirements,
+void lowerPartialSchemaRequirements(const CEType scanGroupCE,
+ std::vector<SelectivityType> indexPredSels,
+ ResidualRequirementsWithCE& requirements,
ABT& physNode,
NodeCEMap& nodeCEMap) {
sortResidualRequirements(requirements);
- std::vector<SelectivityType> residualSelectivities;
for (const auto& [residualKey, residualReq, ce] : requirements) {
- CEType residualCE = baseCE;
- if (!residualSelectivities.empty()) {
+ CEType residualCE = scanGroupCE;
+ if (!indexPredSels.empty()) {
// We are intentionally making a copy of the vector here, we are adding elements to it
// below.
- residualCE *= ce::conjExponentialBackoff(residualSelectivities);
+ residualCE *= ce::conjExponentialBackoff(indexPredSels);
}
if (scanGroupCE > 0.0) {
// Compute the selectivity after we assign CE, which is the "input" to the cost.
- residualSelectivities.push_back(ce / scanGroupCE);
+ indexPredSels.push_back(ce / scanGroupCE);
}
lowerPartialSchemaRequirement(residualKey, residualReq, physNode, [&](const ABT& node) {
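
lowerPartialSchemaRequirements now starts each residual predicate's cardinality from the scan group CE and the selectivities already implied by the index predicates, combining them with ce::conjExponentialBackoff. That function's body is not part of this diff; the sketch below shows one common formulation of conjunctive exponential backoff, purely as an illustration: apply the most selective predicate at full strength and dampen each additional one by an exponentially shrinking exponent, rather than assuming full independence.

#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>

// Illustrative only: the real ce::conjExponentialBackoff is not shown in this diff.
// Combine conjunctive selectivities as s1 * s2^(1/2) * s3^(1/4) * ..., with the
// most selective predicate applied at full strength.
double conjExponentialBackoff(std::vector<double> selectivities) {
    std::sort(selectivities.begin(), selectivities.end());  // smallest (most selective) first
    double result = 1.0;
    double exponent = 1.0;
    for (double s : selectivities) {
        result *= std::pow(s, exponent);
        exponent *= 0.5;
    }
    return result;
}

int main() {
    // Two predicates of selectivity 0.1: naive independence would give 0.01,
    // backoff gives 0.1 * sqrt(0.1) ~= 0.0316.
    std::cout << conjExponentialBackoff({0.1, 0.1}) << "\n";
    return 0;
}
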
@@ -1353,72 +1373,34 @@ void lowerPartialSchemaRequirements(const CEType baseCE,
}
}
-void computePhysicalScanParams(PrefixId& prefixId,
- const PartialSchemaRequirements& reqMap,
- const PartialSchemaKeyCE& partialSchemaKeyCEMap,
- const ProjectionNameOrderPreservingSet& requiredProjections,
- ResidualRequirements& residualRequirements,
- ProjectionRenames& projectionRenames,
- FieldProjectionMap& fieldProjectionMap,
- bool& requiresRootProjection) {
- for (const auto& [key, req] : reqMap) {
- bool hasBoundProjection = req.hasBoundProjectionName();
- if (hasBoundProjection && !requiredProjections.find(req.getBoundProjectionName()).second) {
- // Bound projection is not required, pretend we don't bind.
- hasBoundProjection = false;
+void sortResidualRequirements(ResidualRequirementsWithCE& residualReq) {
+ // Sort residual requirements by estimated cost.
+ // Assume it is more expensive to deliver a bound projection than to just filter.
+
+ std::vector<std::pair<double, size_t>> costs;
+ for (size_t index = 0; index < residualReq.size(); index++) {
+ const auto& entry = residualReq.at(index);
+
+ size_t multiplier = 0;
+ if (entry._req.hasBoundProjectionName()) {
+ multiplier++;
}
- if (!hasBoundProjection && isIntervalReqFullyOpenDNF(req.getIntervals())) {
- // Redundant requirement.
- continue;
+ if (!isIntervalReqFullyOpenDNF(entry._req.getIntervals())) {
+ multiplier++;
}
- const CEType keyCE = partialSchemaKeyCEMap.at(key);
- if (auto pathGet = key._path.cast<PathGet>(); pathGet != nullptr) {
- // Extract a new requirements path with removed simple paths.
- // For example if we have a key Get "a" Traverse Compare = 0 we leave only
- // Traverse Compare 0.
- if (pathGet->getPath().is<PathIdentity>() && hasBoundProjection) {
- const auto [it, inserted] = fieldProjectionMap._fieldProjections.emplace(
- pathGet->name(), req.getBoundProjectionName());
- if (!inserted) {
- projectionRenames.emplace(req.getBoundProjectionName(), it->second);
- }
-
- if (!isIntervalReqFullyOpenDNF(req.getIntervals())) {
- residualRequirements.emplace_back(
- PartialSchemaKey{req.getBoundProjectionName(), make<PathIdentity>()},
- PartialSchemaRequirement{"", req.getIntervals()},
- keyCE);
- }
- } else {
- ProjectionName tempProjName;
- auto it = fieldProjectionMap._fieldProjections.find(pathGet->name());
- if (it == fieldProjectionMap._fieldProjections.cend()) {
- tempProjName = prefixId.getNextId("evalTemp");
- fieldProjectionMap._fieldProjections.emplace(pathGet->name(), tempProjName);
- } else {
- tempProjName = it->second;
- }
+ costs.emplace_back(entry._ce * multiplier, index);
+ }
- residualRequirements.emplace_back(
- PartialSchemaKey{std::move(tempProjName), pathGet->getPath()}, req, keyCE);
- }
- } else {
- // Move other conditions into the residual map.
- requiresRootProjection = true;
- residualRequirements.emplace_back(key, req, keyCE);
+ std::sort(costs.begin(), costs.end());
+ for (size_t index = 0; index < residualReq.size(); index++) {
+ const size_t targetIndex = costs.at(index).second;
+ if (index < targetIndex) {
+ std::swap(residualReq.at(index), residualReq.at(targetIndex));
}
}
}
-void sortResidualRequirements(ResidualRequirements& residualReq) {
- // Sort residual requirements by estimated CE.
- std::sort(
- residualReq.begin(),
- residualReq.end(),
- [](const ResidualRequirement& x, const ResidualRequirement& y) { return x._ce < y._ce; });
-}
-
void applyProjectionRenames(ProjectionRenames projectionRenames,
ABT& node,
const std::function<void(const ABT& node)>& visitor) {
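
The new sortResidualRequirements orders residual predicates by a rough per-entry cost: the entry's CE scaled by a multiplier counting whether it binds a projection and whether its interval actually filters, so cheap (or no-op) predicates are applied first. The production code reorders the entries in place via swaps; the sketch below reproduces only the ordering heuristic over a hypothetical plain struct.

#include <algorithm>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Hypothetical stand-in for a residual requirement entry.
struct Residual {
    std::string label;
    bool bindsProjection;    // delivers a bound projection
    bool restrictsInterval;  // interval is not fully open (it actually filters)
    double ce;               // estimated cardinality feeding this predicate
};

// Sort so that entries with the lowest estimated cost are applied first.
void sortResiduals(std::vector<Residual>& residuals) {
    std::vector<std::pair<double, size_t>> costs;
    for (size_t i = 0; i < residuals.size(); ++i) {
        const auto& r = residuals[i];
        const int multiplier = (r.bindsProjection ? 1 : 0) + (r.restrictsInterval ? 1 : 0);
        costs.emplace_back(r.ce * multiplier, i);
    }
    std::sort(costs.begin(), costs.end());

    std::vector<Residual> sorted;
    sorted.reserve(residuals.size());
    for (const auto& [cost, index] : costs) {
        sorted.push_back(residuals[index]);
    }
    residuals = std::move(sorted);
}

int main() {
    std::vector<Residual> residuals{{"a", true, true, 1000.0}, {"b", false, true, 50.0}};
    sortResiduals(residuals);
    std::cout << residuals.front().label << "\n";  // "b": the cheaper filter runs first
    return 0;
}
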
@@ -1429,6 +1411,42 @@ void applyProjectionRenames(ProjectionRenames projectionRenames,
}
}
+void removeRedundantResidualPredicates(const ProjectionNameOrderPreservingSet& requiredProjections,
+ ResidualRequirements& residualReqs,
+ FieldProjectionMap& fieldProjectionMap) {
+ ProjectionNameSet residualTempProjections;
+
+ // Remove unused residual requirements.
+ for (auto it = residualReqs.begin(); it != residualReqs.end();) {
+ auto& [key, req, ce] = *it;
+
+ if (req.hasBoundProjectionName() &&
+ !requiredProjections.find(req.getBoundProjectionName()).second) {
+ if (isIntervalReqFullyOpenDNF(req.getIntervals())) {
+ residualReqs.erase(it++);
+ continue;
+ } else {
+ req.setBoundProjectionName("");
+ }
+ }
+
+ residualTempProjections.insert(key._projectionName);
+ it++;
+ }
+
+ // Remove unused projections from the field projection map.
+ auto& fieldProjMap = fieldProjectionMap._fieldProjections;
+ for (auto it = fieldProjMap.begin(); it != fieldProjMap.end();) {
+ const ProjectionName& projName = it->second;
+ if (!requiredProjections.find(projName).second &&
+ residualTempProjections.count(projName) == 0) {
+ fieldProjMap.erase(it++);
+ } else {
+ it++;
+ }
+ }
+}
+
ABT lowerRIDIntersectGroupBy(PrefixId& prefixId,
const ProjectionName& ridProjName,
const CEType intersectedCE,
@@ -1635,7 +1653,7 @@ public:
_fpmStack.push_back(std::move(indexProjectionMap));
};
- ABT transport(const MultiKeyIntervalReqExpr::Atom& node) {
+ ABT transport(const CompoundIntervalReqExpr::Atom& node) {
ABT physicalIndexScan = make<IndexScanNode>(
_fpmStack.back(),
IndexSpecification{_scanDefName, _indexDefName, node.getExpr(), _reverseOrder});
@@ -1670,7 +1688,7 @@ public:
_fpmStack.push_back(std::move(childMap));
}
- void prepare(const MultiKeyIntervalReqExpr::Conjunction& node) {
+ void prepare(const CompoundIntervalReqExpr::Conjunction& node) {
prepare<true /*isConjunction*/>(node.nodes().size());
}
@@ -1745,19 +1763,19 @@ public:
return result;
}
- ABT transport(const MultiKeyIntervalReqExpr::Conjunction& node, ABTVector childResults) {
+ ABT transport(const CompoundIntervalReqExpr::Conjunction& node, ABTVector childResults) {
return implement<true /*isIntersect*/>(std::move(childResults));
}
- void prepare(const MultiKeyIntervalReqExpr::Disjunction& node) {
+ void prepare(const CompoundIntervalReqExpr::Disjunction& node) {
prepare<false /*isConjunction*/>(node.nodes().size());
}
- ABT transport(const MultiKeyIntervalReqExpr::Disjunction& node, ABTVector childResults) {
+ ABT transport(const CompoundIntervalReqExpr::Disjunction& node, ABTVector childResults) {
return implement<false /*isIntersect*/>(std::move(childResults));
}
- ABT lower(const MultiKeyIntervalReqExpr::Node& intervals) {
+ ABT lower(const CompoundIntervalReqExpr::Node& intervals) {
return algebra::transport<false>(intervals, *this);
}
@@ -1779,7 +1797,7 @@ ABT lowerIntervals(PrefixId& prefixId,
FieldProjectionMap indexProjectionMap,
const std::string& scanDefName,
const std::string& indexDefName,
- const MultiKeyIntervalReqExpr::Node& intervals,
+ const CompoundIntervalReqExpr::Node& intervals,
const bool reverseOrder,
const CEType indexCE,
const CEType scanGroupCE,
diff --git a/src/mongo/db/query/optimizer/utils/utils.h b/src/mongo/db/query/optimizer/utils/utils.h
index 09a18cc8817..28690c50262 100644
--- a/src/mongo/db/query/optimizer/utils/utils.h
+++ b/src/mongo/db/query/optimizer/utils/utils.h
@@ -104,7 +104,7 @@ ProjectionNameSet extractReferencedColumns(const properties::PhysProps& properti
/**
* Returns true if all components of the compound interval are equalities.
*/
-bool areMultiKeyIntervalsEqualities(const MultiKeyIntervalRequirement& intervals);
+bool areCompoundIntervalsEqualities(const CompoundIntervalRequirement& intervals);
struct CollationSplitResult {
bool _validSplit = false;
@@ -211,19 +211,26 @@ std::string encodeIndexKeyName(size_t indexField);
size_t decodeIndexKeyName(const std::string& fieldName);
/**
- * Compute a mapping [indexName -> CandidateIndexEntry] that describes intervals that could be
+ * Compute a list of candidate indexes. A CandidateIndexEntry describes intervals that could be
* used for accessing the corresponding index. The intervals themselves are derived from
* 'reqMap'.
* If the intersection of any of the interval requirements in 'reqMap' results in an empty
- * interval, return an empty mappting and set 'hasEmptyInterval' to true.
+ * interval, return an empty list and set 'hasEmptyInterval' to true.
* Otherwise return the computed list, and set 'hasEmptyInterval' to false.
*/
-CandidateIndexMap computeCandidateIndexMap(PrefixId& prefixId,
- const ProjectionName& scanProjectionName,
- const PartialSchemaRequirements& reqMap,
- const ScanDefinition& scanDef,
- bool fastNullHandling,
- bool& hasEmptyInterval);
+CandidateIndexes computeCandidateIndexes(PrefixId& prefixId,
+ const ProjectionName& scanProjectionName,
+ const PartialSchemaRequirements& reqMap,
+ const ScanDefinition& scanDef,
+ bool fastNullHandling,
+ bool& hasEmptyInterval);
+
+/**
+ * Computes the scan parameters (field projections and residual predicates) to apply on top of a Scan.
+ */
+boost::optional<ScanParams> computeScanParams(PrefixId& prefixId,
+ const PartialSchemaRequirements& reqMap,
+ const ProjectionName& rootProj);
/**
* Checks if we have an interval tree which has at least one atomic interval which may include Null
@@ -241,28 +248,23 @@ void lowerPartialSchemaRequirement(const PartialSchemaKey& key,
const std::function<void(const ABT& node)>& visitor =
[](const ABT&) {});
-void lowerPartialSchemaRequirements(CEType baseCE,
- CEType scanGroupCE,
- ResidualRequirements& requirements,
+void lowerPartialSchemaRequirements(CEType scanGroupCE,
+ std::vector<SelectivityType> indexPredSels,
+ ResidualRequirementsWithCE& requirements,
ABT& physNode,
NodeCEMap& nodeCEMap);
-void computePhysicalScanParams(PrefixId& prefixId,
- const PartialSchemaRequirements& reqMap,
- const PartialSchemaKeyCE& partialSchemaKeyCEMap,
- const ProjectionNameOrderPreservingSet& requiredProjections,
- ResidualRequirements& residualRequirements,
- ProjectionRenames& projectionRenames,
- FieldProjectionMap& fieldProjectionMap,
- bool& requiresRootProjection);
-
-void sortResidualRequirements(ResidualRequirements& residualReq);
+void sortResidualRequirements(ResidualRequirementsWithCE& residualReq);
void applyProjectionRenames(ProjectionRenames projectionRenames,
ABT& node,
const std::function<void(const ABT& node)>& visitor = [](const ABT&) {
});
+void removeRedundantResidualPredicates(const ProjectionNameOrderPreservingSet& requiredProjections,
+ ResidualRequirements& residualReqs,
+ FieldProjectionMap& fieldProjectionMap);
+
/**
* Implements an RID Intersect node using Union and GroupBy.
*/
@@ -311,7 +313,7 @@ ABT lowerIntervals(PrefixId& prefixId,
FieldProjectionMap indexProjectionMap,
const std::string& scanDefName,
const std::string& indexDefName,
- const MultiKeyIntervalReqExpr::Node& intervals,
+ const CompoundIntervalReqExpr::Node& intervals,
bool reverseOrder,
CEType indexCE,
CEType scanGroupCE,