summary | refs | log | tree | commit | diff
path: root/src/mongo/db/query/optimizer
diff options
context:
space:
mode:
author: Svilen Mihaylov <svilen.mihaylov@mongodb.com> 2022-08-06 15:59:26 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-08-06 17:13:38 +0000
commit: be83d8092562797b2e4c40332a8eb7ff87c60b7f (patch)
tree: f2300e484e63b21ec30e42743f690818a17a9bfb /src/mongo/db/query/optimizer
parent: d37e684e83e6353effa60d5f93090c51630ee125 (diff)
download: mongo-be83d8092562797b2e4c40332a8eb7ff87c60b7f.tar.gz
SERVER-67548 Index scan with equality to null incorrectly appends null if the field is missing
Diffstat (limited to 'src/mongo/db/query/optimizer')
-rw-r--r--src/mongo/db/query/optimizer/cascades/implementers.cpp8
-rw-r--r--src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp122
-rw-r--r--src/mongo/db/query/optimizer/cascades/logical_rewriter.h7
-rw-r--r--src/mongo/db/query/optimizer/defs.h4
-rw-r--r--src/mongo/db/query/optimizer/opt_phase_manager.cpp8
-rw-r--r--src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp142
-rw-r--r--src/mongo/db/query/optimizer/utils/utils.cpp79
-rw-r--r--src/mongo/db/query/optimizer/utils/utils.h7
8 files changed, 269 insertions, 108 deletions
diff --git a/src/mongo/db/query/optimizer/cascades/implementers.cpp b/src/mongo/db/query/optimizer/cascades/implementers.cpp
index b4f24715450..f32d391839e 100644
--- a/src/mongo/db/query/optimizer/cascades/implementers.cpp
+++ b/src/mongo/db/query/optimizer/cascades/implementers.cpp
@@ -347,6 +347,7 @@ public:
// via a physical scan even in the absence of indexes.
const IndexingRequirement& requirements = getPropertyConst<IndexingRequirement>(_physProps);
+ const CandidateIndexMap& candidateIndexMap = node.getCandidateIndexMap();
const IndexReqTarget indexReqTarget = requirements.getIndexReqTarget();
switch (indexReqTarget) {
case IndexReqTarget::Complete:
@@ -356,6 +357,11 @@ public:
break;
case IndexReqTarget::Index:
+ if (candidateIndexMap.empty()) {
+ return;
+ }
+ [[fallthrough]];
+
case IndexReqTarget::Seek:
if (_hints._disableIndexes == DisableIndexOptions::DisableAll) {
return;
@@ -438,7 +444,7 @@ public:
// Consider all candidate indexes, and check if they satisfy the collation and
// distribution requirements.
- for (const auto& [indexDefName, candidateIndexEntry] : node.getCandidateIndexMap()) {
+ for (const auto& [indexDefName, candidateIndexEntry] : candidateIndexMap) {
const auto& indexDef = scanDef.getIndexDefs().at(indexDefName);
if (!indexDef.getPartialReqMap().empty() &&
(_hints._disableIndexes == DisableIndexOptions::DisablePartialOnly ||
diff --git a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
index 7a499b0ec18..e8d4603e0f4 100644
--- a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
+++ b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
@@ -79,11 +79,13 @@ LogicalRewriter::RewriteSet LogicalRewriter::_substitutionSet = {
LogicalRewriter::LogicalRewriter(Memo& memo,
PrefixId& prefixId,
const RewriteSet rewriteSet,
+ const QueryHints& hints,
const bool useHeuristicCE)
: _activeRewriteSet(std::move(rewriteSet)),
_groupsPending(),
_memo(memo),
_prefixId(prefixId),
+ _hints(hints),
_useHeuristicCE(useHeuristicCE) {
initializeRewrites();
@@ -182,6 +184,10 @@ public:
return _rewriter._prefixId;
}
+ const QueryHints& getHints() const {
+ return _rewriter._hints;
+ }
+
auto& getIndexFieldPrefixMap() const {
return _rewriter._indexFieldPrefixMap;
}
@@ -567,8 +573,12 @@ static boost::optional<ABT> mergeSargableNodes(
const ScanDefinition& scanDef =
ctx.getMetadata()._scanDefs.at(indexingAvailability.getScanDefName());
- auto candidateIndexMap = computeCandidateIndexMap(
- ctx.getPrefixId(), scanProjName, mergedReqs, scanDef, hasEmptyInterval);
+ auto candidateIndexMap = computeCandidateIndexMap(ctx.getPrefixId(),
+ scanProjName,
+ mergedReqs,
+ scanDef,
+ ctx.getHints()._fastIndexNullHandling,
+ hasEmptyInterval);
if (hasEmptyInterval) {
return createEmptyValueScanNode(ctx);
}
@@ -700,8 +710,12 @@ static void convertFilterToSargableNode(ABT::reference_type node,
return;
}
- auto candidateIndexMap = computeCandidateIndexMap(
- ctx.getPrefixId(), scanProjName, conversion->_reqMap, scanDef, hasEmptyInterval);
+ auto candidateIndexMap = computeCandidateIndexMap(ctx.getPrefixId(),
+ scanProjName,
+ conversion->_reqMap,
+ scanDef,
+ ctx.getHints()._fastIndexNullHandling,
+ hasEmptyInterval);
if (hasEmptyInterval) {
addEmptyValueScanNode(ctx);
@@ -880,8 +894,12 @@ struct SubstituteConvert<EvaluationNode> {
}
bool hasEmptyInterval = false;
- auto candidateIndexMap = computeCandidateIndexMap(
- ctx.getPrefixId(), scanProjName, conversion->_reqMap, scanDef, hasEmptyInterval);
+ auto candidateIndexMap = computeCandidateIndexMap(ctx.getPrefixId(),
+ scanProjName,
+ conversion->_reqMap,
+ scanDef,
+ ctx.getHints()._fastIndexNullHandling,
+ hasEmptyInterval);
if (hasEmptyInterval) {
addEmptyValueScanNode(ctx);
@@ -957,6 +975,30 @@ struct ExploreConvert<SargableNode> {
const bool indexFieldMapHasScanDef = indexFieldPrefixMapIt != indexFieldPrefixMap.cend();
const auto& reqMap = sargableNode.getReqMap();
+
+ const bool fastIndexNullHandling = ctx.getHints()._fastIndexNullHandling;
+ std::vector<bool> isFullyOpen;
+ std::vector<bool> maybeHasNullAndBinds;
+ {
+ // Pre-compute if a requirement's interval is fully open.
+ isFullyOpen.reserve(reqMap.size());
+ for (const auto& [key, req] : reqMap) {
+ isFullyOpen.push_back(isIntervalReqFullyOpenDNF(req.getIntervals()));
+ }
+
+ if (!fastIndexNullHandling && !isIndex) {
+ // Pre-compute if needed if a requirement's interval may contain nulls, and also has
+ // an output binding.
+ maybeHasNullAndBinds.reserve(reqMap.size());
+ for (const auto& [key, req] : reqMap) {
+ maybeHasNullAndBinds.push_back(req.hasBoundProjectionName() &&
+ checkMaybeHasNull(req.getIntervals()));
+ }
+ }
+ }
+
+ // We iterate over the possible ways to split N predicates into 2^N subsets, one goes to the
+ // left, and the other to the right side.
const size_t reqSize = reqMap.size();
const size_t highMask = isIndex ? (1ull << (reqSize - 1)) : (1ull << reqSize);
for (size_t mask = 1; mask < highMask; mask++) {
@@ -968,21 +1010,43 @@ struct ExploreConvert<SargableNode> {
size_t index = 0;
for (const auto& [key, req] : reqMap) {
- const bool fullyOpenInterval = isIntervalReqFullyOpenDNF(req.getIntervals());
+ const bool fullyOpenInterval = isFullyOpen.at(index);
if (((1ull << index) & mask) != 0) {
- leftReqs.emplace(key, req);
-
- if (!fullyOpenInterval) {
- hasLeftIntervals = true;
+ bool addedToLeft = false;
+ if (fastIndexNullHandling || isIndex) {
+ leftReqs.emplace(key, req);
+ addedToLeft = true;
+ } else if (maybeHasNullAndBinds.at(index)) {
+ // We cannot return index values if our interval can possibly contain Null.
+ // Instead, we remove the output binding for the left side, and return the
+ // value from the right (seek) side.
+ if (!fullyOpenInterval) {
+ leftReqs.emplace(key, PartialSchemaRequirement{"", req.getIntervals()});
+ addedToLeft = true;
+ }
+ rightReqs.emplace(
+ key,
+ PartialSchemaRequirement{req.getBoundProjectionName(),
+ IntervalReqExpr::makeSingularDNF()});
+ } else {
+ leftReqs.emplace(key, req);
+ addedToLeft = true;
}
- if (indexFieldMapHasScanDef) {
- if (auto pathPtr = key._path.cast<PathGet>(); pathPtr != nullptr &&
- indexFieldPrefixMapIt->second.count(pathPtr->name()) == 0) {
- // We have found a left requirement which cannot be covered with an
- // index.
- hasFieldCoverage = false;
- break;
+
+ if (addedToLeft) {
+ if (!fullyOpenInterval) {
+ hasLeftIntervals = true;
+ }
+
+ if (indexFieldMapHasScanDef) {
+ if (auto pathPtr = key._path.cast<PathGet>(); pathPtr != nullptr &&
+ indexFieldPrefixMapIt->second.count(pathPtr->name()) == 0) {
+ // We have found a left requirement which cannot be covered with an
+ // index.
+ hasFieldCoverage = false;
+ break;
+ }
}
}
} else {
@@ -995,6 +1059,12 @@ struct ExploreConvert<SargableNode> {
index++;
}
+ if (leftReqs.empty()) {
+ // Can happen if we have intervals containing null.
+ invariant(!fastIndexNullHandling && !isIndex);
+ continue;
+ }
+
if (isIndex && (!hasLeftIntervals || !hasRightIntervals)) {
// Reject. Must have at least one proper interval on either side.
continue;
@@ -1005,16 +1075,24 @@ struct ExploreConvert<SargableNode> {
}
bool hasEmptyLeftInterval = false;
- auto leftCandidateIndexMap = computeCandidateIndexMap(
- ctx.getPrefixId(), scanProjectionName, leftReqs, scanDef, hasEmptyLeftInterval);
+ auto leftCandidateIndexMap = computeCandidateIndexMap(ctx.getPrefixId(),
+ scanProjectionName,
+ leftReqs,
+ scanDef,
+ fastIndexNullHandling,
+ hasEmptyLeftInterval);
if (isIndex && leftCandidateIndexMap.empty()) {
// Reject rewrite.
continue;
}
bool hasEmptyRightInterval = false;
- auto rightCandidateIndexMap = computeCandidateIndexMap(
- ctx.getPrefixId(), scanProjectionName, rightReqs, scanDef, hasEmptyRightInterval);
+ auto rightCandidateIndexMap = computeCandidateIndexMap(ctx.getPrefixId(),
+ scanProjectionName,
+ rightReqs,
+ scanDef,
+ fastIndexNullHandling,
+ hasEmptyRightInterval);
if (isIndex && rightCandidateIndexMap.empty()) {
// With empty candidate map, reject only if we cannot implement as Seek.
continue;
diff --git a/src/mongo/db/query/optimizer/cascades/logical_rewriter.h b/src/mongo/db/query/optimizer/cascades/logical_rewriter.h
index 197d9c24771..97684ec992d 100644
--- a/src/mongo/db/query/optimizer/cascades/logical_rewriter.h
+++ b/src/mongo/db/query/optimizer/cascades/logical_rewriter.h
@@ -58,7 +58,11 @@ public:
*/
using RewriteSet = opt::unordered_map<LogicalRewriteType, double>;
- LogicalRewriter(Memo& memo, PrefixId& prefixId, RewriteSet rewriteSet, bool useHeuristicCE);
+ LogicalRewriter(Memo& memo,
+ PrefixId& prefixId,
+ RewriteSet rewriteSet,
+ const QueryHints& hints,
+ bool useHeuristicCE);
LogicalRewriter() = delete;
LogicalRewriter(const LogicalRewriter& other) = delete;
@@ -117,6 +121,7 @@ private:
// We don't own those:
Memo& _memo;
PrefixId& _prefixId;
+ const QueryHints& _hints;
RewriteFnMap _rewriteMap;
diff --git a/src/mongo/db/query/optimizer/defs.h b/src/mongo/db/query/optimizer/defs.h
index 863521b3a03..15dce7fc94f 100644
--- a/src/mongo/db/query/optimizer/defs.h
+++ b/src/mongo/db/query/optimizer/defs.h
@@ -252,6 +252,10 @@ struct QueryHints {
// Disable Cascades branch-and-bound strategy, and fully evaluate all plans. Used in conjunction
// with keeping rejected plans.
bool _disableBranchAndBound = false;
+
+ // Controls if we prefer to cover queries which may return nulls with indexes, even though we
+ // may not distinguish between null and missing. Alternatively we always fetch (slower).
+ bool _fastIndexNullHandling = false;
};
} // namespace mongo::optimizer
diff --git a/src/mongo/db/query/optimizer/opt_phase_manager.cpp b/src/mongo/db/query/optimizer/opt_phase_manager.cpp
index 6b79345ba0b..17e38eecad4 100644
--- a/src/mongo/db/query/optimizer/opt_phase_manager.cpp
+++ b/src/mongo/db/query/optimizer/opt_phase_manager.cpp
@@ -144,11 +144,9 @@ bool OptPhaseManager::runMemoLogicalRewrite(const OptPhase phase,
}
_memo.clear();
- logicalRewriter = std::make_unique<LogicalRewriter>(
- _memo,
- _prefixId,
- rewriteSet,
- phase == OptPhase::MemoSubstitutionPhase /* useHeuristicCE */);
+ const bool useHeuristicCE = phase == OptPhase::MemoSubstitutionPhase;
+ logicalRewriter =
+ std::make_unique<LogicalRewriter>(_memo, _prefixId, rewriteSet, _hints, useHeuristicCE);
rootGroupId = logicalRewriter->addRootNode(input);
if (runStandalone) {
diff --git a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
index 098ef8632d3..176063563aa 100644
--- a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
+++ b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
@@ -1203,13 +1203,14 @@ TEST(PhysRewriter, FilterIndexing4) {
hints.emplace(PartialSchemaKey{"root", make<PathGet>("d", make<PathIdentity>())},
kDefaultSelectivity);
+ // Make sure the intervals do not contain Null.
ABT evalNode = make<EvaluationNode>(
"pa",
make<EvalPath>(make<PathGet>("a", make<PathIdentity>()), make<Variable>("root")),
std::move(scanNode));
ABT filterANode = make<FilterNode>(
- make<EvalFilter>(make<PathTraverse>(make<PathCompare>(Operations::Lt, Constant::int64(1)),
+ make<EvalFilter>(make<PathTraverse>(make<PathCompare>(Operations::Gt, Constant::int64(1)),
PathTraverse::kSingleLevel),
make<Variable>("pa")),
std::move(evalNode));
@@ -1217,7 +1218,7 @@ TEST(PhysRewriter, FilterIndexing4) {
ABT filterBNode = make<FilterNode>(
make<EvalFilter>(
make<PathGet>("b",
- make<PathTraverse>(make<PathCompare>(Operations::Lt, Constant::int64(1)),
+ make<PathTraverse>(make<PathCompare>(Operations::Gt, Constant::int64(1)),
PathTraverse::kSingleLevel)),
make<Variable>("root")),
std::move(filterANode));
@@ -1225,7 +1226,7 @@ TEST(PhysRewriter, FilterIndexing4) {
ABT filterCNode = make<FilterNode>(
make<EvalFilter>(
make<PathGet>("c",
- make<PathTraverse>(make<PathCompare>(Operations::Lt, Constant::int64(1)),
+ make<PathTraverse>(make<PathCompare>(Operations::Gt, Constant::int64(1)),
PathTraverse::kSingleLevel)),
make<Variable>("root")),
std::move(filterBNode));
@@ -1233,7 +1234,7 @@ TEST(PhysRewriter, FilterIndexing4) {
ABT filterDNode = make<FilterNode>(
make<EvalFilter>(
make<PathGet>("d",
- make<PathTraverse>(make<PathCompare>(Operations::Lt, Constant::int64(1)),
+ make<PathTraverse>(make<PathCompare>(Operations::Gt, Constant::int64(1)),
PathTraverse::kSingleLevel)),
make<Variable>("root")),
std::move(filterCNode));
@@ -1279,21 +1280,21 @@ TEST(PhysRewriter, FilterIndexing4) {
"Filter []\n"
"| EvalFilter []\n"
"| | Variable [evalTemp_8]\n"
- "| PathCompare [Lt]\n"
+ "| PathCompare [Gt]\n"
"| Const [1]\n"
"Filter []\n"
"| EvalFilter []\n"
"| | Variable [evalTemp_7]\n"
- "| PathCompare [Lt]\n"
+ "| PathCompare [Gt]\n"
"| Const [1]\n"
"Filter []\n"
"| EvalFilter []\n"
"| | Variable [evalTemp_6]\n"
- "| PathCompare [Lt]\n"
+ "| PathCompare [Gt]\n"
"| Const [1]\n"
"IndexScan [{'<indexKey> 0': pa, '<indexKey> 1': evalTemp_6, '<indexKey> 2': evalTemp_7, "
- "'<indexKey> 3': evalTemp_8}, scanDefName: c1, indexDefName: index1, interval: {[Const "
- "[minKey], Const [1]), [Const [minKey], Const [maxKey]], [Const [minKey], Const [maxKey]], "
+ "'<indexKey> 3': evalTemp_8}, scanDefName: c1, indexDefName: index1, interval: {(Const "
+ "[1], Const [maxKey]], [Const [minKey], Const [maxKey]], [Const [minKey], Const [maxKey]], "
"[Const [minKey], Const [maxKey]]}]\n"
" BindBlock:\n"
" [evalTemp_6]\n"
@@ -1842,42 +1843,67 @@ TEST(PhysRewriter, CoveredScan) {
using namespace properties;
PrefixId prefixId;
+ PartialSchemaSelHints hints;
+ hints.emplace(PartialSchemaKey{"root", make<PathGet>("a", make<PathIdentity>())}, 0.01);
+
ABT scanNode = make<ScanNode>("root", "c1");
- ABT evalNode1 = make<EvaluationNode>(
+ ABT evalNode = make<EvaluationNode>(
"pa",
make<EvalPath>(make<PathGet>("a", make<PathIdentity>()), make<Variable>("root")),
std::move(scanNode));
+ ABT filterNode =
+ make<FilterNode>(make<EvalFilter>(make<PathCompare>(Operations::Lt, Constant::int32(1)),
+ make<Variable>("pa")),
+ std::move(evalNode));
+
ABT rootNode =
- make<RootNode>(ProjectionRequirement{ProjectionNameVector{"pa"}}, std::move(evalNode1));
+ make<RootNode>(ProjectionRequirement{ProjectionNameVector{"pa"}}, std::move(filterNode));
OptPhaseManager phaseManager(
{OptPhaseManager::OptPhase::MemoSubstitutionPhase,
OptPhaseManager::OptPhase::MemoExplorationPhase,
OptPhaseManager::OptPhase::MemoImplementationPhase},
prefixId,
+ false /*requireRID*/,
{{{"c1",
ScanDefinition{
{},
{{"index1",
makeIndexDefinition("a", CollationOp::Ascending, false /*isMultiKey*/)}}}}}},
+ std::make_unique<HintedCE>(std::move(hints)),
+ std::make_unique<DefaultCosting>(),
{true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
ABT optimized = std::move(rootNode);
ASSERT_TRUE(phaseManager.optimize(optimized));
- ASSERT_EQ(4, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+ ASSERT_EQ(5, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+ // Since we do not optimize with fast null handling, we need to split the predicate between the
+ // index scan and fetch in order to handle null.
ASSERT_EXPLAIN_V2(
"Root []\n"
"| | projections: \n"
"| | pa\n"
"| RefBlock: \n"
"| Variable [pa]\n"
- "IndexScan [{'<indexKey> 0': pa}, scanDefName: c1, indexDefName: index1, interval: {[Const "
- "[minKey], Const [maxKey]]}]\n"
+ "BinaryJoin [joinType: Inner, {rid_0}]\n"
+ "| | Const [true]\n"
+ "| LimitSkip []\n"
+ "| | limitSkip:\n"
+ "| | limit: 1\n"
+ "| | skip: 0\n"
+ "| Seek [ridProjection: rid_0, {'a': pa}, c1]\n"
+ "| | BindBlock:\n"
+ "| | [pa]\n"
+ "| | Source []\n"
+ "| RefBlock: \n"
+ "| Variable [rid_0]\n"
+ "IndexScan [{'<rid>': rid_0}, scanDefName: c1, indexDefName: index1, interval: {[Const "
+ "[minKey], Const [1])}]\n"
" BindBlock:\n"
- " [pa]\n"
+ " [rid_0]\n"
" Source []\n",
optimized);
}
@@ -2082,9 +2108,11 @@ TEST(PhysRewriter, EvalIndexing2) {
{true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
ABT optimized = rootNode;
+ phaseManager.getHints()._fastIndexNullHandling = true;
ASSERT_TRUE(phaseManager.optimize(optimized));
ASSERT_BETWEEN(10, 20, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+ // Verify collation is subsumed into the index scan.
ASSERT_EXPLAIN_V2(
"Root []\n"
"| | projections: \n"
@@ -2566,62 +2594,35 @@ TEST(PhysRewriter, CompoundIndex3) {
ABT optimized = rootNode;
ASSERT_TRUE(phaseManager.optimize(optimized));
- ASSERT_BETWEEN(70, 130, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+ ASSERT_BETWEEN(40, 60, phaseManager.getMemo().getStats()._physPlanExplorationCount);
// Demonstrate we have a merge join because we have point predicates.
- ASSERT_EXPLAIN_V2(
- "Root []\n"
- "| | projections: \n"
- "| | root\n"
- "| RefBlock: \n"
- "| Variable [root]\n"
- "Collation []\n"
- "| | collation: \n"
- "| | pa: Ascending\n"
- "| | pb: Ascending\n"
- "| RefBlock: \n"
- "| Variable [pa]\n"
- "| Variable [pb]\n"
- "BinaryJoin [joinType: Inner, {rid_0}]\n"
- "| | Const [true]\n"
- "| LimitSkip []\n"
- "| | limitSkip:\n"
- "| | limit: 1\n"
- "| | skip: 0\n"
- "| Seek [ridProjection: rid_0, {'<root>': root, 'a': pa, 'b': pb}, c1]\n"
- "| | BindBlock:\n"
- "| | [pa]\n"
- "| | Source []\n"
- "| | [pb]\n"
- "| | Source []\n"
- "| | [root]\n"
- "| | Source []\n"
- "| RefBlock: \n"
- "| Variable [rid_0]\n"
- "MergeJoin []\n"
- "| | | Condition\n"
- "| | | rid_0 = rid_1\n"
- "| | Collation\n"
- "| | Ascending\n"
- "| Union []\n"
- "| | BindBlock:\n"
- "| | [rid_1]\n"
- "| | Source []\n"
- "| Evaluation []\n"
- "| | BindBlock:\n"
- "| | [rid_1]\n"
- "| | Variable [rid_0]\n"
- "| IndexScan [{'<rid>': rid_0}, scanDefName: c1, indexDefName: index2, interval: {[Const "
- "[2], Const [2]], [Const [4], Const [4]]}]\n"
- "| BindBlock:\n"
- "| [rid_0]\n"
- "| Source []\n"
- "IndexScan [{'<rid>': rid_0}, scanDefName: c1, indexDefName: index1, interval: {[Const "
- "[1], Const [1]], [Const [3], Const [3]]}]\n"
- " BindBlock:\n"
- " [rid_0]\n"
- " Source []\n",
- optimized);
+ const BSONObj& explainRoot = ExplainGenerator::explainBSONObj(optimized);
+ ASSERT_BSON_PATH("\"Collation\"", explainRoot, "child.nodeType");
+ ASSERT_BSON_PATH("\"pa\"", explainRoot, "child.collation.0.projectionName");
+ ASSERT_BSON_PATH("\"pb\"", explainRoot, "child.collation.1.projectionName");
+ ASSERT_BSON_PATH("\"BinaryJoin\"", explainRoot, "child.child.nodeType");
+ ASSERT_BSON_PATH("\"Seek\"", explainRoot, "child.child.rightChild.child.nodeType");
+ ASSERT_BSON_PATH("\"MergeJoin\"", explainRoot, "child.child.leftChild.nodeType");
+
+ const BSONObj& explainIndex1 =
+ dotted_path_support::extractElementAtPath(
+ explainRoot, "child.child.leftChild.rightChild.children.0.child")
+ .Obj();
+ ASSERT_BSON_PATH("\"IndexScan\"", explainIndex1, "nodeType");
+ ASSERT_BSON_PATH("2", explainIndex1, "interval.0.lowBound.bound.value");
+ ASSERT_BSON_PATH("2", explainIndex1, "interval.0.highBound.bound.value");
+ ASSERT_BSON_PATH("4", explainIndex1, "interval.1.lowBound.bound.value");
+ ASSERT_BSON_PATH("4", explainIndex1, "interval.1.highBound.bound.value");
+
+ const BSONObj& explainIndex2 =
+ dotted_path_support::extractElementAtPath(explainRoot, "child.child.leftChild.leftChild")
+ .Obj();
+ ASSERT_BSON_PATH("\"IndexScan\"", explainIndex2, "nodeType");
+ ASSERT_BSON_PATH("1", explainIndex2, "interval.0.lowBound.bound.value");
+ ASSERT_BSON_PATH("1", explainIndex2, "interval.0.highBound.bound.value");
+ ASSERT_BSON_PATH("3", explainIndex2, "interval.1.lowBound.bound.value");
+ ASSERT_BSON_PATH("3", explainIndex2, "interval.1.highBound.bound.value");
}
TEST(PhysRewriter, CompoundIndex4Negative) {
@@ -2891,7 +2892,7 @@ TEST(PhysRewriter, IndexBoundsIntersect2) {
ABT optimized = rootNode;
ASSERT_TRUE(phaseManager.optimize(optimized));
- ASSERT_EQ(6, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+ ASSERT_EQ(4, phaseManager.getMemo().getStats()._physPlanExplorationCount);
// Demonstrate we can intersect the bounds here because composition does not contain
// traverse.
@@ -3176,6 +3177,7 @@ TEST(PhysRewriter, IndexResidualReq1) {
{true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
ABT optimized = rootNode;
+ phaseManager.getHints()._fastIndexNullHandling = true;
ASSERT_TRUE(phaseManager.optimize(optimized));
ASSERT_BETWEEN(25, 45, phaseManager.getMemo().getStats()._physPlanExplorationCount);
diff --git a/src/mongo/db/query/optimizer/utils/utils.cpp b/src/mongo/db/query/optimizer/utils/utils.cpp
index f08f3262ca6..1893840a40a 100644
--- a/src/mongo/db/query/optimizer/utils/utils.cpp
+++ b/src/mongo/db/query/optimizer/utils/utils.cpp
@@ -32,6 +32,7 @@
#include "mongo/db/query/optimizer/index_bounds.h"
#include "mongo/db/query/optimizer/metadata.h"
#include "mongo/db/query/optimizer/reference_tracker.h"
+#include "mongo/db/query/optimizer/rewrites/const_eval.h"
#include "mongo/db/query/optimizer/syntax/path.h"
#include "mongo/db/query/optimizer/utils/interval_utils.h"
#include "mongo/db/storage/storage_parameters_gen.h"
@@ -1011,10 +1012,24 @@ CandidateIndexMap computeCandidateIndexMap(PrefixId& prefixId,
const ProjectionName& scanProjectionName,
const PartialSchemaRequirements& reqMap,
const ScanDefinition& scanDef,
+ const bool fastNullHandling,
bool& hasEmptyInterval) {
CandidateIndexMap result;
hasEmptyInterval = false;
+ // Contains one instance for each unmatched key.
+ PartialSchemaKeySet unsatisfiedKeysInitial;
+ for (const auto& [key, req] : reqMap) {
+ unsatisfiedKeysInitial.insert(key);
+
+ if (!fastNullHandling && req.hasBoundProjectionName() &&
+ checkMaybeHasNull(req.getIntervals())) {
+ // We cannot use indexes to return values for fields if we have an interval with null
+ // bounds.
+ return {};
+ }
+ }
+
for (const auto& [indexDefName, indexDef] : scanDef.getIndexDefs()) {
FieldProjectionMap indexProjectionMap;
auto intervals = MultiKeyIntervalReqExpr::makeSingularDNF(); // Singular empty interval.
@@ -1023,12 +1038,7 @@ CandidateIndexMap computeCandidateIndexMap(PrefixId& prefixId,
ResidualKeyMap residualKeyMap;
opt::unordered_set<size_t> fieldsToCollate;
size_t intervalPrefixSize = 0;
-
- // Contains one instance for each unmatched key.
- PartialSchemaKeySet unsatisfiedKeys;
- for (const auto& [key, req] : reqMap) {
- unsatisfiedKeys.insert(key);
- }
+ PartialSchemaKeySet unsatisfiedKeys = unsatisfiedKeysInitial;
// True if the paths from partial schema requirements form a strict prefix of the index
// collation.
@@ -1163,6 +1173,60 @@ CandidateIndexMap computeCandidateIndexMap(PrefixId& prefixId,
return result;
}
+/**
+ * Transport that checks if we have a primitive interval which may contain null.
+ */
+class PartialSchemaReqMayContainNullTransport {
+public:
+ bool transport(const IntervalReqExpr::Atom& node) {
+ const auto& interval = node.getExpr();
+
+ if (const auto& lowBound = interval.getLowBound();
+ foldFn(make<BinaryOp>(lowBound.isInclusive() ? Operations::Gt : Operations::Gte,
+ lowBound.getBound(),
+ Constant::null())) == Constant::boolean(true)) {
+ // Lower bound is strictly larger than null, or equal to null but not inclusive.
+ return false;
+ }
+ if (const auto& highBound = interval.getHighBound();
+ foldFn(make<BinaryOp>(highBound.isInclusive() ? Operations::Lt : Operations::Lte,
+ highBound.getBound(),
+ Constant::null())) == Constant::boolean(true)) {
+ // Upper bound is strictly smaller than null, or equal to null but not inclusive.
+ return false;
+ }
+
+ return true;
+ }
+
+ bool transport(const IntervalReqExpr::Conjunction& node, std::vector<bool> childResults) {
+ return std::all_of(
+ childResults.cbegin(), childResults.cend(), [](const bool v) { return v; });
+ }
+
+ bool transport(const IntervalReqExpr::Disjunction& node, std::vector<bool> childResults) {
+ return std::any_of(
+ childResults.cbegin(), childResults.cend(), [](const bool v) { return v; });
+ }
+
+ bool check(const IntervalReqExpr::Node& intervals) {
+ return algebra::transport<false>(intervals, *this);
+ }
+
+private:
+ ABT foldFn(ABT expr) {
+ // Performs constant folding.
+ VariableEnvironment env = VariableEnvironment::build(expr);
+ ConstEval instance(env);
+ instance.optimize(expr);
+ return expr;
+ };
+};
+
+bool checkMaybeHasNull(const IntervalReqExpr::Node& intervals) {
+ return PartialSchemaReqMayContainNullTransport{}.check(intervals);
+}
+
class PartialSchemaReqLowerTransport {
public:
PartialSchemaReqLowerTransport(const bool hasBoundProjName)
@@ -1176,9 +1240,6 @@ public:
if (interval.isEquality()) {
if (auto constPtr = lowBound.getBound().cast<Constant>()) {
if (constPtr->isNull()) {
- uassert(6624163,
- "Cannot lower null index bound with bound projection",
- !_hasBoundProjName);
return make<PathComposeA>(make<PathDefault>(Constant::boolean(true)),
make<PathCompare>(Operations::Eq, Constant::null()));
}
diff --git a/src/mongo/db/query/optimizer/utils/utils.h b/src/mongo/db/query/optimizer/utils/utils.h
index 1cf7f20d3f0..09a18cc8817 100644
--- a/src/mongo/db/query/optimizer/utils/utils.h
+++ b/src/mongo/db/query/optimizer/utils/utils.h
@@ -222,9 +222,16 @@ CandidateIndexMap computeCandidateIndexMap(PrefixId& prefixId,
const ProjectionName& scanProjectionName,
const PartialSchemaRequirements& reqMap,
const ScanDefinition& scanDef,
+ bool fastNullHandling,
bool& hasEmptyInterval);
/**
+ * Checks if we have an interval tree which has at least one atomic interval which may include Null
+ * as an endpoint.
+ */
+bool checkMaybeHasNull(const IntervalReqExpr::Node& intervals);
+
+/**
* Used to lower a Sargable node to a subtree consisting of functionally equivalent Filter and Eval
* nodes.
*/