summaryrefslogtreecommitdiff
path: root/src/mongo/db/query
diff options
context:
space:
mode:
authorSvilen Mihaylov <svilen.mihaylov@mongodb.com>2022-05-27 19:35:12 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-05-27 20:47:40 +0000
commitc812983558ae5a4543db6b16f26ce1190edfdef7 (patch)
treec5c6f7f06d9a7500e944f4ebeb9ddd3e6f8cf40c /src/mongo/db/query
parenta474d4efa5a999526e696423af5d30f4f5187613 (diff)
downloadmongo-c812983558ae5a4543db6b16f26ce1190edfdef7.tar.gz
SERVER-62961 Fix ABT->SBE lowering of EvalFilter paths to correctly handle comparisons with arrays
Diffstat (limited to 'src/mongo/db/query')
-rw-r--r--src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp55
-rw-r--r--src/mongo/db/query/optimizer/cascades/logical_rewriter_rules.h1
-rw-r--r--src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp106
-rw-r--r--src/mongo/db/query/optimizer/utils/utils.cpp107
-rw-r--r--src/mongo/db/query/optimizer/utils/utils.h5
5 files changed, 255 insertions, 19 deletions
diff --git a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
index ee2bd9268a5..fd6bf9e1e40 100644
--- a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
+++ b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
@@ -34,6 +34,7 @@ namespace mongo::optimizer::cascades {
LogicalRewriter::RewriteSet LogicalRewriter::_explorationSet = {
{LogicalRewriteType::GroupByExplore, 1},
+ {LogicalRewriteType::FilterExplore, 1},
{LogicalRewriteType::SargableSplit, 2},
{LogicalRewriteType::FilterRIDIntersectReorder, 2},
{LogicalRewriteType::EvaluationRIDIntersectReorder, 2}};
@@ -597,12 +598,13 @@ struct SubstituteConvert<LimitSkipNode> {
}
};
-static void addElemMatchAndSargableNode(const ABT& node, ABT sargableNode, RewriteContext& ctx) {
+static void addSargableChildNode(const ABT& node, ABT sargableNode, RewriteContext& ctx) {
ABT newNode = node;
newNode.cast<FilterNode>()->getChild() = std::move(sargableNode);
ctx.addNode(newNode, false /*substitute*/, true /*addExistingNodeWithNewChild*/);
}
+template <bool isSubstitution>
static void convertFilterToSargableNode(ABT::reference_type node,
const FilterNode& filterNode,
RewriteContext& ctx) {
@@ -643,6 +645,16 @@ static void convertFilterToSargableNode(ABT::reference_type node,
!isIntervalReqFullyOpenDNF(entry.second.getIntervals()));
}
+ // If in substitution mode, disallow retaining original predicate. If in exploration mode, only
+ // allow retaining the original predicate and if we have at least one index available.
+ if constexpr (isSubstitution) {
+ if (conversion._retainPredicate) {
+ return;
+ }
+ } else if (!conversion._retainPredicate || scanDef.getIndexDefs().empty()) {
+ return;
+ }
+
bool hasEmptyInterval = false;
auto candidateIndexMap = computeCandidateIndexMap(ctx.getPrefixId(),
indexingAvailability.getScanProjection(),
@@ -658,7 +670,31 @@ static void convertFilterToSargableNode(ABT::reference_type node,
IndexReqTarget::Complete,
filterNode.getChild());
- ctx.addNode(sargableNode, true /*substitute*/);
+ if (conversion._retainPredicate) {
+ const GroupIdType childGroupId =
+ filterNode.getChild().cast<MemoLogicalDelegatorNode>()->getGroupId();
+ if (childGroupId == indexingAvailability.getScanGroupId()) {
+ // Add node directly.
+ addSargableChildNode(node, std::move(sargableNode), ctx);
+ } else {
+ // Look at the child group to find SargableNodes and attempt to combine.
+ const auto& logicalNodes = ctx.getMemo().getGroup(childGroupId)._logicalNodes;
+ for (const ABT& childNode : logicalNodes.getVector()) {
+ if (auto childSargableNode = childNode.cast<SargableNode>();
+ childSargableNode != nullptr) {
+ const auto& result = mergeSargableNodes(indexingAvailability,
+ *sargableNode.cast<SargableNode>(),
+ *childSargableNode,
+ ctx);
+ if (result) {
+ addSargableChildNode(node, std::move(*result), ctx);
+ }
+ }
+ }
+ }
+ } else {
+ ctx.addNode(sargableNode, isSubstitution);
+ }
}
}
@@ -706,7 +742,7 @@ struct SubstituteConvert<FilterNode> {
}
}
- convertFilterToSargableNode(node, filterNode, ctx);
+ convertFilterToSargableNode<true /*isSubstitution*/>(node, filterNode, ctx);
}
};
@@ -779,6 +815,10 @@ struct SubstituteConvert<EvaluationNode> {
// We still want to extract sargable nodes from EvalNode to use for PhysicalScans.
PartialSchemaReqConversion conversion =
convertExprToPartialSchemaReq(evalNode.getProjection());
+ uassert(6624165,
+ "Should not be getting retainPredicate set for EvalNodes",
+ !conversion._retainPredicate);
+
if (!conversion._success || conversion._reqMap.size() != 1) {
// For evaluation nodes we expect to create a single entry.
return;
@@ -975,6 +1015,13 @@ struct ExploreConvert<SargableNode> {
};
template <>
+struct ExploreConvert<FilterNode> {
+ void operator()(ABT::reference_type node, RewriteContext& ctx) {
+ convertFilterToSargableNode<false /*isSubstitution*/>(node, *node.cast<FilterNode>(), ctx);
+ }
+};
+
+template <>
struct ExploreConvert<GroupByNode> {
void operator()(ABT::reference_type node, RewriteContext& ctx) {
const GroupByNode& groupByNode = *node.cast<GroupByNode>();
@@ -1167,6 +1214,8 @@ void LogicalRewriter::initializeRewrites() {
LogicalRewriteType::ExchangeValueScanPropagate,
&LogicalRewriter::bindAboveBelow<ExchangeNode, ValueScanNode, SubstituteReorder>);
+ registerRewrite(LogicalRewriteType::FilterExplore,
+ &LogicalRewriter::bindSingleNode<FilterNode, ExploreConvert>);
registerRewrite(LogicalRewriteType::GroupByExplore,
&LogicalRewriter::bindSingleNode<GroupByNode, ExploreConvert>);
registerRewrite(LogicalRewriteType::SargableSplit,
diff --git a/src/mongo/db/query/optimizer/cascades/logical_rewriter_rules.h b/src/mongo/db/query/optimizer/cascades/logical_rewriter_rules.h
index ab562c685c8..227b2b7ba03 100644
--- a/src/mongo/db/query/optimizer/cascades/logical_rewriter_rules.h
+++ b/src/mongo/db/query/optimizer/cascades/logical_rewriter_rules.h
@@ -75,6 +75,7 @@ namespace mongo::optimizer::cascades {
\
/* Convert filter and evaluation nodes into sargable nodes */ \
F(FilterSubstitute) \
+ F(FilterExplore) \
F(EvaluationSubstitute) \
F(SargableSplit) \
F(FilterRIDIntersectReorder) \
diff --git a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
index 9ff870e6117..6f6f6c743ed 100644
--- a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
+++ b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
@@ -3478,6 +3478,109 @@ TEST(PhysRewriter, ObjectElemMatch) {
optimized);
}
+TEST(PhysRewriter, ArrayConstantIndex) {
+ using namespace properties;
+ PrefixId prefixId;
+
+ ABT scanNode = make<ScanNode>("root", "c1");
+
+ ABT filterNode1 = make<FilterNode>(
+ make<EvalFilter>(
+ make<PathGet>(
+ "b", make<PathTraverse>(make<PathCompare>(Operations::Eq, Constant::int64(0)))),
+ make<Variable>("root")),
+ std::move(scanNode));
+
+ const auto [tag, val] = sbe::value::makeNewArray();
+ sbe::value::Array* arr = sbe::value::getArrayView(val);
+ for (int i = 0; i < 3; i++) {
+ arr->push_back(sbe::value::TypeTags::NumberInt32, i + 1);
+ }
+ ABT arrayConst = make<Constant>(tag, val);
+
+ // This encodes a match against an array constant.
+ ABT filterNode2 = make<FilterNode>(
+ make<EvalFilter>(
+ make<PathGet>("a",
+ make<PathComposeA>(
+ make<PathTraverse>(make<PathCompare>(Operations::Eq, arrayConst)),
+ make<PathCompare>(Operations::Eq, arrayConst))),
+ make<Variable>("root")),
+ std::move(filterNode1));
+
+ ABT rootNode = make<RootNode>(properties::ProjectionRequirement{ProjectionNameVector{"root"}},
+ std::move(filterNode2));
+
+ OptPhaseManager phaseManager(
+ {OptPhaseManager::OptPhase::MemoSubstitutionPhase,
+ OptPhaseManager::OptPhase::MemoExplorationPhase,
+ OptPhaseManager::OptPhase::MemoImplementationPhase},
+ prefixId,
+ {{{"c1",
+ ScanDefinition{{},
+ {{"index1",
+ makeCompositeIndexDefinition(
+ {{"b", CollationOp::Ascending, true /*isMultiKey*/},
+ {"a", CollationOp::Ascending, true /*isMultiKey*/}})}}}}}},
+ {true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
+
+ ABT optimized = rootNode;
+ ASSERT_TRUE(phaseManager.optimize(optimized));
+ ASSERT_BETWEEN(8, 12, phaseManager.getMemo().getStats()._physPlanExplorationCount);
+
+ // Demonstrate we get index bounds to handle the array constant, while we also retain the
+ // original filter. We have index bound with the array itself unioned with bound using the first
+ // array element.
+ ASSERT_EXPLAIN_V2(
+ "Root []\n"
+ "| | projections: \n"
+ "| | root\n"
+ "| RefBlock: \n"
+ "| Variable [root]\n"
+ "Filter []\n"
+ "| EvalFilter []\n"
+ "| | Variable [root]\n"
+ "| PathGet [a]\n"
+ "| PathComposeA []\n"
+ "| | PathCompare [Eq]\n"
+ "| | Const [[1, 2, 3]]\n"
+ "| PathTraverse []\n"
+ "| PathCompare [Eq]\n"
+ "| Const [[1, 2, 3]]\n"
+ "BinaryJoin [joinType: Inner, {rid_0}]\n"
+ "| | Const [true]\n"
+ "| LimitSkip []\n"
+ "| | limitSkip:\n"
+ "| | limit: 1\n"
+ "| | skip: 0\n"
+ "| Seek [ridProjection: rid_0, {'<root>': root}, c1]\n"
+ "| | BindBlock:\n"
+ "| | [root]\n"
+ "| | Source []\n"
+ "| RefBlock: \n"
+ "| Variable [rid_0]\n"
+ "GroupBy []\n"
+ "| | groupings: \n"
+ "| | RefBlock: \n"
+ "| | Variable [rid_0]\n"
+ "| aggregations: \n"
+ "Union []\n"
+ "| | BindBlock:\n"
+ "| | [rid_0]\n"
+ "| | Source []\n"
+ "| IndexScan [{'<rid>': rid_0}, scanDefName: c1, indexDefName: index1, interval: {[Const "
+ "[0], Const [0]], [Const [1], Const [1]]}]\n"
+ "| BindBlock:\n"
+ "| [rid_0]\n"
+ "| Source []\n"
+ "IndexScan [{'<rid>': rid_0}, scanDefName: c1, indexDefName: index1, interval: {[Const "
+ "[0], Const [0]], [Const [[1, 2, 3]], Const [[1, 2, 3]]]}]\n"
+ " BindBlock:\n"
+ " [rid_0]\n"
+ " Source []\n",
+ optimized);
+}
+
TEST(PhysRewriter, ParallelScan) {
using namespace properties;
PrefixId prefixId;
@@ -4213,6 +4316,7 @@ TEST(PhysRewriter, PartialIndex1) {
make<Variable>("root")));
ASSERT_TRUE(conversionResult._success);
ASSERT_FALSE(conversionResult._hasEmptyInterval);
+ ASSERT_FALSE(conversionResult._retainPredicate);
OptPhaseManager phaseManager(
{OptPhaseManager::OptPhase::MemoSubstitutionPhase,
@@ -4289,6 +4393,7 @@ TEST(PhysRewriter, PartialIndex2) {
make<Variable>("root")));
ASSERT_TRUE(conversionResult._success);
ASSERT_FALSE(conversionResult._hasEmptyInterval);
+ ASSERT_FALSE(conversionResult._retainPredicate);
OptPhaseManager phaseManager(
{OptPhaseManager::OptPhase::MemoSubstitutionPhase,
@@ -4363,6 +4468,7 @@ TEST(PhysRewriter, PartialIndexReject) {
make<Variable>("root")));
ASSERT_TRUE(conversionResult._success);
ASSERT_FALSE(conversionResult._hasEmptyInterval);
+ ASSERT_FALSE(conversionResult._retainPredicate);
OptPhaseManager phaseManager(
{OptPhaseManager::OptPhase::MemoSubstitutionPhase,
diff --git a/src/mongo/db/query/optimizer/utils/utils.cpp b/src/mongo/db/query/optimizer/utils/utils.cpp
index 94fb5a2647d..322ae174570 100644
--- a/src/mongo/db/query/optimizer/utils/utils.cpp
+++ b/src/mongo/db/query/optimizer/utils/utils.cpp
@@ -343,7 +343,8 @@ PartialSchemaReqConversion::PartialSchemaReqConversion()
_reqMap(),
_hasIntersected(false),
_hasTraversed(false),
- _hasEmptyInterval(false) {}
+ _hasEmptyInterval(false),
+ _retainPredicate(false) {}
PartialSchemaReqConversion::PartialSchemaReqConversion(PartialSchemaRequirements reqMap)
: _success(true),
@@ -351,7 +352,8 @@ PartialSchemaReqConversion::PartialSchemaReqConversion(PartialSchemaRequirements
_reqMap(std::move(reqMap)),
_hasIntersected(false),
_hasTraversed(false),
- _hasEmptyInterval(false) {}
+ _hasEmptyInterval(false),
+ _retainPredicate(false) {}
PartialSchemaReqConversion::PartialSchemaReqConversion(ABT bound)
: _success(true),
@@ -359,7 +361,8 @@ PartialSchemaReqConversion::PartialSchemaReqConversion(ABT bound)
_reqMap(),
_hasIntersected(false),
_hasTraversed(false),
- _hasEmptyInterval(false) {}
+ _hasEmptyInterval(false),
+ _retainPredicate(false) {}
/**
* Helper class that builds PartialSchemaRequirements property from an EvalFilter or an EvalPath.
@@ -391,6 +394,7 @@ public:
PartialSchemaReqConversion result{std::move(newMap)};
result._hasEmptyInterval = pathResult._hasEmptyInterval;
+ result._retainPredicate = pathResult._retainPredicate;
return result;
}
@@ -421,12 +425,12 @@ public:
return {};
}
- auto& leftReq = leftResult._reqMap;
- auto& rightReq = rightResult._reqMap;
+ auto& leftReqMap = leftResult._reqMap;
+ auto& rightReqMap = rightResult._reqMap;
if (isMultiplicative) {
{
ProjectionRenames projectionRenames;
- if (!intersectPartialSchemaReq(leftReq, rightReq, projectionRenames)) {
+ if (!intersectPartialSchemaReq(leftReqMap, rightReqMap, projectionRenames)) {
return {};
}
if (!projectionRenames.empty()) {
@@ -437,7 +441,7 @@ public:
if (!leftResult._hasTraversed && !rightResult._hasTraversed) {
// Intersect intervals only if we have not traversed. E.g. (-inf, 90) ^ (70, +inf)
// becomes (70, 90).
- for (auto& [key, req] : leftReq) {
+ for (auto& [key, req] : leftReqMap) {
auto newIntervals = intersectDNFIntervals(req.getIntervals());
if (newIntervals) {
req.getIntervals() = std::move(newIntervals.get());
@@ -446,7 +450,7 @@ public:
break;
}
}
- } else if (leftReq.size() > 1) {
+ } else if (leftReqMap.size() > 1) {
// Reject if we have traversed, and composed requirements are more than one.
return {};
}
@@ -457,20 +461,91 @@ public:
// We can only perform additive composition (OR) if we have a single matching key on both
// sides.
- if (leftReq.size() != 1 || rightReq.size() != 1) {
+ if (leftReqMap.size() != 1 || rightReqMap.size() != 1) {
return {};
}
- auto leftEntry = leftReq.begin();
- auto rightEntry = rightReq.begin();
- if (!(leftEntry->first == rightEntry->first)) {
+ auto leftEntry = leftReqMap.begin();
+ auto rightEntry = rightReqMap.begin();
+ auto& [leftKey, leftReq] = *leftEntry;
+ auto& [rightKey, rightReq] = *rightEntry;
+
+ if (leftKey == rightKey) {
+ combineIntervalsDNF(false /*intersect*/,
+ leftEntry->second.getIntervals(),
+ rightEntry->second.getIntervals());
+ return leftResult;
+ }
+
+ // Here we can combine if paths differ only by a Traverse element and both intervals
+ // are the same, with array bounds. For example:
+ // Left: Id, [[1, 2, 3], [1, 2, 3]]
+ // Right: Traverse Id [[1, 2, 3], [1, 2, 3]]
+ // We can then combine into:
+ // Traverse Id: [[1, 2, 3], [1, 2, 3]] OR [1, 1]
+ // We also need to retain the original filter.
+
+ if (leftKey._projectionName != rightKey._projectionName) {
+ return {};
+ }
+ if (leftReq.hasBoundProjectionName() || rightReq.hasBoundProjectionName()) {
+ return {};
+ }
+
+ auto& leftIntervals = leftReq.getIntervals();
+ auto& rightIntervals = rightReq.getIntervals();
+ const auto& leftInterval = IntervalReqExpr::getSingularDNF(leftIntervals);
+ const auto& rightInterval = IntervalReqExpr::getSingularDNF(rightIntervals);
+ if (!leftInterval || !rightInterval || leftInterval != rightInterval) {
+ return {};
+ }
+ if (!leftInterval->isEquality() || !rightInterval->isEquality()) {
+ // For now only supporting equalities.
return {};
}
- combineIntervalsDNF(false /*intersect*/,
- leftEntry->second.getIntervals(),
- rightEntry->second.getIntervals());
- return leftResult;
+ const ABT& bound = leftInterval->getLowBound().getBound();
+ const auto constBoundPtr = bound.cast<Constant>();
+ if (constBoundPtr == nullptr) {
+ return {};
+ }
+ const auto [tag, val] = constBoundPtr->get();
+ if (tag != sbe::value::TypeTags::Array) {
+ return {};
+ }
+ const sbe::value::Array* arr = sbe::value::getArrayView(val);
+ if (arr->size() == 0) {
+ // For now we do not support empty arrays. Need to translate into null bounds.
+ return {};
+ }
+
+ const auto [elTag, elVal] = arr->getAt(0);
+ const auto [elTagCopy, elValCopy] = sbe::value::copyValue(elTag, elVal);
+ ABT elementBound = make<Constant>(elTagCopy, elValCopy);
+ // Create new interval which uses the first element of the array.
+ const IntervalReqExpr::Node& newInterval =
+ IntervalReqExpr::makeSingularDNF(IntervalRequirement{
+ {true /*inclusive*/, elementBound}, {true /*inclusive*/, elementBound}});
+
+ const ABT& leftPath = leftKey._path;
+ const ABT& rightPath = rightKey._path;
+ if (const auto leftTraversePtr = leftPath.cast<PathTraverse>();
+ leftTraversePtr != nullptr && leftTraversePtr->getPath().is<PathIdentity>() &&
+ rightPath.is<PathIdentity>()) {
+ // leftPath = Id, rightPath = Traverse Id.
+ combineIntervalsDNF(false /*intersect*/, leftIntervals, newInterval);
+ leftResult._retainPredicate = true;
+ return leftResult;
+ } else if (const auto rightTraversePtr = rightPath.cast<PathTraverse>();
+ rightTraversePtr != nullptr && rightTraversePtr->getPath().is<PathIdentity>() &&
+ leftPath.is<PathIdentity>()) {
+ // leftPath = Traverse Id, rightPath = Id.
+ combineIntervalsDNF(false /*intersect*/, rightIntervals, newInterval);
+ rightResult._retainPredicate = true;
+ return rightResult;
+ }
+
+ return {};
}
PartialSchemaReqConversion transport(const ABT& n,
diff --git a/src/mongo/db/query/optimizer/utils/utils.h b/src/mongo/db/query/optimizer/utils/utils.h
index a42fd47714b..42845f0ce95 100644
--- a/src/mongo/db/query/optimizer/utils/utils.h
+++ b/src/mongo/db/query/optimizer/utils/utils.h
@@ -176,6 +176,11 @@ struct PartialSchemaReqConversion {
// If we have determined that we have a contradiction.
bool _hasEmptyInterval;
+
+ // If true, retain original predicate after the conversion. In this case, the requirement map
+ // might capture only a part of the predicate.
+ // TODO: consider generalizing to retain only a part of the predicate.
+ bool _retainPredicate;
};
/**