diff options
author | Svilen Mihaylov <svilen.mihaylov@mongodb.com> | 2022-05-27 19:35:12 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-05-27 20:47:40 +0000 |
commit | c812983558ae5a4543db6b16f26ce1190edfdef7 (patch) | |
tree | c5c6f7f06d9a7500e944f4ebeb9ddd3e6f8cf40c /src/mongo/db/query | |
parent | a474d4efa5a999526e696423af5d30f4f5187613 (diff) | |
download | mongo-c812983558ae5a4543db6b16f26ce1190edfdef7.tar.gz |
SERVER-62961 Fix ABT->SBE lowering of EvalFilter paths to correctly handle comparisons with arrays
Diffstat (limited to 'src/mongo/db/query')
5 files changed, 255 insertions, 19 deletions
diff --git a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp index ee2bd9268a5..fd6bf9e1e40 100644 --- a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp +++ b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp @@ -34,6 +34,7 @@ namespace mongo::optimizer::cascades { LogicalRewriter::RewriteSet LogicalRewriter::_explorationSet = { {LogicalRewriteType::GroupByExplore, 1}, + {LogicalRewriteType::FilterExplore, 1}, {LogicalRewriteType::SargableSplit, 2}, {LogicalRewriteType::FilterRIDIntersectReorder, 2}, {LogicalRewriteType::EvaluationRIDIntersectReorder, 2}}; @@ -597,12 +598,13 @@ struct SubstituteConvert<LimitSkipNode> { } }; -static void addElemMatchAndSargableNode(const ABT& node, ABT sargableNode, RewriteContext& ctx) { +static void addSargableChildNode(const ABT& node, ABT sargableNode, RewriteContext& ctx) { ABT newNode = node; newNode.cast<FilterNode>()->getChild() = std::move(sargableNode); ctx.addNode(newNode, false /*substitute*/, true /*addExistingNodeWithNewChild*/); } +template <bool isSubstitution> static void convertFilterToSargableNode(ABT::reference_type node, const FilterNode& filterNode, RewriteContext& ctx) { @@ -643,6 +645,16 @@ static void convertFilterToSargableNode(ABT::reference_type node, !isIntervalReqFullyOpenDNF(entry.second.getIntervals())); } + // If in substitution mode, disallow retaining original predicate. If in exploration mode, only + // allow retaining the original predicate and if we have at least one index available. + if constexpr (isSubstitution) { + if (conversion._retainPredicate) { + return; + } + } else if (!conversion._retainPredicate || scanDef.getIndexDefs().empty()) { + return; + } + bool hasEmptyInterval = false; auto candidateIndexMap = computeCandidateIndexMap(ctx.getPrefixId(), indexingAvailability.getScanProjection(), @@ -658,7 +670,31 @@ static void convertFilterToSargableNode(ABT::reference_type node, IndexReqTarget::Complete, filterNode.getChild()); - ctx.addNode(sargableNode, true /*substitute*/); + if (conversion._retainPredicate) { + const GroupIdType childGroupId = + filterNode.getChild().cast<MemoLogicalDelegatorNode>()->getGroupId(); + if (childGroupId == indexingAvailability.getScanGroupId()) { + // Add node directly. + addSargableChildNode(node, std::move(sargableNode), ctx); + } else { + // Look at the child group to find SargableNodes and attempt to combine. + const auto& logicalNodes = ctx.getMemo().getGroup(childGroupId)._logicalNodes; + for (const ABT& childNode : logicalNodes.getVector()) { + if (auto childSargableNode = childNode.cast<SargableNode>(); + childSargableNode != nullptr) { + const auto& result = mergeSargableNodes(indexingAvailability, + *sargableNode.cast<SargableNode>(), + *childSargableNode, + ctx); + if (result) { + addSargableChildNode(node, std::move(*result), ctx); + } + } + } + } + } else { + ctx.addNode(sargableNode, isSubstitution); + } } } @@ -706,7 +742,7 @@ struct SubstituteConvert<FilterNode> { } } - convertFilterToSargableNode(node, filterNode, ctx); + convertFilterToSargableNode<true /*isSubstitution*/>(node, filterNode, ctx); } }; @@ -779,6 +815,10 @@ struct SubstituteConvert<EvaluationNode> { // We still want to extract sargable nodes from EvalNode to use for PhysicalScans. PartialSchemaReqConversion conversion = convertExprToPartialSchemaReq(evalNode.getProjection()); + uassert(6624165, + "Should not be getting retainPredicate set for EvalNodes", + !conversion._retainPredicate); + if (!conversion._success || conversion._reqMap.size() != 1) { // For evaluation nodes we expect to create a single entry. return; @@ -975,6 +1015,13 @@ struct ExploreConvert<SargableNode> { }; template <> +struct ExploreConvert<FilterNode> { + void operator()(ABT::reference_type node, RewriteContext& ctx) { + convertFilterToSargableNode<false /*isSubstitution*/>(node, *node.cast<FilterNode>(), ctx); + } +}; + +template <> struct ExploreConvert<GroupByNode> { void operator()(ABT::reference_type node, RewriteContext& ctx) { const GroupByNode& groupByNode = *node.cast<GroupByNode>(); @@ -1167,6 +1214,8 @@ void LogicalRewriter::initializeRewrites() { LogicalRewriteType::ExchangeValueScanPropagate, &LogicalRewriter::bindAboveBelow<ExchangeNode, ValueScanNode, SubstituteReorder>); + registerRewrite(LogicalRewriteType::FilterExplore, + &LogicalRewriter::bindSingleNode<FilterNode, ExploreConvert>); registerRewrite(LogicalRewriteType::GroupByExplore, &LogicalRewriter::bindSingleNode<GroupByNode, ExploreConvert>); registerRewrite(LogicalRewriteType::SargableSplit, diff --git a/src/mongo/db/query/optimizer/cascades/logical_rewriter_rules.h b/src/mongo/db/query/optimizer/cascades/logical_rewriter_rules.h index ab562c685c8..227b2b7ba03 100644 --- a/src/mongo/db/query/optimizer/cascades/logical_rewriter_rules.h +++ b/src/mongo/db/query/optimizer/cascades/logical_rewriter_rules.h @@ -75,6 +75,7 @@ namespace mongo::optimizer::cascades { \ /* Convert filter and evaluation nodes into sargable nodes */ \ F(FilterSubstitute) \ + F(FilterExplore) \ F(EvaluationSubstitute) \ F(SargableSplit) \ F(FilterRIDIntersectReorder) \ diff --git a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp index 9ff870e6117..6f6f6c743ed 100644 --- a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp +++ b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp @@ -3478,6 +3478,109 @@ TEST(PhysRewriter, ObjectElemMatch) { optimized); } +TEST(PhysRewriter, ArrayConstantIndex) { + using namespace properties; + PrefixId prefixId; + + ABT scanNode = make<ScanNode>("root", "c1"); + + ABT filterNode1 = make<FilterNode>( + make<EvalFilter>( + make<PathGet>( + "b", make<PathTraverse>(make<PathCompare>(Operations::Eq, Constant::int64(0)))), + make<Variable>("root")), + std::move(scanNode)); + + const auto [tag, val] = sbe::value::makeNewArray(); + sbe::value::Array* arr = sbe::value::getArrayView(val); + for (int i = 0; i < 3; i++) { + arr->push_back(sbe::value::TypeTags::NumberInt32, i + 1); + } + ABT arrayConst = make<Constant>(tag, val); + + // This encodes a match against an array constant. + ABT filterNode2 = make<FilterNode>( + make<EvalFilter>( + make<PathGet>("a", + make<PathComposeA>( + make<PathTraverse>(make<PathCompare>(Operations::Eq, arrayConst)), + make<PathCompare>(Operations::Eq, arrayConst))), + make<Variable>("root")), + std::move(filterNode1)); + + ABT rootNode = make<RootNode>(properties::ProjectionRequirement{ProjectionNameVector{"root"}}, + std::move(filterNode2)); + + OptPhaseManager phaseManager( + {OptPhaseManager::OptPhase::MemoSubstitutionPhase, + OptPhaseManager::OptPhase::MemoExplorationPhase, + OptPhaseManager::OptPhase::MemoImplementationPhase}, + prefixId, + {{{"c1", + ScanDefinition{{}, + {{"index1", + makeCompositeIndexDefinition( + {{"b", CollationOp::Ascending, true /*isMultiKey*/}, + {"a", CollationOp::Ascending, true /*isMultiKey*/}})}}}}}}, + {true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests}); + + ABT optimized = rootNode; + ASSERT_TRUE(phaseManager.optimize(optimized)); + ASSERT_BETWEEN(8, 12, phaseManager.getMemo().getStats()._physPlanExplorationCount); + + // Demonstrate we get index bounds to handle the array constant, while we also retain the + // original filter. We have index bound with the array itself unioned with bound using the first + // array element. + ASSERT_EXPLAIN_V2( + "Root []\n" + "| | projections: \n" + "| | root\n" + "| RefBlock: \n" + "| Variable [root]\n" + "Filter []\n" + "| EvalFilter []\n" + "| | Variable [root]\n" + "| PathGet [a]\n" + "| PathComposeA []\n" + "| | PathCompare [Eq]\n" + "| | Const [[1, 2, 3]]\n" + "| PathTraverse []\n" + "| PathCompare [Eq]\n" + "| Const [[1, 2, 3]]\n" + "BinaryJoin [joinType: Inner, {rid_0}]\n" + "| | Const [true]\n" + "| LimitSkip []\n" + "| | limitSkip:\n" + "| | limit: 1\n" + "| | skip: 0\n" + "| Seek [ridProjection: rid_0, {'<root>': root}, c1]\n" + "| | BindBlock:\n" + "| | [root]\n" + "| | Source []\n" + "| RefBlock: \n" + "| Variable [rid_0]\n" + "GroupBy []\n" + "| | groupings: \n" + "| | RefBlock: \n" + "| | Variable [rid_0]\n" + "| aggregations: \n" + "Union []\n" + "| | BindBlock:\n" + "| | [rid_0]\n" + "| | Source []\n" + "| IndexScan [{'<rid>': rid_0}, scanDefName: c1, indexDefName: index1, interval: {[Const " + "[0], Const [0]], [Const [1], Const [1]]}]\n" + "| BindBlock:\n" + "| [rid_0]\n" + "| Source []\n" + "IndexScan [{'<rid>': rid_0}, scanDefName: c1, indexDefName: index1, interval: {[Const " + "[0], Const [0]], [Const [[1, 2, 3]], Const [[1, 2, 3]]]}]\n" + " BindBlock:\n" + " [rid_0]\n" + " Source []\n", + optimized); +} + TEST(PhysRewriter, ParallelScan) { using namespace properties; PrefixId prefixId; @@ -4213,6 +4316,7 @@ TEST(PhysRewriter, PartialIndex1) { make<Variable>("root"))); ASSERT_TRUE(conversionResult._success); ASSERT_FALSE(conversionResult._hasEmptyInterval); + ASSERT_FALSE(conversionResult._retainPredicate); OptPhaseManager phaseManager( {OptPhaseManager::OptPhase::MemoSubstitutionPhase, @@ -4289,6 +4393,7 @@ TEST(PhysRewriter, PartialIndex2) { make<Variable>("root"))); ASSERT_TRUE(conversionResult._success); ASSERT_FALSE(conversionResult._hasEmptyInterval); + ASSERT_FALSE(conversionResult._retainPredicate); OptPhaseManager phaseManager( {OptPhaseManager::OptPhase::MemoSubstitutionPhase, @@ -4363,6 +4468,7 @@ TEST(PhysRewriter, PartialIndexReject) { make<Variable>("root"))); ASSERT_TRUE(conversionResult._success); ASSERT_FALSE(conversionResult._hasEmptyInterval); + ASSERT_FALSE(conversionResult._retainPredicate); OptPhaseManager phaseManager( {OptPhaseManager::OptPhase::MemoSubstitutionPhase, diff --git a/src/mongo/db/query/optimizer/utils/utils.cpp b/src/mongo/db/query/optimizer/utils/utils.cpp index 94fb5a2647d..322ae174570 100644 --- a/src/mongo/db/query/optimizer/utils/utils.cpp +++ b/src/mongo/db/query/optimizer/utils/utils.cpp @@ -343,7 +343,8 @@ PartialSchemaReqConversion::PartialSchemaReqConversion() _reqMap(), _hasIntersected(false), _hasTraversed(false), - _hasEmptyInterval(false) {} + _hasEmptyInterval(false), + _retainPredicate(false) {} PartialSchemaReqConversion::PartialSchemaReqConversion(PartialSchemaRequirements reqMap) : _success(true), @@ -351,7 +352,8 @@ PartialSchemaReqConversion::PartialSchemaReqConversion(PartialSchemaRequirements _reqMap(std::move(reqMap)), _hasIntersected(false), _hasTraversed(false), - _hasEmptyInterval(false) {} + _hasEmptyInterval(false), + _retainPredicate(false) {} PartialSchemaReqConversion::PartialSchemaReqConversion(ABT bound) : _success(true), @@ -359,7 +361,8 @@ PartialSchemaReqConversion::PartialSchemaReqConversion(ABT bound) _reqMap(), _hasIntersected(false), _hasTraversed(false), - _hasEmptyInterval(false) {} + _hasEmptyInterval(false), + _retainPredicate(false) {} /** * Helper class that builds PartialSchemaRequirements property from an EvalFilter or an EvalPath. @@ -391,6 +394,7 @@ public: PartialSchemaReqConversion result{std::move(newMap)}; result._hasEmptyInterval = pathResult._hasEmptyInterval; + result._retainPredicate = pathResult._retainPredicate; return result; } @@ -421,12 +425,12 @@ public: return {}; } - auto& leftReq = leftResult._reqMap; - auto& rightReq = rightResult._reqMap; + auto& leftReqMap = leftResult._reqMap; + auto& rightReqMap = rightResult._reqMap; if (isMultiplicative) { { ProjectionRenames projectionRenames; - if (!intersectPartialSchemaReq(leftReq, rightReq, projectionRenames)) { + if (!intersectPartialSchemaReq(leftReqMap, rightReqMap, projectionRenames)) { return {}; } if (!projectionRenames.empty()) { @@ -437,7 +441,7 @@ public: if (!leftResult._hasTraversed && !rightResult._hasTraversed) { // Intersect intervals only if we have not traversed. E.g. (-inf, 90) ^ (70, +inf) // becomes (70, 90). - for (auto& [key, req] : leftReq) { + for (auto& [key, req] : leftReqMap) { auto newIntervals = intersectDNFIntervals(req.getIntervals()); if (newIntervals) { req.getIntervals() = std::move(newIntervals.get()); @@ -446,7 +450,7 @@ public: break; } } - } else if (leftReq.size() > 1) { + } else if (leftReqMap.size() > 1) { // Reject if we have traversed, and composed requirements are more than one. return {}; } @@ -457,20 +461,91 @@ public: // We can only perform additive composition (OR) if we have a single matching key on both // sides. - if (leftReq.size() != 1 || rightReq.size() != 1) { + if (leftReqMap.size() != 1 || rightReqMap.size() != 1) { return {}; } - auto leftEntry = leftReq.begin(); - auto rightEntry = rightReq.begin(); - if (!(leftEntry->first == rightEntry->first)) { + auto leftEntry = leftReqMap.begin(); + auto rightEntry = rightReqMap.begin(); + auto& [leftKey, leftReq] = *leftEntry; + auto& [rightKey, rightReq] = *rightEntry; + + if (leftKey == rightKey) { + combineIntervalsDNF(false /*intersect*/, + leftEntry->second.getIntervals(), + rightEntry->second.getIntervals()); + return leftResult; + } + + // Here we can combine if paths differ only by a Traverse element and both intervals + // are the same, with array bounds. For example: + // Left: Id, [[1, 2, 3], [1, 2, 3]] + // Right: Traverse Id [[1, 2, 3], [1, 2, 3]] + // We can then combine into: + // Traverse Id: [[1, 2, 3], [1, 2, 3]] OR [1, 1] + // We also need to retain the original filter. + + if (leftKey._projectionName != rightKey._projectionName) { + return {}; + } + if (leftReq.hasBoundProjectionName() || rightReq.hasBoundProjectionName()) { + return {}; + } + + auto& leftIntervals = leftReq.getIntervals(); + auto& rightIntervals = rightReq.getIntervals(); + const auto& leftInterval = IntervalReqExpr::getSingularDNF(leftIntervals); + const auto& rightInterval = IntervalReqExpr::getSingularDNF(rightIntervals); + if (!leftInterval || !rightInterval || leftInterval != rightInterval) { + return {}; + } + if (!leftInterval->isEquality() || !rightInterval->isEquality()) { + // For now only supporting equalities. return {}; } - combineIntervalsDNF(false /*intersect*/, - leftEntry->second.getIntervals(), - rightEntry->second.getIntervals()); - return leftResult; + const ABT& bound = leftInterval->getLowBound().getBound(); + const auto constBoundPtr = bound.cast<Constant>(); + if (constBoundPtr == nullptr) { + return {}; + } + const auto [tag, val] = constBoundPtr->get(); + if (tag != sbe::value::TypeTags::Array) { + return {}; + } + const sbe::value::Array* arr = sbe::value::getArrayView(val); + if (arr->size() == 0) { + // For now we do not support empty arrays. Need to translate into null bounds. + return {}; + } + + const auto [elTag, elVal] = arr->getAt(0); + const auto [elTagCopy, elValCopy] = sbe::value::copyValue(elTag, elVal); + ABT elementBound = make<Constant>(elTagCopy, elValCopy); + // Create new interval which uses the first element of the array. + const IntervalReqExpr::Node& newInterval = + IntervalReqExpr::makeSingularDNF(IntervalRequirement{ + {true /*inclusive*/, elementBound}, {true /*inclusive*/, elementBound}}); + + const ABT& leftPath = leftKey._path; + const ABT& rightPath = rightKey._path; + if (const auto leftTraversePtr = leftPath.cast<PathTraverse>(); + leftTraversePtr != nullptr && leftTraversePtr->getPath().is<PathIdentity>() && + rightPath.is<PathIdentity>()) { + // leftPath = Id, rightPath = Traverse Id. + combineIntervalsDNF(false /*intersect*/, leftIntervals, newInterval); + leftResult._retainPredicate = true; + return leftResult; + } else if (const auto rightTraversePtr = rightPath.cast<PathTraverse>(); + rightTraversePtr != nullptr && rightTraversePtr->getPath().is<PathIdentity>() && + leftPath.is<PathIdentity>()) { + // leftPath = Traverse Id, rightPath = Id. + combineIntervalsDNF(false /*intersect*/, rightIntervals, newInterval); + rightResult._retainPredicate = true; + return rightResult; + } + + return {}; } PartialSchemaReqConversion transport(const ABT& n, diff --git a/src/mongo/db/query/optimizer/utils/utils.h b/src/mongo/db/query/optimizer/utils/utils.h index a42fd47714b..42845f0ce95 100644 --- a/src/mongo/db/query/optimizer/utils/utils.h +++ b/src/mongo/db/query/optimizer/utils/utils.h @@ -176,6 +176,11 @@ struct PartialSchemaReqConversion { // If we have determined that we have a contradiction. bool _hasEmptyInterval; + + // If true, retain original predicate after the conversion. In this case, the requirement map + // might capture only a part of the predicate. + // TODO: consider generalizing to retain only a part of the predicate. + bool _retainPredicate; }; /** |