diff options
author | Svilen Mihaylov <svilen.mihaylov@mongodb.com> | 2023-02-13 19:04:02 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-02-14 02:38:56 +0000 |
commit | 035ba8def8065ad6e5e27cde7a160431968a7fdc (patch) | |
tree | 3743f0a63580fdb2e60ede336e3a62584d8019f9 | |
parent | 26f1b9d0e4aea57e5c7b47fa56ace80ad629d405 (diff) | |
download | mongo-035ba8def8065ad6e5e27cde7a160431968a7fdc.tar.gz |
SERVER-73817 [CQF] Simplification around projection renames
-rw-r--r-- | src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp | 18 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/defs.h | 2 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/metadata_factory.cpp | 7 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/utils/utils.cpp | 67 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/utils/utils.h | 18 |
5 files changed, 62 insertions, 50 deletions
diff --git a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp index 01f2bc84c77..53d666ca2be 100644 --- a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp +++ b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp @@ -554,14 +554,14 @@ static boost::optional<ABT> mergeSargableNodes( } PartialSchemaRequirements mergedReqs = belowNode.getReqMap(); - ProjectionRenames projectionRenames; - if (!intersectPartialSchemaReq(mergedReqs, aboveNode.getReqMap(), projectionRenames)) { + if (!intersectPartialSchemaReq(mergedReqs, aboveNode.getReqMap())) { return {}; } const ProjectionName& scanProjName = indexingAvailability.getScanProjection(); + ProjectionRenames projectionRenames; bool hasEmptyInterval = simplifyPartialSchemaReqPaths( - scanProjName, multikeynessTrie, mergedReqs, ctx.getConstFold()); + scanProjName, multikeynessTrie, mergedReqs, projectionRenames, ctx.getConstFold()); if (hasEmptyInterval) { return createEmptyValueScanNode(ctx); } @@ -682,8 +682,16 @@ static void convertFilterToSargableNode(ABT::reference_type node, return; } - bool hasEmptyInterval = simplifyPartialSchemaReqPaths( - scanProjName, scanDef.getMultikeynessTrie(), conversion->_reqMap, ctx.getConstFold()); + ProjectionRenames projectionRenames_unused; + bool hasEmptyInterval = simplifyPartialSchemaReqPaths(scanProjName, + scanDef.getMultikeynessTrie(), + conversion->_reqMap, + projectionRenames_unused, + ctx.getConstFold()); + tassert(6624156, + "We should not be seeing renames from a converted Filter", + projectionRenames_unused.empty()); + if (hasEmptyInterval) { addEmptyValueScanNode(ctx); return; diff --git a/src/mongo/db/query/optimizer/defs.h b/src/mongo/db/query/optimizer/defs.h index 0d4079fda2e..f157558d515 100644 --- a/src/mongo/db/query/optimizer/defs.h +++ b/src/mongo/db/query/optimizer/defs.h @@ -68,6 +68,8 @@ using ProjectionNameVector = std::vector<ProjectionName>; template <typename T> using ProjectionNameMap = opt::unordered_map<ProjectionName, T, ProjectionName::Hasher>; + +// Key: new/target projection, value: existing/source projection. using ProjectionRenames = ProjectionNameMap<ProjectionName>; // Map from scanDefName to rid projection name. diff --git a/src/mongo/db/query/optimizer/metadata_factory.cpp b/src/mongo/db/query/optimizer/metadata_factory.cpp index 6eadf67d1cc..1479f056184 100644 --- a/src/mongo/db/query/optimizer/metadata_factory.cpp +++ b/src/mongo/db/query/optimizer/metadata_factory.cpp @@ -69,8 +69,13 @@ ScanDefinition createScanDef(ScanDefOptions options, // Simplify partial filter requirements using the non-multikey paths. for (auto& [indexDefName, indexDef] : indexDefs) { + ProjectionRenames projRenames_unused; [[maybe_unused]] const bool hasEmptyInterval = simplifyPartialSchemaReqPaths( - "<root>", multikeynessTrie, indexDef.getPartialReqMap(), constFold); + "<root>", multikeynessTrie, indexDef.getPartialReqMap(), projRenames_unused, constFold); + tassert(6624157, + "We should not be seeing renames from partial index filters", + projRenames_unused.empty()); + // If "hasEmptyInterval" is set, we have a partial filter index with an unsatisfiable // condition, which is thus guaranteed to never contain any documents. } diff --git a/src/mongo/db/query/optimizer/utils/utils.cpp b/src/mongo/db/query/optimizer/utils/utils.cpp index 3c12f562616..497129e2ffa 100644 --- a/src/mongo/db/query/optimizer/utils/utils.cpp +++ b/src/mongo/db/query/optimizer/utils/utils.cpp @@ -248,11 +248,7 @@ public: auto& leftReqMap = leftResult->_reqMap; auto& rightReqMap = rightResult->_reqMap; if constexpr (isMultiplicative) { - ProjectionRenames projectionRenames; - if (!intersectPartialSchemaReq(leftReqMap, rightReqMap, projectionRenames)) { - return {}; - } - if (!projectionRenames.empty()) { + if (!intersectPartialSchemaReq(leftReqMap, rightReqMap)) { return {}; } @@ -688,6 +684,7 @@ boost::optional<PartialSchemaReqConversion> convertExprToPartialSchemaReq( bool simplifyPartialSchemaReqPaths(const ProjectionName& scanProjName, const MultikeynessTrie& multikeynessTrie, PartialSchemaRequirements& reqMap, + ProjectionRenames& projectionRenames, const ConstFoldFn& constFold) { PartialSchemaRequirements result; boost::optional<std::pair<PartialSchemaKey, PartialSchemaRequirement>> prevEntry; @@ -704,26 +701,30 @@ bool simplifyPartialSchemaReqPaths(const ProjectionName& scanProjName, // Simplify paths by eliminating unnecessary Traverse elements. for (const auto& [key, req] : reqMap.conjuncts()) { PartialSchemaKey newKey = key; + bool simplified = false; - if (key._projectionName == scanProjName && checkPathContainsTraverse(newKey._path)) { + const bool containedTraverse = checkPathContainsTraverse(newKey._path); + if (key._projectionName == scanProjName && containedTraverse) { simplified |= simplifyTraverseNonArray(newKey._path, multikeynessTrie); } - // Maintain the invariant that Traverse-less keys appear only once: - // we can move the conjunction into the intervals and simplify. + // Ensure that Traverse-less keys appear only once: we can move the conjunction into the + // intervals and simplify. if (prevEntry) { - if (simplified && prevEntry->first == newKey) { + if ((!containedTraverse || simplified) && prevEntry->first == newKey) { auto& prevReq = prevEntry->second; - boost::optional<ProjectionName> resultBoundProjName; + boost::optional<ProjectionName> resultBoundProjName = + prevReq.getBoundProjectionName(); auto resultIntervals = prevReq.getIntervals(); if (const auto& boundProjName = req.getBoundProjectionName()) { - tassert(6624168, - "Should not be seeing more than one bound projection per key", - !prevReq.getBoundProjectionName()); - resultBoundProjName = boundProjName; - } else { - resultBoundProjName = prevReq.getBoundProjectionName(); + if (resultBoundProjName) { + // The existing name wins (stays in 'reqMap'). We tell the caller that the + // name "boundProjName" is available under "resultBoundProjName". + projectionRenames.emplace(*boundProjName, *resultBoundProjName); + } else { + resultBoundProjName = boundProjName; + } } combineIntervalsDNF(true /*intersect*/, resultIntervals, req.getIntervals()); @@ -793,8 +794,7 @@ bool simplifyPartialSchemaReqPaths(const ProjectionName& scanProjName, */ static bool intersectPartialSchemaReq(PartialSchemaRequirements& reqMap, PartialSchemaKey key, - PartialSchemaRequirement req, - ProjectionRenames& projectionRenames) { + PartialSchemaRequirement req) { for (;;) { bool merged = false; @@ -802,6 +802,8 @@ static bool intersectPartialSchemaReq(PartialSchemaRequirements& reqMap, const bool pathHasTraverse = !pathIsId && checkPathContainsTraverse(key._path); { bool success = false; + bool addNewKey = false; + // Look for exact match on the path, and if found combine intervals. for (auto& [existingKey, existingReq] : reqMap.conjuncts()) { if (existingKey != key) { @@ -811,8 +813,9 @@ static bool intersectPartialSchemaReq(PartialSchemaRequirements& reqMap, auto resultIntervals = existingReq.getIntervals(); if (pathHasTraverse) { combineIntervalsDNF(true /*intersect*/, resultIntervals, req.getIntervals()); - if (resultIntervals == existingReq.getIntervals()) { - // Existing interval subsumes the new one. + if (resultIntervals == existingReq.getIntervals() || + resultIntervals == req.getIntervals()) { + // Existing interval subsumes the new one or vice versa. success = true; } } else { @@ -826,11 +829,9 @@ static bool intersectPartialSchemaReq(PartialSchemaRequirements& reqMap, existingReq.getBoundProjectionName(); if (const auto& boundProjName = req.getBoundProjectionName()) { if (resultBoundProjName) { - // The new and existing projections both bind a name, so: - // - The existing name wins (stays in 'reqMap'). - // - We tell the caller that the name bound is 'req' is now - // available by the name in 'existingReq'. - projectionRenames.emplace(*boundProjName, *resultBoundProjName); + // The new and existing projections both bind a name. Add an entry with + // the new name. We will attempt to simplify later. + addNewKey = true; } else { // Only the new projection binds a name, so we'll update 'reqMap' to // include it. @@ -849,6 +850,9 @@ static bool intersectPartialSchemaReq(PartialSchemaRequirements& reqMap, } if (success) { + if (addNewKey) { + reqMap.add(std::move(key), std::move(req)); + } return true; } } @@ -900,9 +904,8 @@ bool isSubsetOfPartialSchemaReq(const PartialSchemaRequirements& lhs, !req.getBoundProjectionName()); } - ProjectionRenames projectionRenames_unused; PartialSchemaRequirements intersection = lhs; - if (intersectPartialSchemaReq(intersection, rhs, projectionRenames_unused)) { + if (intersectPartialSchemaReq(intersection, rhs)) { return intersection == lhs; } // Intersection was empty-set, and we assume neither input is empty-set. @@ -911,10 +914,9 @@ bool isSubsetOfPartialSchemaReq(const PartialSchemaRequirements& lhs, } bool intersectPartialSchemaReq(PartialSchemaRequirements& target, - const PartialSchemaRequirements& source, - ProjectionRenames& projectionRenames) { + const PartialSchemaRequirements& source) { for (const auto& [key, req] : source.conjuncts()) { - if (!intersectPartialSchemaReq(target, key, req, projectionRenames)) { + if (!intersectPartialSchemaReq(target, key, req)) { return false; } } @@ -1582,13 +1584,10 @@ void sortResidualRequirements(ResidualRequirementsWithCE& residualReq) { } } -void applyProjectionRenames(ProjectionRenames projectionRenames, - ABT& node, - const std::function<void(const ABT& node)>& visitor) { +void applyProjectionRenames(ProjectionRenames projectionRenames, ABT& node) { for (auto&& [targetProjName, sourceProjName] : projectionRenames) { node = make<EvaluationNode>( std::move(targetProjName), make<Variable>(std::move(sourceProjName)), std::move(node)); - visitor(node); } } diff --git a/src/mongo/db/query/optimizer/utils/utils.h b/src/mongo/db/query/optimizer/utils/utils.h index 65829118544..54a3519625c 100644 --- a/src/mongo/db/query/optimizer/utils/utils.h +++ b/src/mongo/db/query/optimizer/utils/utils.h @@ -241,11 +241,15 @@ boost::optional<PartialSchemaReqConversion> convertExprToPartialSchemaReq( /** * Given a set of non-multikey paths, remove redundant Traverse elements from paths in a Partial - * Schema Requirement structure. Returns true if we have an empty result after simplification. + * Schema Requirement structure. Following that the intervals of any remaining non-multikey paths + * (following simplification) on the same key are intersected. Returns true if we have an empty + * result after simplification. Each redundant binding gets an entry in 'projectionRenames', which + * maps redundant name to the de-duplicated name. */ bool simplifyPartialSchemaReqPaths(const ProjectionName& scanProjName, const MultikeynessTrie& multikeynessTrie, PartialSchemaRequirements& reqMap, + ProjectionRenames& projectionRenames, const ConstFoldFn& constFold); /** @@ -272,9 +276,7 @@ bool isSubsetOfPartialSchemaReq(const PartialSchemaRequirements& lhs, * The intersection: * - is a predicate that matches iff both original predicates match. * - has all the bindings from 'target' and 'source', but excluding - * bindings that would be redundant (have the same key). Each - * redundant binding gets an entry in 'projectionRenames', which maps - * the redundant name to the de-duplicated name. + * bindings that would be redundant (have the same key). * * "Failure" means we are unable to represent the result as a PartialSchemaRequirements. * This can happen when: @@ -282,8 +284,7 @@ bool isSubsetOfPartialSchemaReq(const PartialSchemaRequirements& lhs, * - 'source' reads from a projection bound by 'target'. */ bool intersectPartialSchemaReq(PartialSchemaRequirements& target, - const PartialSchemaRequirements& source, - ProjectionRenames& projectionRenames); + const PartialSchemaRequirements& source); /** @@ -343,10 +344,7 @@ void lowerPartialSchemaRequirements(CEType scanGroupCE, void sortResidualRequirements(ResidualRequirementsWithCE& residualReq); -void applyProjectionRenames( - ProjectionRenames projectionRenames, - ABT& node, - const std::function<void(const ABT& node)>& visitor = [](const ABT&) {}); +void applyProjectionRenames(ProjectionRenames projectionRenames, ABT& node); void removeRedundantResidualPredicates(const ProjectionNameOrderPreservingSet& requiredProjections, ResidualRequirements& residualReqs, |