diff options
author | David Percy <david.percy@mongodb.com> | 2023-01-13 03:26:24 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-01-20 19:42:15 +0000 |
commit | 52832ac0d4440785350bfaab4ff0cbe8311875b7 (patch) | |
tree | 2730f51e32ce40b3894c808fdd04445f0eb8a66d | |
parent | 8142491b15a87a6e81198cc249a36ed4e70d74ec (diff) | |
download | mongo-52832ac0d4440785350bfaab4ff0cbe8311875b7.tar.gz |
SERVER-71550 Change PartialSchemaRequirements map to vector
17 files changed, 353 insertions, 126 deletions
diff --git a/src/mongo/db/query/ce/heuristic_estimator.cpp b/src/mongo/db/query/ce/heuristic_estimator.cpp index 9134d6e102f..4d472d086c4 100644 --- a/src/mongo/db/query/ce/heuristic_estimator.cpp +++ b/src/mongo/db/query/ce/heuristic_estimator.cpp @@ -223,7 +223,7 @@ public: SelectivityType topLevelSel{1.0}; std::vector<SelectivityType> topLevelSelectivities; - for (const auto& [key, req] : node.getReqMap()) { + for (const auto& [key, req] : node.getReqMap().conjuncts()) { if (req.getIsPerfOnly()) { // Ignore perf-only requirements. continue; diff --git a/src/mongo/db/query/ce/hinted_estimator.cpp b/src/mongo/db/query/ce/hinted_estimator.cpp index 41b63a8f914..a29e1bf8037 100644 --- a/src/mongo/db/query/ce/hinted_estimator.cpp +++ b/src/mongo/db/query/ce/hinted_estimator.cpp @@ -40,7 +40,7 @@ public: CEType /*bindsResult*/, CEType /*refsResult*/) { CEType result = childResult; - for (const auto& [key, req] : node.getReqMap()) { + for (const auto& [key, req] : node.getReqMap().conjuncts()) { if (!isIntervalReqFullyOpenDNF(req.getIntervals())) { auto it = _hints.find(key); if (it != _hints.cend()) { diff --git a/src/mongo/db/query/ce/histogram_estimator.cpp b/src/mongo/db/query/ce/histogram_estimator.cpp index 05bfb1cd633..f8e271f92b5 100644 --- a/src/mongo/db/query/ce/histogram_estimator.cpp +++ b/src/mongo/db/query/ce/histogram_estimator.cpp @@ -250,7 +250,7 @@ public: // Initial first pass through the requirements map to extract information about each path. std::map<std::string, SargableConjunct> conjunctRequirements; - for (const auto& [key, req] : node.getReqMap()) { + for (const auto& [key, req] : node.getReqMap().conjuncts()) { if (req.getIsPerfOnly()) { // Ignore perf-only requirements. continue; diff --git a/src/mongo/db/query/ce/sampling_estimator.cpp b/src/mongo/db/query/ce/sampling_estimator.cpp index 779f839cf95..ee9e3d3c43b 100644 --- a/src/mongo/db/query/ce/sampling_estimator.cpp +++ b/src/mongo/db/query/ce/sampling_estimator.cpp @@ -75,7 +75,7 @@ public: void transport(ABT& n, const SargableNode& node, ABT& childResult, ABT& refs, ABT& binds) { ABT result = childResult; // Retain only output bindings without applying filters. - for (const auto& [key, req] : node.getReqMap()) { + for (const auto& [key, req] : node.getReqMap().conjuncts()) { if (const auto& boundProjName = req.getBoundProjectionName()) { lowerPartialSchemaRequirement( key, @@ -161,7 +161,7 @@ public: // Here we assume that each requirement is independent. // TODO: consider estimating together the entire set of requirements (but caching!) CEType result = childResult; - for (const auto& [key, req] : node.getReqMap()) { + for (const auto& [key, req] : node.getReqMap().conjuncts()) { if (req.getIsPerfOnly()) { // Ignore perf-only requirements. continue; diff --git a/src/mongo/db/query/ce/test_utils.h b/src/mongo/db/query/ce/test_utils.h index 34e4ec729fb..4083c9aefdb 100644 --- a/src/mongo/db/query/ce/test_utils.h +++ b/src/mongo/db/query/ce/test_utils.h @@ -116,7 +116,7 @@ bool isSargableNode(const ABT& n) { // for a SargableNode with a specific number of predicates. For tests, we only care about // verifying the cardinality of that one. if (auto* sargable = n.cast<optimizer::SargableNode>()) { - return sargable->getReqMap().size() == NumReq; + return sargable->getReqMap().numLeaves() == NumReq; } return false; } diff --git a/src/mongo/db/query/optimizer/cascades/implementers.cpp b/src/mongo/db/query/optimizer/cascades/implementers.cpp index 12da8837f86..a1dfe1e8576 100644 --- a/src/mongo/db/query/optimizer/cascades/implementers.cpp +++ b/src/mongo/db/query/optimizer/cascades/implementers.cpp @@ -423,7 +423,7 @@ public: const ProjectionName& scanProjectionName = indexingAvailability.getScanProjection(); const PartialSchemaRequirements& reqMap = node.getReqMap(); - for (const auto& [key, req] : reqMap) { + for (const auto& [key, req] : reqMap.conjuncts()) { if (key._projectionName != scanProjectionName) { // We can only satisfy partial schema requirements using our root projection. return; @@ -444,7 +444,7 @@ public: requiresRootProjection = projectionsLeftToSatisfy.erase(scanProjectionName); } - for (const auto& entry : reqMap) { + for (const auto& entry : reqMap.conjuncts()) { if (const auto& boundProjName = entry.second.getBoundProjectionName()) { // Project field only if it required. projectionsLeftToSatisfy.erase(*boundProjName); @@ -526,8 +526,10 @@ public: { PartialSchemaKeySet residualQueryKeySet; for (const auto& [residualKey, residualReq, entryIndex] : residualReqs) { - auto entryIt = reqMap.cbegin(); + // Find the indexed requirement this residual requirement refers to. + auto entryIt = reqMap.conjuncts().cbegin(); std::advance(entryIt, entryIndex); + residualQueryKeySet.emplace(entryIt->first); residualReqsWithCE.emplace_back( residualKey, residualReq, partialSchemaKeyCE.at(entryIndex).second); @@ -535,7 +537,7 @@ public: if (scanGroupCE > 0.0) { size_t entryIndex = 0; - for (const auto& [key, req] : reqMap) { + for (const auto& [key, req] : reqMap.conjuncts()) { if (residualQueryKeySet.count(key) == 0) { const SelectivityType sel = partialSchemaKeyCE.at(entryIndex).second / scanGroupCE; @@ -1476,10 +1478,9 @@ private: distribAndProjections._projectionNames; for (const ABT& partitioningPath : distributionAndPaths._paths) { - if (auto it = reqMap.find(PartialSchemaKey{scanProjection, partitioningPath}); - it != reqMap.cend() && - it->second.getBoundProjectionName() == - requiredProjections.at(distributionPartitionIndex)) { + if (auto proj = reqMap.findProjection( + PartialSchemaKey{scanProjection, partitioningPath}); + proj && *proj == requiredProjections.at(distributionPartitionIndex)) { distributionPartitionIndex++; } else { return false; diff --git a/src/mongo/db/query/optimizer/cascades/logical_props_derivation.cpp b/src/mongo/db/query/optimizer/cascades/logical_props_derivation.cpp index ce3da42a69d..8f83ad3682a 100644 --- a/src/mongo/db/query/optimizer/cascades/logical_props_derivation.cpp +++ b/src/mongo/db/query/optimizer/cascades/logical_props_derivation.cpp @@ -62,6 +62,13 @@ static void populateInitialDistributions(const DistributionAndPaths& distributio } } +/** + * If the projections in 'req' cover every path of 'distributionAndPaths', + * then add a new 'DistributionAndProjections' entry to 'distributions'. + * + * This is used to populate metadata in a SargableNode, so we know what distributions + * it can provide, and what projection names matter for each one. + */ static void populateDistributionPaths(const PartialSchemaRequirements& req, const ProjectionName& scanProjectionName, const DistributionAndPaths& distributionAndPaths, @@ -72,13 +79,11 @@ static void populateDistributionPaths(const PartialSchemaRequirements& req, ProjectionNameVector distributionProjections; for (const ABT& path : distributionAndPaths._paths) { - auto it = req.find(PartialSchemaKey{scanProjectionName, path}); - if (it == req.cend()) { + if (auto binding = req.findProjection({scanProjectionName, path})) { + distributionProjections.push_back(*binding); + } else { break; } - if (const auto& boundProjName = it->second.getBoundProjectionName()) { - distributionProjections.push_back(*boundProjName); - } } if (distributionProjections.size() == distributionAndPaths._paths.size()) { @@ -100,7 +105,7 @@ static bool computeEqPredsOnly(const PartialSchemaRequirements& reqMap) { PartialSchemaKeySet equalityKeys; PartialSchemaKeySet fullyOpenKeys; - for (const auto& [key, req] : reqMap) { + for (const auto& [key, req] : reqMap.conjuncts()) { const auto& intervals = req.getIntervals(); if (auto singularInterval = IntervalReqExpr::getSingularDNF(intervals)) { if (singularInterval->isFullyOpen()) { diff --git a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp index 310abb117f4..a13ae81cdc5 100644 --- a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp +++ b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp @@ -565,7 +565,7 @@ static boost::optional<ABT> mergeSargableNodes( return createEmptyValueScanNode(ctx); } - if (mergedReqs.size() > SargableNode::kMaxPartialSchemaReqs) { + if (mergedReqs.numLeaves() > SargableNode::kMaxPartialSchemaReqs) { return {}; } @@ -660,19 +660,16 @@ static void convertFilterToSargableNode(ABT::reference_type node, } // Remove any partial schema requirements which do not constrain their input. - for (auto it = conversion->_reqMap.cbegin(); it != conversion->_reqMap.cend();) { + + conversion->_reqMap.simplify([](const PartialSchemaKey& key, PartialSchemaRequirement& req) { uassert(6624111, "Filter partial schema requirement must contain a variable name.", - it->first._projectionName); + key._projectionName); uassert(6624112, "Filter partial schema requirement cannot bind.", - !it->second.getBoundProjectionName()); - if (isIntervalReqFullyOpenDNF(it->second.getIntervals())) { - it = conversion->_reqMap.erase(it); - } else { - ++it; - } - } + !req.getBoundProjectionName()); + return true; + }); if (conversion->_reqMap.empty()) { // If the filter has no constraints after removing no-ops, then replace with its child. We @@ -690,7 +687,7 @@ static void convertFilterToSargableNode(ABT::reference_type node, addEmptyValueScanNode(ctx); return; } - if (conversion->_reqMap.size() > SargableNode::kMaxPartialSchemaReqs) { + if (conversion->_reqMap.numLeaves() > SargableNode::kMaxPartialSchemaReqs) { // Too many requirements. return; } @@ -1128,12 +1125,13 @@ struct SubstituteConvert<EvaluationNode> { uassert(6624165, "Should not be getting retainPredicate set for EvalNodes", !conversion->_retainPredicate); - if (conversion->_reqMap.size() != 1) { + if (conversion->_reqMap.numLeaves() != 1) { // For evaluation nodes we expect to create a single entry. return; } - for (auto& [key, req] : conversion->_reqMap) { + conversion->_reqMap.transform([&](const PartialSchemaKey& key, + PartialSchemaRequirement& req) { req = { evalNode.getProjectionName(), std::move(req.getIntervals()), req.getIsPerfOnly()}; @@ -1143,7 +1141,7 @@ struct SubstituteConvert<EvaluationNode> { uassert(6624115, "Eval partial schema requirement cannot have a range", isIntervalReqFullyOpenDNF(req.getIntervals())); - } + }); bool hasEmptyInterval = false; auto candidateIndexes = computeCandidateIndexes(ctx.getPrefixId(), @@ -1172,7 +1170,7 @@ struct SubstituteConvert<EvaluationNode> { static void lowerSargableNode(const SargableNode& node, RewriteContext& ctx) { ABT n = node.getChild(); const auto reqMap = node.getReqMap(); - for (const auto& [key, req] : reqMap) { + for (const auto& [key, req] : reqMap.conjuncts()) { lowerPartialSchemaRequirement(key, req, n, ctx.getPathToInterval()); } ctx.addNode(n, true /*clear*/); @@ -1217,14 +1215,13 @@ static SplitRequirementsResult splitRequirements( boost::optional<ProjectionName> boundProjectionName, IntervalReqExpr::Node intervals) { // We always strip out the perf-only flag. - reqMap.emplace(key, - PartialSchemaRequirement{std::move(boundProjectionName), - std::move(intervals), - false /*isPerfOnly*/}); + reqMap.add(key, + PartialSchemaRequirement{ + std::move(boundProjectionName), std::move(intervals), false /*isPerfOnly*/}); }; size_t index = 0; - for (const auto& [key, req] : reqMap) { + for (const auto& [key, req] : reqMap.conjuncts()) { if (((1ull << index) & mask) != 0) { bool addedToLeft = false; @@ -1339,16 +1336,16 @@ struct ExploreConvert<SargableNode> { std::vector<bool> mayReturnNull; { // Pre-compute if a requirement's interval is fully open. - isFullyOpen.reserve(reqMap.size()); - for (const auto& [key, req] : reqMap) { + isFullyOpen.reserve(reqMap.numLeaves()); + for (const auto& [key, req] : reqMap.conjuncts()) { isFullyOpen.push_back(isIntervalReqFullyOpenDNF(req.getIntervals())); } if (!hints._fastIndexNullHandling && !isIndex) { // Pre-compute if a requirement's interval may contain nulls, and also has an output // binding. - mayReturnNull.reserve(reqMap.size()); - for (const auto& [key, req] : reqMap) { + mayReturnNull.reserve(reqMap.numLeaves()); + for (const auto& [key, req] : reqMap.conjuncts()) { mayReturnNull.push_back(req.mayReturnNull(ctx.getConstFold())); } } @@ -1359,7 +1356,7 @@ struct ExploreConvert<SargableNode> { // try having at least one predicate on the left (mask = 1), and we try all possible // subsets. For index intersection however (isIndex = true), we try symmetric partitioning // (thus the high bound is 2^(N-1)). - const size_t reqSize = reqMap.size(); + const size_t reqSize = reqMap.numConjuncts(); const size_t highMask = isIndex ? (1ull << (reqSize - 1)) : (1ull << reqSize); for (size_t mask = 1; mask < highMask; mask++) { SplitRequirementsResult splitResult = splitRequirements(mask, diff --git a/src/mongo/db/query/optimizer/cascades/memo.cpp b/src/mongo/db/query/optimizer/cascades/memo.cpp index 68851f79cac..5353d66a566 100644 --- a/src/mongo/db/query/optimizer/cascades/memo.cpp +++ b/src/mongo/db/query/optimizer/cascades/memo.cpp @@ -623,7 +623,7 @@ void Memo::estimateCE(const Context& ctx, const GroupIdType groupId) { auto& partialSchemaKeyCE = ceProp.getPartialSchemaKeyCE(); invariant(partialSchemaKeyCE.empty()); - for (const auto& [key, req] : sargablePtr->getReqMap()) { + for (const auto& [key, req] : sargablePtr->getReqMap().conjuncts()) { ABT singularReq = make<SargableNode>(PartialSchemaRequirements{{key, req}}, CandidateIndexes{}, ScanParams{}, diff --git a/src/mongo/db/query/optimizer/explain.cpp b/src/mongo/db/query/optimizer/explain.cpp index d5bbd9b78ea..b3a6358bfec 100644 --- a/src/mongo/db/query/optimizer/explain.cpp +++ b/src/mongo/db/query/optimizer/explain.cpp @@ -1190,7 +1190,7 @@ public: void printPartialSchemaReqMap(ExplainPrinter& parent, const PartialSchemaRequirements& reqMap) { std::vector<ExplainPrinter> printers; - for (const auto& [key, req] : reqMap) { + for (const auto& [key, req] : reqMap.conjuncts()) { ExplainPrinter local; if (const auto& projName = key._projectionName) { diff --git a/src/mongo/db/query/optimizer/index_bounds.cpp b/src/mongo/db/query/optimizer/index_bounds.cpp index a1a6d10171b..3135126a57b 100644 --- a/src/mongo/db/query/optimizer/index_bounds.cpp +++ b/src/mongo/db/query/optimizer/index_bounds.cpp @@ -189,6 +189,111 @@ bool PartialSchemaKeyLessComparator::operator()(const PartialSchemaKey& k1, return compareExprAndPaths(k1._path, k2._path) < 0; } +void PartialSchemaRequirements::normalize() { + std::stable_sort( + _repr.begin(), + _repr.end(), + [lt = PartialSchemaKeyLessComparator{}](const auto& entry1, const auto& entry2) -> bool { + return lt(entry1.first, entry2.first); + }); +} + +PartialSchemaRequirements::PartialSchemaRequirements( + std::vector<PartialSchemaRequirements::Entry> entries) { + for (Entry& entry : entries) { + _repr.push_back(std::move(entry)); + } + + normalize(); +} + +std::set<ProjectionName> PartialSchemaRequirements::getBoundNames() const { + std::set<ProjectionName> names; + for (auto&& [key, b] : iterateBindings()) { + names.insert(b); + } + return names; +} + +bool PartialSchemaRequirements::operator==(const PartialSchemaRequirements& other) const { + return _repr == other._repr; +} + +bool PartialSchemaRequirements::empty() const { + return _repr.empty(); +} + +size_t PartialSchemaRequirements::numLeaves() const { + return _repr.size(); +} + +size_t PartialSchemaRequirements::numConjuncts() const { + return _repr.size(); +} + +boost::optional<ProjectionName> PartialSchemaRequirements::findProjection( + const PartialSchemaKey& key) const { + for (auto [k, req] : _repr) { + if (k == key) { + return req.getBoundProjectionName(); + } + } + return {}; +} + +boost::optional<std::pair<size_t, PartialSchemaRequirement>> +PartialSchemaRequirements::findFirstConjunct(const PartialSchemaKey& key) const { + size_t i = 0; + for (auto [k, req] : _repr) { + if (k == key) { + return {{i, req}}; + } + ++i; + } + return {}; +} + +PartialSchemaRequirements::Bindings PartialSchemaRequirements::iterateBindings() const { + Bindings result; + for (auto&& [key, req] : _repr) { + if (auto binding = req.getBoundProjectionName()) { + result.emplace_back(key, *binding); + } + } + return result; +} + +void PartialSchemaRequirements::add(PartialSchemaKey key, PartialSchemaRequirement req) { + _repr.emplace_back(std::move(key), std::move(req)); + + normalize(); +} + +void PartialSchemaRequirements::transform( + std::function<void(const PartialSchemaKey&, PartialSchemaRequirement&)> func) { + for (auto& [key, req] : _repr) { + func(key, req); + } +} + +bool PartialSchemaRequirements::simplify( + std::function<bool(const PartialSchemaKey&, PartialSchemaRequirement&)> func) { + for (auto it = _repr.begin(); it != _repr.end();) { + auto& [key, req] = *it; + + if (!func(key, req)) { + return false; + } + if (isIntervalReqFullyOpenDNF(it->second.getIntervals()) && !req.getBoundProjectionName()) { + it = _repr.erase(it); + } else { + ++it; + } + } + return true; +} + + ResidualRequirement::ResidualRequirement(PartialSchemaKey key, PartialSchemaRequirement req, size_t entryIndex) diff --git a/src/mongo/db/query/optimizer/index_bounds.h b/src/mongo/db/query/optimizer/index_bounds.h index 303920a56ba..f26f88b0aff 100644 --- a/src/mongo/db/query/optimizer/index_bounds.h +++ b/src/mongo/db/query/optimizer/index_bounds.h @@ -142,13 +142,122 @@ struct PartialSchemaKeyLessComparator { }; /** - * Map from referred (or input) projection name to list of requirements for that projection. + * Represents a set of predicates and projections. Cannot represent all predicates/projections: + * only those that can typically be answered efficiently with an index. + * * Only one instance of a path without Traverse elements (non-multikey) is allowed. By contrast * several instances of paths with Traverse elements (multikey) are allowed. For example: Get "a" * Get "b" Id is allowed just once while Get "a" Traverse Get "b" Id is allowed multiple times. + * + * The default / empty state represents a conjunction of zero predicates, which means always true. */ -using PartialSchemaRequirements = - std::multimap<PartialSchemaKey, PartialSchemaRequirement, PartialSchemaKeyLessComparator>; +class PartialSchemaRequirements { +public: + using Entry = std::pair<PartialSchemaKey, PartialSchemaRequirement>; + struct ConstIter { + auto begin() const { + return _begin; + } + auto end() const { + return _end; + } + auto cbegin() const { + return _begin; + } + auto cend() const { + return _end; + } + + std::vector<Entry>::const_iterator _begin; + std::vector<Entry>::const_iterator _end; + }; + + struct Iter { + auto begin() const { + return _begin; + } + auto end() const { + return _end; + } + auto cbegin() const { + return _begin; + } + auto cend() const { + return _end; + } + + std::vector<Entry>::iterator _begin; + std::vector<Entry>::iterator _end; + }; + + PartialSchemaRequirements() = default; + PartialSchemaRequirements(std::vector<Entry>); + PartialSchemaRequirements(std::initializer_list<Entry> entries) + : PartialSchemaRequirements(std::vector<Entry>(entries)) {} + + bool operator==(const PartialSchemaRequirements& other) const; + + /** + * Return true if there are zero predicates and zero projections. + */ + bool empty() const; + + size_t numLeaves() const; + size_t numConjuncts() const; + + std::set<ProjectionName> getBoundNames() const; + + boost::optional<ProjectionName> findProjection(const PartialSchemaKey&) const; + + /** + * Picks the first top-level conjunct matching the given key. + * + * Result includes the index of the top-level conjunct. + */ + boost::optional<std::pair<size_t, PartialSchemaRequirement>> findFirstConjunct( + const PartialSchemaKey&) const; + + ConstIter conjuncts() const { + return {_repr.begin(), _repr.end()}; + } + + Iter conjuncts() { + return {_repr.begin(), _repr.end()}; + } + + using Bindings = std::vector<std::pair<PartialSchemaKey, ProjectionName>>; + Bindings iterateBindings() const; + + void add(PartialSchemaKey, PartialSchemaRequirement); + + /** + * Apply an in-place transformation to each PartialSchemaRequirement. + * + * Since the key is only exposed read-only to the callback, we don't need to + * worry about restoring the no-Traverseless-duplicates invariant. + */ + void transform(std::function<void(const PartialSchemaKey&, PartialSchemaRequirement&)>); + + /** + * Apply a simplification to each PartialSchemaRequirement. + * + * The callback can return false if an individual PartialSchemaRequirement + * simplifies to an always-false predicate. + * + * This method returns false if the overall result is an always-false predicate. + * + * This method will also remove any predicates that are trivially true (those will + * a fully open DNF interval). + */ + bool simplify(std::function<bool(const PartialSchemaKey&, PartialSchemaRequirement&)>); + +private: + // Restore the invariant that the entries are sorted by key. + void normalize(); + + using Repr = std::vector<Entry>; + Repr _repr; +}; /** * Used to track cardinality estimates per predicate inside a PartialSchemaRequirement. The order of diff --git a/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp b/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp index 716adaeaa32..c1694f9c898 100644 --- a/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp +++ b/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp @@ -2438,14 +2438,7 @@ TEST(LogicalRewriter, EmptyArrayIndexBounds) { auto phaseManager = makePhaseManager( {OptPhase::MemoSubstitutionPhase}, prefixId, - {{{"c1", - createScanDef( - {}, - {{"index1", - IndexDefinition{{{makeNonMultikeyIndexPath("a"), CollationOp::Ascending}}, - false /*isMultiKey*/, - {DistributionType::Centralized}, - {}}}})}}}, + {{{"c1", createScanDef({}, {})}}}, boost::none /*costModel*/, {true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests}); @@ -2469,11 +2462,9 @@ TEST(LogicalRewriter, EmptyArrayIndexBounds) { "| Const [[]]\n" "Sargable [Complete]\n" "| | | | | requirementsMap: \n" - "| | | | | refProjection: root, path: 'PathGet [a] PathIdentity []', interv" - "als: {{{=Const [undefined]}} U {{=Const [[]]}}}, perfOnly\n" + "| | | | | refProjection: root, path: 'PathGet [a] PathTraverse [1] PathIde" + "ntity []', intervals: {{{=Const [undefined]}} U {{=Const [[]]}}}, perfOnly\n" "| | | | candidateIndexes: \n" - "| | | | candidateId: 1, index1, {}, {Compound}, {{{=Const [undefined]}} U " - "{{=Const [[]]}}}\n" "| | | scanParams: \n" "| | | {}\n" "| | BindBlock:\n" diff --git a/src/mongo/db/query/optimizer/node.cpp b/src/mongo/db/query/optimizer/node.cpp index 1d0c1cb9260..133af61cd2d 100644 --- a/src/mongo/db/query/optimizer/node.cpp +++ b/src/mongo/db/query/optimizer/node.cpp @@ -372,17 +372,15 @@ const ProjectionName& RIDUnionNode::getScanProjectionName() const { static ProjectionNameVector createSargableBindings(const PartialSchemaRequirements& reqMap) { ProjectionNameVector result; - for (const auto& entry : reqMap) { - if (const auto& boundProjName = entry.second.getBoundProjectionName()) { - result.push_back(*boundProjName); - } + for (const auto& [key, binding] : reqMap.iterateBindings()) { + result.push_back(binding); } return result; } static ProjectionNameVector createSargableReferences(const PartialSchemaRequirements& reqMap) { ProjectionNameOrderPreservingSet result; - for (const auto& entry : reqMap) { + for (const auto& entry : reqMap.conjuncts()) { result.emplace_back(*entry.first._projectionName); } return result.getVector(); @@ -403,12 +401,12 @@ SargableNode::SargableNode(PartialSchemaRequirements reqMap, assertNodeSort(getChild()); tassert(6624085, "SargableNode requires at least one predicate", !_reqMap.empty()); tassert(6624086, - str::stream() << "SargableNode has too many predicates: " << _reqMap.size(), - _reqMap.size() <= kMaxPartialSchemaReqs); + str::stream() << "SargableNode has too many predicates: " << _reqMap.numLeaves(), + _reqMap.numLeaves() <= kMaxPartialSchemaReqs); // Assert merged map does not contain duplicate bound projections. ProjectionNameSet boundsProjectionNameSet; - for (const auto& [key, req] : _reqMap) { + for (const auto& [key, req] : _reqMap.conjuncts()) { if (const auto& boundProjName = req.getBoundProjectionName()) { tassert(6624094, "SargableNode has a multikey requirement with a non-trivial interval which " @@ -427,7 +425,7 @@ SargableNode::SargableNode(PartialSchemaRequirements reqMap, } // Assert there are no references to internally bound projections. - for (const auto& [key, req] : _reqMap) { + for (const auto& [key, req] : _reqMap.conjuncts()) { tassert(6624088, "SargableNode cannot reference an internally bound projection", boundsProjectionNameSet.count(*key._projectionName) == 0); @@ -436,7 +434,7 @@ SargableNode::SargableNode(PartialSchemaRequirements reqMap, // Assert that non-multikey paths have at most one requirement. { PartialSchemaKeySet seen; - for (auto&& [key, req] : reqMap) { + for (auto&& [key, req] : reqMap.conjuncts()) { if (!checkPathContainsTraverse(key._path)) { auto inserted = seen.insert(key).second; tassert(7155020, diff --git a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp index 47179205e5c..b7a62a4682f 100644 --- a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp +++ b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp @@ -4830,7 +4830,7 @@ TEST(PhysRewriter, HashPartitioning) { optimized); } -TEST(PhysRewriter, IndexPartitioning) { +TEST(PhysRewriter, IndexPartitioning0) { using namespace properties; auto prefixId = PrefixId::createForTests(); diff --git a/src/mongo/db/query/optimizer/utils/abt_hash.cpp b/src/mongo/db/query/optimizer/utils/abt_hash.cpp index 9ac38721dae..2b9454cfc84 100644 --- a/src/mongo/db/query/optimizer/utils/abt_hash.cpp +++ b/src/mongo/db/query/optimizer/utils/abt_hash.cpp @@ -122,7 +122,7 @@ static size_t computePartialSchemaReqHash(const PartialSchemaRequirements& reqMa size_t result = 17; IntervalHasher<IntervalReqExpr> intervalHasher; - for (const auto& [key, req] : reqMap) { + for (const auto& [key, req] : reqMap.conjuncts()) { if (const auto& projName = key._projectionName) { updateHash(result, ProjectionName::Hasher()(*projName)); } diff --git a/src/mongo/db/query/optimizer/utils/utils.cpp b/src/mongo/db/query/optimizer/utils/utils.cpp index c188bc1ff59..d67b05c9cdc 100644 --- a/src/mongo/db/query/optimizer/utils/utils.cpp +++ b/src/mongo/db/query/optimizer/utils/utils.cpp @@ -221,11 +221,11 @@ public: const ProjectionName& boundVarName = boundPtr->name(); PartialSchemaRequirements newMap; - for (auto& [key, req] : pathResult->_reqMap) { + for (auto& [key, req] : pathResult->_reqMap.conjuncts()) { if (key._projectionName) { return {}; } - newMap.emplace(PartialSchemaKey{boundVarName, key._path}, std::move(req)); + newMap.add(PartialSchemaKey{boundVarName, key._path}, std::move(req)); } PartialSchemaReqConversion result{std::move(newMap)}; @@ -291,51 +291,63 @@ public: return leftResult; } // Additive composition: we never have projections in this case; only predicates. - for (const auto& [key, req] : leftReqMap) { + for (const auto& [key, req] : leftReqMap.conjuncts()) { tassert(7155021, "Unexpected binding in ComposeA in PartialSchemaReqConverter", !req.getBoundProjectionName()); } - for (const auto& [key, req] : rightReqMap) { + for (const auto& [key, req] : rightReqMap.conjuncts()) { tassert(7155022, "Unexpected binding in ComposeA in PartialSchemaReqConverter", !req.getBoundProjectionName()); } - auto leftEntry = leftReqMap.begin(); - auto rightEntry = rightReqMap.begin(); - auto& [leftKey, leftReq] = *leftEntry; - auto& [rightKey, rightReq] = *rightEntry; - { // Check if the left and right requirements are all or none perf-only. size_t perfOnlyCount = 0; - for (const auto& [key, req] : leftReqMap) { + for (const auto& [key, req] : leftReqMap.conjuncts()) { if (req.getIsPerfOnly()) { perfOnlyCount++; } } - for (const auto& [key, req] : rightReqMap) { + for (const auto& [key, req] : rightReqMap.conjuncts()) { if (req.getIsPerfOnly()) { perfOnlyCount++; } } - if (perfOnlyCount != 0 && perfOnlyCount != leftReqMap.size() + rightReqMap.size()) { + if (perfOnlyCount != 0 && + perfOnlyCount != leftReqMap.numLeaves() + rightReqMap.numLeaves()) { // For now allow only predicates with the same perf-only flag. return {}; } } - if (leftReqMap.count(leftKey) == leftReqMap.size() && - rightReqMap.count(leftKey) == rightReqMap.size()) { + auto leftEntries = leftReqMap.conjuncts(); + auto rightEntries = rightReqMap.conjuncts(); + auto leftEntry = leftEntries.begin(); + auto rightEntry = rightEntries.begin(); + auto& [leftKey, leftReq] = *leftEntry; + auto& [rightKey, rightReq] = *rightEntry; + + // Do all reqs from both sides use the same key? + bool allSameKey = true; + for (const auto* reqs : {&leftReqMap, &rightReqMap}) { + for (auto&& [k, req] : reqs->conjuncts()) { + if (k != leftKey) { + allSameKey = false; + break; + } + } + } + if (allSameKey) { // All reqs from both sides use the same key (input binding + path). // Each side is a conjunction, and we're taking a disjunction. // Use the fact that OR distributes over AND to build a new conjunction: // (a & b) | (x & y) == (a | x) & (a | y) & (b | x) & (b | y) PartialSchemaRequirements resultMap; - for (const auto& [rightKey1, rightReq1] : rightReqMap) { - for (const auto& [leftKey1, leftReq1] : leftReqMap) { + for (const auto& [rightKey1, rightReq1] : rightReqMap.conjuncts()) { + for (const auto& [leftKey1, leftReq1] : leftReqMap.conjuncts()) { auto combinedIntervals = leftReq1.getIntervals(); combineIntervalsDNF( false /*intersect*/, combinedIntervals, rightReq1.getIntervals()); @@ -346,17 +358,20 @@ public: std::move(combinedIntervals), leftReq1.getIsPerfOnly(), }; - resultMap.emplace(leftKey1, combinedReq); + resultMap.add(leftKey1, combinedReq); } } leftReqMap = std::move(resultMap); return leftResult; } + // Left and right don't all use the same key. - if (leftReqMap.size() != 1 || rightReqMap.size() != 1) { + if (leftReqMap.numLeaves() != 1 || rightReqMap.numLeaves() != 1) { return {}; } + // Left and right don't all use the same key, but they both have exactly 1 entry. + // In other words, leftKey != rightKey. // Here we can combine if paths differ only by a Traverse element and both intervals // are the same, with array bounds. For example: @@ -490,7 +505,7 @@ public: // New map has keys with appended paths. PartialSchemaRequirements newMap; - for (auto& entry : inputResult->_reqMap) { + for (const auto& entry : inputResult->_reqMap.conjuncts()) { if (entry.first._projectionName) { return {}; } @@ -502,7 +517,7 @@ public: std::swap(appendedPath.cast<T>()->getPath(), path); std::swap(path, appendedPath); - newMap.emplace(PartialSchemaKey{std::move(path)}, std::move(entry.second)); + newMap.add(PartialSchemaKey{std::move(path)}, std::move(entry.second)); } inputResult->_reqMap = std::move(newMap); @@ -517,7 +532,7 @@ public: if (!inputResult) { return {}; } - if (inputResult->_reqMap.size() > 1) { + if (inputResult->_reqMap.numConjuncts() > 1) { // More than one requirement means we are handling a conjunction inside a traverse. // We can change it to a traverse inside a conjunction, but that's an // over-approximation, so we have to keep the original predicate. @@ -683,7 +698,7 @@ boost::optional<PartialSchemaReqConversion> convertExprToPartialSchemaReq( return {}; } - for (const auto& [key, req] : reqMap) { + for (const auto& [key, req] : reqMap.conjuncts()) { if (key._path.is<PathIdentity>() && isIntervalReqFullyOpenDNF(req.getIntervals())) { // We need to determine either path or interval (or both). return {}; @@ -692,11 +707,11 @@ boost::optional<PartialSchemaReqConversion> convertExprToPartialSchemaReq( // If we over-approximate, we need to switch all requirements to perf-only. if (result->_retainPredicate) { - for (auto& [key, req] : reqMap) { + reqMap.transform([&](const PartialSchemaKey& key, PartialSchemaRequirement& req) { if (!req.getIsPerfOnly()) { req = {req.getBoundProjectionName(), req.getIntervals(), true /*isPerfOnly*/}; } - } + }); } return result; } @@ -859,13 +874,15 @@ bool simplifyPartialSchemaReqPaths(const ProjectionName& scanProjName, }; // Simplify paths by eliminating unnecessary Traverse elements. - for (const auto& [key, req] : reqMap) { + for (const auto& [key, req] : reqMap.conjuncts()) { PartialSchemaKey newKey = key; bool simplified = false; if (key._projectionName == scanProjName && checkPathContainsTraverse(newKey._path)) { simplified |= simplifyTraverseNonArray(newKey._path, multikeynessTrie); } + // Maintain the invariant that Traverse-less keys appear only once: + // we can move the conjunction into the intervals and simplify. if (prevEntry) { if (simplified && prevEntry->first == newKey) { auto& prevReq = prevEntry->second; @@ -890,7 +907,7 @@ bool simplifyPartialSchemaReqPaths(const ProjectionName& scanProjName, std::move(resultIntervals), req.getIsPerfOnly() && prevReq.getIsPerfOnly()}; } else { - result.insert(std::move(*prevEntry)); + result.add(std::move(prevEntry->first), std::move(prevEntry->second)); prevEntry.reset({std::move(newKey), req}); } } else { @@ -898,19 +915,26 @@ bool simplifyPartialSchemaReqPaths(const ProjectionName& scanProjName, } } if (prevEntry) { - result.insert(std::move(*prevEntry)); + result.add(std::move(prevEntry->first), std::move(prevEntry->second)); } // Intersect and normalize intervals. - for (auto& [key, req] : result) { - auto resultIntervals = req.getIntervals(); - if (!simplifyFn(resultIntervals)) { - return true; - } + const bool representable = + result.simplify([&](const PartialSchemaKey&, PartialSchemaRequirement& req) -> bool { + auto resultIntervals = req.getIntervals(); + if (!simplifyFn(resultIntervals)) { + return false; + } - normalizeIntervals(resultIntervals); + normalizeIntervals(resultIntervals); - req = {req.getBoundProjectionName(), std::move(resultIntervals), req.getIsPerfOnly()}; + req = {req.getBoundProjectionName(), std::move(resultIntervals), req.getIsPerfOnly()}; + return true; + }); + if (!representable) { + // It simplifies to an always-false predicate, which we do not represent + // as PartialSchemaRequirements. + return true; } reqMap = std::move(result); @@ -951,7 +975,7 @@ static bool intersectPartialSchemaReq(PartialSchemaRequirements& reqMap, { bool success = false; // Look for exact match on the path, and if found combine intervals. - for (auto& [existingKey, existingReq] : reqMap) { + for (auto& [existingKey, existingReq] : reqMap.conjuncts()) { if (existingKey != key) { continue; } @@ -1002,7 +1026,7 @@ static bool intersectPartialSchemaReq(PartialSchemaRequirements& reqMap, } const bool reqHasBoundProj = req.getBoundProjectionName().has_value(); - for (const auto& [existingKey, existingReq] : reqMap) { + for (const auto& [existingKey, existingReq] : reqMap.conjuncts()) { uassert(6624150, "Existing key referring to new requirement.", !reqHasBoundProj || @@ -1027,7 +1051,7 @@ static bool intersectPartialSchemaReq(PartialSchemaRequirements& reqMap, if (merged) { // continue around the loop } else { - reqMap.emplace(std::move(key), std::move(req)); + reqMap.add(std::move(key), std::move(req)); return true; } } @@ -1042,7 +1066,7 @@ bool isSubsetOfPartialSchemaReq(const PartialSchemaRequirements& lhs, // However, we're assuming that 'rhs' has no projections. // If it did have projections, the result (lhs ^ rhs) would have projections // and wouldn't match 'lhs'. - for (auto&& [key, req] : rhs) { + for (const auto& [key, req] : rhs.conjuncts()) { tassert(7155010, "isSubsetOfPartialSchemaReq expects 'rhs' to have no projections", !req.getBoundProjectionName()); @@ -1061,7 +1085,7 @@ bool isSubsetOfPartialSchemaReq(const PartialSchemaRequirements& lhs, bool intersectPartialSchemaReq(PartialSchemaRequirements& target, const PartialSchemaRequirements& source, ProjectionRenames& projectionRenames) { - for (const auto& [key, req] : source) { + for (const auto& [key, req] : source.conjuncts()) { if (!intersectPartialSchemaReq(target, key, req, projectionRenames)) { return false; } @@ -1261,14 +1285,15 @@ static bool computeCandidateIndexEntry(PrefixId& prefixId, // Add open interval for the first equality prefix. eqPrefixes.emplace_back(0); + // For each component of the index, for (size_t indexField = 0; indexField < indexCollationSpec.size(); indexField++) { const auto& indexCollationEntry = indexCollationSpec.at(indexField); const bool reverse = indexCollationEntry._op == CollationOp::Descending; bool foundSuitableField = false; PartialSchemaKey indexKey{scanProjName, indexCollationEntry._path}; - if (auto indexKeyIt = reqMap.find(indexKey); indexKeyIt != reqMap.cend()) { - if (const PartialSchemaRequirement& req = indexKeyIt->second; + if (auto result = reqMap.findFirstConjunct(indexKey)) { + if (const auto& [queryPredPos, req] = *result; fastNullHandling || !req.getIsPerfOnly() || !req.mayReturnNull(constFold)) { const auto& requiredInterval = req.getIntervals(); const bool success = extendCompoundInterval(prefixId, @@ -1297,7 +1322,6 @@ static bool computeCandidateIndexEntry(PrefixId& prefixId, } entry._intervalPrefixSize++; - const size_t queryPredPos = std::distance(reqMap.cbegin(), indexKeyIt); eqPrefixes.back()._predPosSet.insert(queryPredPos); if (const auto& boundProjName = req.getBoundProjectionName()) { @@ -1341,19 +1365,16 @@ static bool computeCandidateIndexEntry(PrefixId& prefixId, if (fusedPath._numTraversesFused > 0) { allowFuseTraverse[indexField] = false; } - auto it = reqMap.find(queryKey); - tassert( - 6624158, "QueryKey must exist in the requirements map", it != reqMap.cend()); + auto result = reqMap.findFirstConjunct(queryKey); + tassert(6624158, "QueryKey must exist in the requirements map", result); + const auto& [index, req] = *result; - const PartialSchemaRequirement& req = it->second; if (!req.getIsPerfOnly()) { // Only regular requirements are added to residual predicates. const ProjectionName& tempProjName = getExistingOrTempProjForFieldName( prefixId, FieldNameType{encodeIndexKeyName(indexField)}, fieldProjMap); entry._residualRequirements.emplace_back( - PartialSchemaKey{tempProjName, std::move(*fusedPath._suffix)}, - req, - std::distance(reqMap.cbegin(), it)); + PartialSchemaKey{tempProjName, std::move(*fusedPath._suffix)}, req, index); } satisfied = true; @@ -1396,7 +1417,7 @@ CandidateIndexes computeCandidateIndexes(PrefixId& prefixId, const ConstFoldFn& constFold) { // Contains one instance for each unmatched key. PartialSchemaKeySet unsatisfiedKeysInitial; - for (const auto& [key, req] : reqMap) { + for (const auto& [key, req] : reqMap.conjuncts()) { if (req.getIsPerfOnly()) { // Perf only do not need to be necessarily satisfied. continue; @@ -1445,7 +1466,7 @@ boost::optional<ScanParams> computeScanParams(PrefixId& prefixId, auto& fieldProjMap = result._fieldProjectionMap; size_t entryIndex = 0; - for (const auto& [key, req] : reqMap) { + for (const auto& [key, req] : reqMap.conjuncts()) { if (req.getIsPerfOnly()) { // Ignore perf only requirements. continue; @@ -2352,7 +2373,7 @@ bool pathEndsInTraverse(const optimizer::ABT& path) { bool hasProperIntervals(const PartialSchemaRequirements& reqMap) { // Compute if this node has any proper (not fully open) intervals. - for (const auto& [key, req] : reqMap) { + for (const auto& [key, req] : reqMap.conjuncts()) { if (!isIntervalReqFullyOpenDNF(req.getIntervals())) { return true; } |