summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Percy <david.percy@mongodb.com>2023-01-13 03:26:24 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2023-01-20 19:42:15 +0000
commit52832ac0d4440785350bfaab4ff0cbe8311875b7 (patch)
tree2730f51e32ce40b3894c808fdd04445f0eb8a66d
parent8142491b15a87a6e81198cc249a36ed4e70d74ec (diff)
downloadmongo-52832ac0d4440785350bfaab4ff0cbe8311875b7.tar.gz
SERVER-71550 Change PartialSchemaRequirements map to vector
-rw-r--r--src/mongo/db/query/ce/heuristic_estimator.cpp2
-rw-r--r--src/mongo/db/query/ce/hinted_estimator.cpp2
-rw-r--r--src/mongo/db/query/ce/histogram_estimator.cpp2
-rw-r--r--src/mongo/db/query/ce/sampling_estimator.cpp4
-rw-r--r--src/mongo/db/query/ce/test_utils.h2
-rw-r--r--src/mongo/db/query/optimizer/cascades/implementers.cpp17
-rw-r--r--src/mongo/db/query/optimizer/cascades/logical_props_derivation.cpp17
-rw-r--r--src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp47
-rw-r--r--src/mongo/db/query/optimizer/cascades/memo.cpp2
-rw-r--r--src/mongo/db/query/optimizer/explain.cpp2
-rw-r--r--src/mongo/db/query/optimizer/index_bounds.cpp105
-rw-r--r--src/mongo/db/query/optimizer/index_bounds.h115
-rw-r--r--src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp15
-rw-r--r--src/mongo/db/query/optimizer/node.cpp18
-rw-r--r--src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp2
-rw-r--r--src/mongo/db/query/optimizer/utils/abt_hash.cpp2
-rw-r--r--src/mongo/db/query/optimizer/utils/utils.cpp125
17 files changed, 353 insertions, 126 deletions
diff --git a/src/mongo/db/query/ce/heuristic_estimator.cpp b/src/mongo/db/query/ce/heuristic_estimator.cpp
index 9134d6e102f..4d472d086c4 100644
--- a/src/mongo/db/query/ce/heuristic_estimator.cpp
+++ b/src/mongo/db/query/ce/heuristic_estimator.cpp
@@ -223,7 +223,7 @@ public:
SelectivityType topLevelSel{1.0};
std::vector<SelectivityType> topLevelSelectivities;
- for (const auto& [key, req] : node.getReqMap()) {
+ for (const auto& [key, req] : node.getReqMap().conjuncts()) {
if (req.getIsPerfOnly()) {
// Ignore perf-only requirements.
continue;
diff --git a/src/mongo/db/query/ce/hinted_estimator.cpp b/src/mongo/db/query/ce/hinted_estimator.cpp
index 41b63a8f914..a29e1bf8037 100644
--- a/src/mongo/db/query/ce/hinted_estimator.cpp
+++ b/src/mongo/db/query/ce/hinted_estimator.cpp
@@ -40,7 +40,7 @@ public:
CEType /*bindsResult*/,
CEType /*refsResult*/) {
CEType result = childResult;
- for (const auto& [key, req] : node.getReqMap()) {
+ for (const auto& [key, req] : node.getReqMap().conjuncts()) {
if (!isIntervalReqFullyOpenDNF(req.getIntervals())) {
auto it = _hints.find(key);
if (it != _hints.cend()) {
diff --git a/src/mongo/db/query/ce/histogram_estimator.cpp b/src/mongo/db/query/ce/histogram_estimator.cpp
index 05bfb1cd633..f8e271f92b5 100644
--- a/src/mongo/db/query/ce/histogram_estimator.cpp
+++ b/src/mongo/db/query/ce/histogram_estimator.cpp
@@ -250,7 +250,7 @@ public:
// Initial first pass through the requirements map to extract information about each path.
std::map<std::string, SargableConjunct> conjunctRequirements;
- for (const auto& [key, req] : node.getReqMap()) {
+ for (const auto& [key, req] : node.getReqMap().conjuncts()) {
if (req.getIsPerfOnly()) {
// Ignore perf-only requirements.
continue;
diff --git a/src/mongo/db/query/ce/sampling_estimator.cpp b/src/mongo/db/query/ce/sampling_estimator.cpp
index 779f839cf95..ee9e3d3c43b 100644
--- a/src/mongo/db/query/ce/sampling_estimator.cpp
+++ b/src/mongo/db/query/ce/sampling_estimator.cpp
@@ -75,7 +75,7 @@ public:
void transport(ABT& n, const SargableNode& node, ABT& childResult, ABT& refs, ABT& binds) {
ABT result = childResult;
// Retain only output bindings without applying filters.
- for (const auto& [key, req] : node.getReqMap()) {
+ for (const auto& [key, req] : node.getReqMap().conjuncts()) {
if (const auto& boundProjName = req.getBoundProjectionName()) {
lowerPartialSchemaRequirement(
key,
@@ -161,7 +161,7 @@ public:
// Here we assume that each requirement is independent.
// TODO: consider estimating together the entire set of requirements (but caching!)
CEType result = childResult;
- for (const auto& [key, req] : node.getReqMap()) {
+ for (const auto& [key, req] : node.getReqMap().conjuncts()) {
if (req.getIsPerfOnly()) {
// Ignore perf-only requirements.
continue;
diff --git a/src/mongo/db/query/ce/test_utils.h b/src/mongo/db/query/ce/test_utils.h
index 34e4ec729fb..4083c9aefdb 100644
--- a/src/mongo/db/query/ce/test_utils.h
+++ b/src/mongo/db/query/ce/test_utils.h
@@ -116,7 +116,7 @@ bool isSargableNode(const ABT& n) {
// for a SargableNode with a specific number of predicates. For tests, we only care about
// verifying the cardinality of that one.
if (auto* sargable = n.cast<optimizer::SargableNode>()) {
- return sargable->getReqMap().size() == NumReq;
+ return sargable->getReqMap().numLeaves() == NumReq;
}
return false;
}
diff --git a/src/mongo/db/query/optimizer/cascades/implementers.cpp b/src/mongo/db/query/optimizer/cascades/implementers.cpp
index 12da8837f86..a1dfe1e8576 100644
--- a/src/mongo/db/query/optimizer/cascades/implementers.cpp
+++ b/src/mongo/db/query/optimizer/cascades/implementers.cpp
@@ -423,7 +423,7 @@ public:
const ProjectionName& scanProjectionName = indexingAvailability.getScanProjection();
const PartialSchemaRequirements& reqMap = node.getReqMap();
- for (const auto& [key, req] : reqMap) {
+ for (const auto& [key, req] : reqMap.conjuncts()) {
if (key._projectionName != scanProjectionName) {
// We can only satisfy partial schema requirements using our root projection.
return;
@@ -444,7 +444,7 @@ public:
requiresRootProjection = projectionsLeftToSatisfy.erase(scanProjectionName);
}
- for (const auto& entry : reqMap) {
+ for (const auto& entry : reqMap.conjuncts()) {
if (const auto& boundProjName = entry.second.getBoundProjectionName()) {
// Project field only if it required.
projectionsLeftToSatisfy.erase(*boundProjName);
@@ -526,8 +526,10 @@ public:
{
PartialSchemaKeySet residualQueryKeySet;
for (const auto& [residualKey, residualReq, entryIndex] : residualReqs) {
- auto entryIt = reqMap.cbegin();
+ // Find the indexed requirement this residual requirement refers to.
+ auto entryIt = reqMap.conjuncts().cbegin();
std::advance(entryIt, entryIndex);
+
residualQueryKeySet.emplace(entryIt->first);
residualReqsWithCE.emplace_back(
residualKey, residualReq, partialSchemaKeyCE.at(entryIndex).second);
@@ -535,7 +537,7 @@ public:
if (scanGroupCE > 0.0) {
size_t entryIndex = 0;
- for (const auto& [key, req] : reqMap) {
+ for (const auto& [key, req] : reqMap.conjuncts()) {
if (residualQueryKeySet.count(key) == 0) {
const SelectivityType sel =
partialSchemaKeyCE.at(entryIndex).second / scanGroupCE;
@@ -1476,10 +1478,9 @@ private:
distribAndProjections._projectionNames;
for (const ABT& partitioningPath : distributionAndPaths._paths) {
- if (auto it = reqMap.find(PartialSchemaKey{scanProjection, partitioningPath});
- it != reqMap.cend() &&
- it->second.getBoundProjectionName() ==
- requiredProjections.at(distributionPartitionIndex)) {
+ if (auto proj = reqMap.findProjection(
+ PartialSchemaKey{scanProjection, partitioningPath});
+ proj && *proj == requiredProjections.at(distributionPartitionIndex)) {
distributionPartitionIndex++;
} else {
return false;
diff --git a/src/mongo/db/query/optimizer/cascades/logical_props_derivation.cpp b/src/mongo/db/query/optimizer/cascades/logical_props_derivation.cpp
index ce3da42a69d..8f83ad3682a 100644
--- a/src/mongo/db/query/optimizer/cascades/logical_props_derivation.cpp
+++ b/src/mongo/db/query/optimizer/cascades/logical_props_derivation.cpp
@@ -62,6 +62,13 @@ static void populateInitialDistributions(const DistributionAndPaths& distributio
}
}
+/**
+ * If the projections in 'req' cover every path of 'distributionAndPaths',
+ * then add a new 'DistributionAndProjections' entry to 'distributions'.
+ *
+ * This is used to populate metadata in a SargableNode, so we know what distributions
+ * it can provide, and what projection names matter for each one.
+ */
static void populateDistributionPaths(const PartialSchemaRequirements& req,
const ProjectionName& scanProjectionName,
const DistributionAndPaths& distributionAndPaths,
@@ -72,13 +79,11 @@ static void populateDistributionPaths(const PartialSchemaRequirements& req,
ProjectionNameVector distributionProjections;
for (const ABT& path : distributionAndPaths._paths) {
- auto it = req.find(PartialSchemaKey{scanProjectionName, path});
- if (it == req.cend()) {
+ if (auto binding = req.findProjection({scanProjectionName, path})) {
+ distributionProjections.push_back(*binding);
+ } else {
break;
}
- if (const auto& boundProjName = it->second.getBoundProjectionName()) {
- distributionProjections.push_back(*boundProjName);
- }
}
if (distributionProjections.size() == distributionAndPaths._paths.size()) {
@@ -100,7 +105,7 @@ static bool computeEqPredsOnly(const PartialSchemaRequirements& reqMap) {
PartialSchemaKeySet equalityKeys;
PartialSchemaKeySet fullyOpenKeys;
- for (const auto& [key, req] : reqMap) {
+ for (const auto& [key, req] : reqMap.conjuncts()) {
const auto& intervals = req.getIntervals();
if (auto singularInterval = IntervalReqExpr::getSingularDNF(intervals)) {
if (singularInterval->isFullyOpen()) {
diff --git a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
index 310abb117f4..a13ae81cdc5 100644
--- a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
+++ b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
@@ -565,7 +565,7 @@ static boost::optional<ABT> mergeSargableNodes(
return createEmptyValueScanNode(ctx);
}
- if (mergedReqs.size() > SargableNode::kMaxPartialSchemaReqs) {
+ if (mergedReqs.numLeaves() > SargableNode::kMaxPartialSchemaReqs) {
return {};
}
@@ -660,19 +660,16 @@ static void convertFilterToSargableNode(ABT::reference_type node,
}
// Remove any partial schema requirements which do not constrain their input.
- for (auto it = conversion->_reqMap.cbegin(); it != conversion->_reqMap.cend();) {
+
+ conversion->_reqMap.simplify([](const PartialSchemaKey& key, PartialSchemaRequirement& req) {
uassert(6624111,
"Filter partial schema requirement must contain a variable name.",
- it->first._projectionName);
+ key._projectionName);
uassert(6624112,
"Filter partial schema requirement cannot bind.",
- !it->second.getBoundProjectionName());
- if (isIntervalReqFullyOpenDNF(it->second.getIntervals())) {
- it = conversion->_reqMap.erase(it);
- } else {
- ++it;
- }
- }
+ !req.getBoundProjectionName());
+ return true;
+ });
if (conversion->_reqMap.empty()) {
// If the filter has no constraints after removing no-ops, then replace with its child. We
@@ -690,7 +687,7 @@ static void convertFilterToSargableNode(ABT::reference_type node,
addEmptyValueScanNode(ctx);
return;
}
- if (conversion->_reqMap.size() > SargableNode::kMaxPartialSchemaReqs) {
+ if (conversion->_reqMap.numLeaves() > SargableNode::kMaxPartialSchemaReqs) {
// Too many requirements.
return;
}
@@ -1128,12 +1125,13 @@ struct SubstituteConvert<EvaluationNode> {
uassert(6624165,
"Should not be getting retainPredicate set for EvalNodes",
!conversion->_retainPredicate);
- if (conversion->_reqMap.size() != 1) {
+ if (conversion->_reqMap.numLeaves() != 1) {
// For evaluation nodes we expect to create a single entry.
return;
}
- for (auto& [key, req] : conversion->_reqMap) {
+ conversion->_reqMap.transform([&](const PartialSchemaKey& key,
+ PartialSchemaRequirement& req) {
req = {
evalNode.getProjectionName(), std::move(req.getIntervals()), req.getIsPerfOnly()};
@@ -1143,7 +1141,7 @@ struct SubstituteConvert<EvaluationNode> {
uassert(6624115,
"Eval partial schema requirement cannot have a range",
isIntervalReqFullyOpenDNF(req.getIntervals()));
- }
+ });
bool hasEmptyInterval = false;
auto candidateIndexes = computeCandidateIndexes(ctx.getPrefixId(),
@@ -1172,7 +1170,7 @@ struct SubstituteConvert<EvaluationNode> {
static void lowerSargableNode(const SargableNode& node, RewriteContext& ctx) {
ABT n = node.getChild();
const auto reqMap = node.getReqMap();
- for (const auto& [key, req] : reqMap) {
+ for (const auto& [key, req] : reqMap.conjuncts()) {
lowerPartialSchemaRequirement(key, req, n, ctx.getPathToInterval());
}
ctx.addNode(n, true /*clear*/);
@@ -1217,14 +1215,13 @@ static SplitRequirementsResult splitRequirements(
boost::optional<ProjectionName> boundProjectionName,
IntervalReqExpr::Node intervals) {
// We always strip out the perf-only flag.
- reqMap.emplace(key,
- PartialSchemaRequirement{std::move(boundProjectionName),
- std::move(intervals),
- false /*isPerfOnly*/});
+ reqMap.add(key,
+ PartialSchemaRequirement{
+ std::move(boundProjectionName), std::move(intervals), false /*isPerfOnly*/});
};
size_t index = 0;
- for (const auto& [key, req] : reqMap) {
+ for (const auto& [key, req] : reqMap.conjuncts()) {
if (((1ull << index) & mask) != 0) {
bool addedToLeft = false;
@@ -1339,16 +1336,16 @@ struct ExploreConvert<SargableNode> {
std::vector<bool> mayReturnNull;
{
// Pre-compute if a requirement's interval is fully open.
- isFullyOpen.reserve(reqMap.size());
- for (const auto& [key, req] : reqMap) {
+ isFullyOpen.reserve(reqMap.numLeaves());
+ for (const auto& [key, req] : reqMap.conjuncts()) {
isFullyOpen.push_back(isIntervalReqFullyOpenDNF(req.getIntervals()));
}
if (!hints._fastIndexNullHandling && !isIndex) {
// Pre-compute if a requirement's interval may contain nulls, and also has an output
// binding.
- mayReturnNull.reserve(reqMap.size());
- for (const auto& [key, req] : reqMap) {
+ mayReturnNull.reserve(reqMap.numLeaves());
+ for (const auto& [key, req] : reqMap.conjuncts()) {
mayReturnNull.push_back(req.mayReturnNull(ctx.getConstFold()));
}
}
@@ -1359,7 +1356,7 @@ struct ExploreConvert<SargableNode> {
// try having at least one predicate on the left (mask = 1), and we try all possible
// subsets. For index intersection however (isIndex = true), we try symmetric partitioning
// (thus the high bound is 2^(N-1)).
- const size_t reqSize = reqMap.size();
+ const size_t reqSize = reqMap.numConjuncts();
const size_t highMask = isIndex ? (1ull << (reqSize - 1)) : (1ull << reqSize);
for (size_t mask = 1; mask < highMask; mask++) {
SplitRequirementsResult splitResult = splitRequirements(mask,
diff --git a/src/mongo/db/query/optimizer/cascades/memo.cpp b/src/mongo/db/query/optimizer/cascades/memo.cpp
index 68851f79cac..5353d66a566 100644
--- a/src/mongo/db/query/optimizer/cascades/memo.cpp
+++ b/src/mongo/db/query/optimizer/cascades/memo.cpp
@@ -623,7 +623,7 @@ void Memo::estimateCE(const Context& ctx, const GroupIdType groupId) {
auto& partialSchemaKeyCE = ceProp.getPartialSchemaKeyCE();
invariant(partialSchemaKeyCE.empty());
- for (const auto& [key, req] : sargablePtr->getReqMap()) {
+ for (const auto& [key, req] : sargablePtr->getReqMap().conjuncts()) {
ABT singularReq = make<SargableNode>(PartialSchemaRequirements{{key, req}},
CandidateIndexes{},
ScanParams{},
diff --git a/src/mongo/db/query/optimizer/explain.cpp b/src/mongo/db/query/optimizer/explain.cpp
index d5bbd9b78ea..b3a6358bfec 100644
--- a/src/mongo/db/query/optimizer/explain.cpp
+++ b/src/mongo/db/query/optimizer/explain.cpp
@@ -1190,7 +1190,7 @@ public:
void printPartialSchemaReqMap(ExplainPrinter& parent, const PartialSchemaRequirements& reqMap) {
std::vector<ExplainPrinter> printers;
- for (const auto& [key, req] : reqMap) {
+ for (const auto& [key, req] : reqMap.conjuncts()) {
ExplainPrinter local;
if (const auto& projName = key._projectionName) {
diff --git a/src/mongo/db/query/optimizer/index_bounds.cpp b/src/mongo/db/query/optimizer/index_bounds.cpp
index a1a6d10171b..3135126a57b 100644
--- a/src/mongo/db/query/optimizer/index_bounds.cpp
+++ b/src/mongo/db/query/optimizer/index_bounds.cpp
@@ -189,6 +189,111 @@ bool PartialSchemaKeyLessComparator::operator()(const PartialSchemaKey& k1,
return compareExprAndPaths(k1._path, k2._path) < 0;
}
+void PartialSchemaRequirements::normalize() {
+ std::stable_sort(
+ _repr.begin(),
+ _repr.end(),
+ [lt = PartialSchemaKeyLessComparator{}](const auto& entry1, const auto& entry2) -> bool {
+ return lt(entry1.first, entry2.first);
+ });
+}
+
+PartialSchemaRequirements::PartialSchemaRequirements(
+ std::vector<PartialSchemaRequirements::Entry> entries) {
+ for (Entry& entry : entries) {
+ _repr.push_back(std::move(entry));
+ }
+
+ normalize();
+}
+
+std::set<ProjectionName> PartialSchemaRequirements::getBoundNames() const {
+ std::set<ProjectionName> names;
+ for (auto&& [key, b] : iterateBindings()) {
+ names.insert(b);
+ }
+ return names;
+}
+
+bool PartialSchemaRequirements::operator==(const PartialSchemaRequirements& other) const {
+ return _repr == other._repr;
+}
+
+bool PartialSchemaRequirements::empty() const {
+ return _repr.empty();
+}
+
+size_t PartialSchemaRequirements::numLeaves() const {
+ return _repr.size();
+}
+
+size_t PartialSchemaRequirements::numConjuncts() const {
+ return _repr.size();
+}
+
+boost::optional<ProjectionName> PartialSchemaRequirements::findProjection(
+ const PartialSchemaKey& key) const {
+ for (auto [k, req] : _repr) {
+ if (k == key) {
+ return req.getBoundProjectionName();
+ }
+ }
+ return {};
+}
+
+boost::optional<std::pair<size_t, PartialSchemaRequirement>>
+PartialSchemaRequirements::findFirstConjunct(const PartialSchemaKey& key) const {
+ size_t i = 0;
+ for (auto [k, req] : _repr) {
+ if (k == key) {
+ return {{i, req}};
+ }
+ ++i;
+ }
+ return {};
+}
+
+PartialSchemaRequirements::Bindings PartialSchemaRequirements::iterateBindings() const {
+ Bindings result;
+ for (auto&& [key, req] : _repr) {
+ if (auto binding = req.getBoundProjectionName()) {
+ result.emplace_back(key, *binding);
+ }
+ }
+ return result;
+}
+
+void PartialSchemaRequirements::add(PartialSchemaKey key, PartialSchemaRequirement req) {
+ _repr.emplace_back(std::move(key), std::move(req));
+
+ normalize();
+}
+
+void PartialSchemaRequirements::transform(
+ std::function<void(const PartialSchemaKey&, PartialSchemaRequirement&)> func) {
+ for (auto& [key, req] : _repr) {
+ func(key, req);
+ }
+}
+
+bool PartialSchemaRequirements::simplify(
+ std::function<bool(const PartialSchemaKey&, PartialSchemaRequirement&)> func) {
+ for (auto it = _repr.begin(); it != _repr.end();) {
+ auto& [key, req] = *it;
+
+ if (!func(key, req)) {
+ return false;
+ }
+ if (isIntervalReqFullyOpenDNF(it->second.getIntervals()) && !req.getBoundProjectionName()) {
+ it = _repr.erase(it);
+ } else {
+ ++it;
+ }
+ }
+ return true;
+}
+
+
ResidualRequirement::ResidualRequirement(PartialSchemaKey key,
PartialSchemaRequirement req,
size_t entryIndex)
diff --git a/src/mongo/db/query/optimizer/index_bounds.h b/src/mongo/db/query/optimizer/index_bounds.h
index 303920a56ba..f26f88b0aff 100644
--- a/src/mongo/db/query/optimizer/index_bounds.h
+++ b/src/mongo/db/query/optimizer/index_bounds.h
@@ -142,13 +142,122 @@ struct PartialSchemaKeyLessComparator {
};
/**
- * Map from referred (or input) projection name to list of requirements for that projection.
+ * Represents a set of predicates and projections. Cannot represent all predicates/projections:
+ * only those that can typically be answered efficiently with an index.
+ *
* Only one instance of a path without Traverse elements (non-multikey) is allowed. By contrast
* several instances of paths with Traverse elements (multikey) are allowed. For example: Get "a"
* Get "b" Id is allowed just once while Get "a" Traverse Get "b" Id is allowed multiple times.
+ *
+ * The default / empty state represents a conjunction of zero predicates, which means always true.
*/
-using PartialSchemaRequirements =
- std::multimap<PartialSchemaKey, PartialSchemaRequirement, PartialSchemaKeyLessComparator>;
+class PartialSchemaRequirements {
+public:
+ using Entry = std::pair<PartialSchemaKey, PartialSchemaRequirement>;
+ struct ConstIter {
+ auto begin() const {
+ return _begin;
+ }
+ auto end() const {
+ return _end;
+ }
+ auto cbegin() const {
+ return _begin;
+ }
+ auto cend() const {
+ return _end;
+ }
+
+ std::vector<Entry>::const_iterator _begin;
+ std::vector<Entry>::const_iterator _end;
+ };
+
+ struct Iter {
+ auto begin() const {
+ return _begin;
+ }
+ auto end() const {
+ return _end;
+ }
+ auto cbegin() const {
+ return _begin;
+ }
+ auto cend() const {
+ return _end;
+ }
+
+ std::vector<Entry>::iterator _begin;
+ std::vector<Entry>::iterator _end;
+ };
+
+ PartialSchemaRequirements() = default;
+ PartialSchemaRequirements(std::vector<Entry>);
+ PartialSchemaRequirements(std::initializer_list<Entry> entries)
+ : PartialSchemaRequirements(std::vector<Entry>(entries)) {}
+
+ bool operator==(const PartialSchemaRequirements& other) const;
+
+ /**
+ * Return true if there are zero predicates and zero projections.
+ */
+ bool empty() const;
+
+ size_t numLeaves() const;
+ size_t numConjuncts() const;
+
+ std::set<ProjectionName> getBoundNames() const;
+
+ boost::optional<ProjectionName> findProjection(const PartialSchemaKey&) const;
+
+ /**
+ * Picks the first top-level conjunct matching the given key.
+ *
+ * Result includes the index of the top-level conjunct.
+ */
+ boost::optional<std::pair<size_t, PartialSchemaRequirement>> findFirstConjunct(
+ const PartialSchemaKey&) const;
+
+ ConstIter conjuncts() const {
+ return {_repr.begin(), _repr.end()};
+ }
+
+ Iter conjuncts() {
+ return {_repr.begin(), _repr.end()};
+ }
+
+ using Bindings = std::vector<std::pair<PartialSchemaKey, ProjectionName>>;
+ Bindings iterateBindings() const;
+
+ void add(PartialSchemaKey, PartialSchemaRequirement);
+
+ /**
+ * Apply an in-place transformation to each PartialSchemaRequirement.
+ *
+ * Since the key is only exposed read-only to the callback, we don't need to
+ * worry about restoring the no-Traverseless-duplicates invariant.
+ */
+ void transform(std::function<void(const PartialSchemaKey&, PartialSchemaRequirement&)>);
+
+ /**
+ * Apply a simplification to each PartialSchemaRequirement.
+ *
+ * The callback can return false if an individual PartialSchemaRequirement
+ * simplifies to an always-false predicate.
+ *
+ * This method returns false if the overall result is an always-false predicate.
+ *
+ * This method will also remove any predicates that are trivially true (those will
+ * a fully open DNF interval).
+ */
+ bool simplify(std::function<bool(const PartialSchemaKey&, PartialSchemaRequirement&)>);
+
+private:
+ // Restore the invariant that the entries are sorted by key.
+ void normalize();
+
+ using Repr = std::vector<Entry>;
+ Repr _repr;
+};
/**
* Used to track cardinality estimates per predicate inside a PartialSchemaRequirement. The order of
diff --git a/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp b/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp
index 716adaeaa32..c1694f9c898 100644
--- a/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp
+++ b/src/mongo/db/query/optimizer/logical_rewriter_optimizer_test.cpp
@@ -2438,14 +2438,7 @@ TEST(LogicalRewriter, EmptyArrayIndexBounds) {
auto phaseManager = makePhaseManager(
{OptPhase::MemoSubstitutionPhase},
prefixId,
- {{{"c1",
- createScanDef(
- {},
- {{"index1",
- IndexDefinition{{{makeNonMultikeyIndexPath("a"), CollationOp::Ascending}},
- false /*isMultiKey*/,
- {DistributionType::Centralized},
- {}}}})}}},
+ {{{"c1", createScanDef({}, {})}}},
boost::none /*costModel*/,
{true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
@@ -2469,11 +2462,9 @@ TEST(LogicalRewriter, EmptyArrayIndexBounds) {
"| Const [[]]\n"
"Sargable [Complete]\n"
"| | | | | requirementsMap: \n"
- "| | | | | refProjection: root, path: 'PathGet [a] PathIdentity []', interv"
- "als: {{{=Const [undefined]}} U {{=Const [[]]}}}, perfOnly\n"
+ "| | | | | refProjection: root, path: 'PathGet [a] PathTraverse [1] PathIde"
+ "ntity []', intervals: {{{=Const [undefined]}} U {{=Const [[]]}}}, perfOnly\n"
"| | | | candidateIndexes: \n"
- "| | | | candidateId: 1, index1, {}, {Compound}, {{{=Const [undefined]}} U "
- "{{=Const [[]]}}}\n"
"| | | scanParams: \n"
"| | | {}\n"
"| | BindBlock:\n"
diff --git a/src/mongo/db/query/optimizer/node.cpp b/src/mongo/db/query/optimizer/node.cpp
index 1d0c1cb9260..133af61cd2d 100644
--- a/src/mongo/db/query/optimizer/node.cpp
+++ b/src/mongo/db/query/optimizer/node.cpp
@@ -372,17 +372,15 @@ const ProjectionName& RIDUnionNode::getScanProjectionName() const {
static ProjectionNameVector createSargableBindings(const PartialSchemaRequirements& reqMap) {
ProjectionNameVector result;
- for (const auto& entry : reqMap) {
- if (const auto& boundProjName = entry.second.getBoundProjectionName()) {
- result.push_back(*boundProjName);
- }
+ for (const auto& [key, binding] : reqMap.iterateBindings()) {
+ result.push_back(binding);
}
return result;
}
static ProjectionNameVector createSargableReferences(const PartialSchemaRequirements& reqMap) {
ProjectionNameOrderPreservingSet result;
- for (const auto& entry : reqMap) {
+ for (const auto& entry : reqMap.conjuncts()) {
result.emplace_back(*entry.first._projectionName);
}
return result.getVector();
@@ -403,12 +401,12 @@ SargableNode::SargableNode(PartialSchemaRequirements reqMap,
assertNodeSort(getChild());
tassert(6624085, "SargableNode requires at least one predicate", !_reqMap.empty());
tassert(6624086,
- str::stream() << "SargableNode has too many predicates: " << _reqMap.size(),
- _reqMap.size() <= kMaxPartialSchemaReqs);
+ str::stream() << "SargableNode has too many predicates: " << _reqMap.numLeaves(),
+ _reqMap.numLeaves() <= kMaxPartialSchemaReqs);
// Assert merged map does not contain duplicate bound projections.
ProjectionNameSet boundsProjectionNameSet;
- for (const auto& [key, req] : _reqMap) {
+ for (const auto& [key, req] : _reqMap.conjuncts()) {
if (const auto& boundProjName = req.getBoundProjectionName()) {
tassert(6624094,
"SargableNode has a multikey requirement with a non-trivial interval which "
@@ -427,7 +425,7 @@ SargableNode::SargableNode(PartialSchemaRequirements reqMap,
}
// Assert there are no references to internally bound projections.
- for (const auto& [key, req] : _reqMap) {
+ for (const auto& [key, req] : _reqMap.conjuncts()) {
tassert(6624088,
"SargableNode cannot reference an internally bound projection",
boundsProjectionNameSet.count(*key._projectionName) == 0);
@@ -436,7 +434,7 @@ SargableNode::SargableNode(PartialSchemaRequirements reqMap,
// Assert that non-multikey paths have at most one requirement.
{
PartialSchemaKeySet seen;
- for (auto&& [key, req] : reqMap) {
+ for (auto&& [key, req] : reqMap.conjuncts()) {
if (!checkPathContainsTraverse(key._path)) {
auto inserted = seen.insert(key).second;
tassert(7155020,
diff --git a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
index 47179205e5c..b7a62a4682f 100644
--- a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
+++ b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
@@ -4830,7 +4830,7 @@ TEST(PhysRewriter, HashPartitioning) {
optimized);
}
-TEST(PhysRewriter, IndexPartitioning) {
+TEST(PhysRewriter, IndexPartitioning0) {
using namespace properties;
auto prefixId = PrefixId::createForTests();
diff --git a/src/mongo/db/query/optimizer/utils/abt_hash.cpp b/src/mongo/db/query/optimizer/utils/abt_hash.cpp
index 9ac38721dae..2b9454cfc84 100644
--- a/src/mongo/db/query/optimizer/utils/abt_hash.cpp
+++ b/src/mongo/db/query/optimizer/utils/abt_hash.cpp
@@ -122,7 +122,7 @@ static size_t computePartialSchemaReqHash(const PartialSchemaRequirements& reqMa
size_t result = 17;
IntervalHasher<IntervalReqExpr> intervalHasher;
- for (const auto& [key, req] : reqMap) {
+ for (const auto& [key, req] : reqMap.conjuncts()) {
if (const auto& projName = key._projectionName) {
updateHash(result, ProjectionName::Hasher()(*projName));
}
diff --git a/src/mongo/db/query/optimizer/utils/utils.cpp b/src/mongo/db/query/optimizer/utils/utils.cpp
index c188bc1ff59..d67b05c9cdc 100644
--- a/src/mongo/db/query/optimizer/utils/utils.cpp
+++ b/src/mongo/db/query/optimizer/utils/utils.cpp
@@ -221,11 +221,11 @@ public:
const ProjectionName& boundVarName = boundPtr->name();
PartialSchemaRequirements newMap;
- for (auto& [key, req] : pathResult->_reqMap) {
+ for (auto& [key, req] : pathResult->_reqMap.conjuncts()) {
if (key._projectionName) {
return {};
}
- newMap.emplace(PartialSchemaKey{boundVarName, key._path}, std::move(req));
+ newMap.add(PartialSchemaKey{boundVarName, key._path}, std::move(req));
}
PartialSchemaReqConversion result{std::move(newMap)};
@@ -291,51 +291,63 @@ public:
return leftResult;
}
// Additive composition: we never have projections in this case; only predicates.
- for (const auto& [key, req] : leftReqMap) {
+ for (const auto& [key, req] : leftReqMap.conjuncts()) {
tassert(7155021,
"Unexpected binding in ComposeA in PartialSchemaReqConverter",
!req.getBoundProjectionName());
}
- for (const auto& [key, req] : rightReqMap) {
+ for (const auto& [key, req] : rightReqMap.conjuncts()) {
tassert(7155022,
"Unexpected binding in ComposeA in PartialSchemaReqConverter",
!req.getBoundProjectionName());
}
- auto leftEntry = leftReqMap.begin();
- auto rightEntry = rightReqMap.begin();
- auto& [leftKey, leftReq] = *leftEntry;
- auto& [rightKey, rightReq] = *rightEntry;
-
{
// Check if the left and right requirements are all or none perf-only.
size_t perfOnlyCount = 0;
- for (const auto& [key, req] : leftReqMap) {
+ for (const auto& [key, req] : leftReqMap.conjuncts()) {
if (req.getIsPerfOnly()) {
perfOnlyCount++;
}
}
- for (const auto& [key, req] : rightReqMap) {
+ for (const auto& [key, req] : rightReqMap.conjuncts()) {
if (req.getIsPerfOnly()) {
perfOnlyCount++;
}
}
- if (perfOnlyCount != 0 && perfOnlyCount != leftReqMap.size() + rightReqMap.size()) {
+ if (perfOnlyCount != 0 &&
+ perfOnlyCount != leftReqMap.numLeaves() + rightReqMap.numLeaves()) {
// For now allow only predicates with the same perf-only flag.
return {};
}
}
- if (leftReqMap.count(leftKey) == leftReqMap.size() &&
- rightReqMap.count(leftKey) == rightReqMap.size()) {
+ auto leftEntries = leftReqMap.conjuncts();
+ auto rightEntries = rightReqMap.conjuncts();
+ auto leftEntry = leftEntries.begin();
+ auto rightEntry = rightEntries.begin();
+ auto& [leftKey, leftReq] = *leftEntry;
+ auto& [rightKey, rightReq] = *rightEntry;
+
+ // Do all reqs from both sides use the same key?
+ bool allSameKey = true;
+ for (const auto* reqs : {&leftReqMap, &rightReqMap}) {
+ for (auto&& [k, req] : reqs->conjuncts()) {
+ if (k != leftKey) {
+ allSameKey = false;
+ break;
+ }
+ }
+ }
+ if (allSameKey) {
// All reqs from both sides use the same key (input binding + path).
// Each side is a conjunction, and we're taking a disjunction.
// Use the fact that OR distributes over AND to build a new conjunction:
// (a & b) | (x & y) == (a | x) & (a | y) & (b | x) & (b | y)
PartialSchemaRequirements resultMap;
- for (const auto& [rightKey1, rightReq1] : rightReqMap) {
- for (const auto& [leftKey1, leftReq1] : leftReqMap) {
+ for (const auto& [rightKey1, rightReq1] : rightReqMap.conjuncts()) {
+ for (const auto& [leftKey1, leftReq1] : leftReqMap.conjuncts()) {
auto combinedIntervals = leftReq1.getIntervals();
combineIntervalsDNF(
false /*intersect*/, combinedIntervals, rightReq1.getIntervals());
@@ -346,17 +358,20 @@ public:
std::move(combinedIntervals),
leftReq1.getIsPerfOnly(),
};
- resultMap.emplace(leftKey1, combinedReq);
+ resultMap.add(leftKey1, combinedReq);
}
}
leftReqMap = std::move(resultMap);
return leftResult;
}
+ // Left and right don't all use the same key.
- if (leftReqMap.size() != 1 || rightReqMap.size() != 1) {
+ if (leftReqMap.numLeaves() != 1 || rightReqMap.numLeaves() != 1) {
return {};
}
+ // Left and right don't all use the same key, but they both have exactly 1 entry.
+ // In other words, leftKey != rightKey.
// Here we can combine if paths differ only by a Traverse element and both intervals
// are the same, with array bounds. For example:
@@ -490,7 +505,7 @@ public:
// New map has keys with appended paths.
PartialSchemaRequirements newMap;
- for (auto& entry : inputResult->_reqMap) {
+ for (const auto& entry : inputResult->_reqMap.conjuncts()) {
if (entry.first._projectionName) {
return {};
}
@@ -502,7 +517,7 @@ public:
std::swap(appendedPath.cast<T>()->getPath(), path);
std::swap(path, appendedPath);
- newMap.emplace(PartialSchemaKey{std::move(path)}, std::move(entry.second));
+ newMap.add(PartialSchemaKey{std::move(path)}, std::move(entry.second));
}
inputResult->_reqMap = std::move(newMap);
@@ -517,7 +532,7 @@ public:
if (!inputResult) {
return {};
}
- if (inputResult->_reqMap.size() > 1) {
+ if (inputResult->_reqMap.numConjuncts() > 1) {
// More than one requirement means we are handling a conjunction inside a traverse.
// We can change it to a traverse inside a conjunction, but that's an
// over-approximation, so we have to keep the original predicate.
@@ -683,7 +698,7 @@ boost::optional<PartialSchemaReqConversion> convertExprToPartialSchemaReq(
return {};
}
- for (const auto& [key, req] : reqMap) {
+ for (const auto& [key, req] : reqMap.conjuncts()) {
if (key._path.is<PathIdentity>() && isIntervalReqFullyOpenDNF(req.getIntervals())) {
// We need to determine either path or interval (or both).
return {};
@@ -692,11 +707,11 @@ boost::optional<PartialSchemaReqConversion> convertExprToPartialSchemaReq(
// If we over-approximate, we need to switch all requirements to perf-only.
if (result->_retainPredicate) {
- for (auto& [key, req] : reqMap) {
+ reqMap.transform([&](const PartialSchemaKey& key, PartialSchemaRequirement& req) {
if (!req.getIsPerfOnly()) {
req = {req.getBoundProjectionName(), req.getIntervals(), true /*isPerfOnly*/};
}
- }
+ });
}
return result;
}
@@ -859,13 +874,15 @@ bool simplifyPartialSchemaReqPaths(const ProjectionName& scanProjName,
};
// Simplify paths by eliminating unnecessary Traverse elements.
- for (const auto& [key, req] : reqMap) {
+ for (const auto& [key, req] : reqMap.conjuncts()) {
PartialSchemaKey newKey = key;
bool simplified = false;
if (key._projectionName == scanProjName && checkPathContainsTraverse(newKey._path)) {
simplified |= simplifyTraverseNonArray(newKey._path, multikeynessTrie);
}
+ // Maintain the invariant that Traverse-less keys appear only once:
+ // we can move the conjunction into the intervals and simplify.
if (prevEntry) {
if (simplified && prevEntry->first == newKey) {
auto& prevReq = prevEntry->second;
@@ -890,7 +907,7 @@ bool simplifyPartialSchemaReqPaths(const ProjectionName& scanProjName,
std::move(resultIntervals),
req.getIsPerfOnly() && prevReq.getIsPerfOnly()};
} else {
- result.insert(std::move(*prevEntry));
+ result.add(std::move(prevEntry->first), std::move(prevEntry->second));
prevEntry.reset({std::move(newKey), req});
}
} else {
@@ -898,19 +915,26 @@ bool simplifyPartialSchemaReqPaths(const ProjectionName& scanProjName,
}
}
if (prevEntry) {
- result.insert(std::move(*prevEntry));
+ result.add(std::move(prevEntry->first), std::move(prevEntry->second));
}
// Intersect and normalize intervals.
- for (auto& [key, req] : result) {
- auto resultIntervals = req.getIntervals();
- if (!simplifyFn(resultIntervals)) {
- return true;
- }
+ const bool representable =
+ result.simplify([&](const PartialSchemaKey&, PartialSchemaRequirement& req) -> bool {
+ auto resultIntervals = req.getIntervals();
+ if (!simplifyFn(resultIntervals)) {
+ return false;
+ }
- normalizeIntervals(resultIntervals);
+ normalizeIntervals(resultIntervals);
- req = {req.getBoundProjectionName(), std::move(resultIntervals), req.getIsPerfOnly()};
+ req = {req.getBoundProjectionName(), std::move(resultIntervals), req.getIsPerfOnly()};
+ return true;
+ });
+ if (!representable) {
+ // It simplifies to an always-false predicate, which we do not represent
+ // as PartialSchemaRequirements.
+ return true;
}
reqMap = std::move(result);
@@ -951,7 +975,7 @@ static bool intersectPartialSchemaReq(PartialSchemaRequirements& reqMap,
{
bool success = false;
// Look for exact match on the path, and if found combine intervals.
- for (auto& [existingKey, existingReq] : reqMap) {
+ for (auto& [existingKey, existingReq] : reqMap.conjuncts()) {
if (existingKey != key) {
continue;
}
@@ -1002,7 +1026,7 @@ static bool intersectPartialSchemaReq(PartialSchemaRequirements& reqMap,
}
const bool reqHasBoundProj = req.getBoundProjectionName().has_value();
- for (const auto& [existingKey, existingReq] : reqMap) {
+ for (const auto& [existingKey, existingReq] : reqMap.conjuncts()) {
uassert(6624150,
"Existing key referring to new requirement.",
!reqHasBoundProj ||
@@ -1027,7 +1051,7 @@ static bool intersectPartialSchemaReq(PartialSchemaRequirements& reqMap,
if (merged) {
// continue around the loop
} else {
- reqMap.emplace(std::move(key), std::move(req));
+ reqMap.add(std::move(key), std::move(req));
return true;
}
}
@@ -1042,7 +1066,7 @@ bool isSubsetOfPartialSchemaReq(const PartialSchemaRequirements& lhs,
// However, we're assuming that 'rhs' has no projections.
// If it did have projections, the result (lhs ^ rhs) would have projections
// and wouldn't match 'lhs'.
- for (auto&& [key, req] : rhs) {
+ for (const auto& [key, req] : rhs.conjuncts()) {
tassert(7155010,
"isSubsetOfPartialSchemaReq expects 'rhs' to have no projections",
!req.getBoundProjectionName());
@@ -1061,7 +1085,7 @@ bool isSubsetOfPartialSchemaReq(const PartialSchemaRequirements& lhs,
bool intersectPartialSchemaReq(PartialSchemaRequirements& target,
const PartialSchemaRequirements& source,
ProjectionRenames& projectionRenames) {
- for (const auto& [key, req] : source) {
+ for (const auto& [key, req] : source.conjuncts()) {
if (!intersectPartialSchemaReq(target, key, req, projectionRenames)) {
return false;
}
@@ -1261,14 +1285,15 @@ static bool computeCandidateIndexEntry(PrefixId& prefixId,
// Add open interval for the first equality prefix.
eqPrefixes.emplace_back(0);
+ // For each component of the index,
for (size_t indexField = 0; indexField < indexCollationSpec.size(); indexField++) {
const auto& indexCollationEntry = indexCollationSpec.at(indexField);
const bool reverse = indexCollationEntry._op == CollationOp::Descending;
bool foundSuitableField = false;
PartialSchemaKey indexKey{scanProjName, indexCollationEntry._path};
- if (auto indexKeyIt = reqMap.find(indexKey); indexKeyIt != reqMap.cend()) {
- if (const PartialSchemaRequirement& req = indexKeyIt->second;
+ if (auto result = reqMap.findFirstConjunct(indexKey)) {
+ if (const auto& [queryPredPos, req] = *result;
fastNullHandling || !req.getIsPerfOnly() || !req.mayReturnNull(constFold)) {
const auto& requiredInterval = req.getIntervals();
const bool success = extendCompoundInterval(prefixId,
@@ -1297,7 +1322,6 @@ static bool computeCandidateIndexEntry(PrefixId& prefixId,
}
entry._intervalPrefixSize++;
- const size_t queryPredPos = std::distance(reqMap.cbegin(), indexKeyIt);
eqPrefixes.back()._predPosSet.insert(queryPredPos);
if (const auto& boundProjName = req.getBoundProjectionName()) {
@@ -1341,19 +1365,16 @@ static bool computeCandidateIndexEntry(PrefixId& prefixId,
if (fusedPath._numTraversesFused > 0) {
allowFuseTraverse[indexField] = false;
}
- auto it = reqMap.find(queryKey);
- tassert(
- 6624158, "QueryKey must exist in the requirements map", it != reqMap.cend());
+ auto result = reqMap.findFirstConjunct(queryKey);
+ tassert(6624158, "QueryKey must exist in the requirements map", result);
+ const auto& [index, req] = *result;
- const PartialSchemaRequirement& req = it->second;
if (!req.getIsPerfOnly()) {
// Only regular requirements are added to residual predicates.
const ProjectionName& tempProjName = getExistingOrTempProjForFieldName(
prefixId, FieldNameType{encodeIndexKeyName(indexField)}, fieldProjMap);
entry._residualRequirements.emplace_back(
- PartialSchemaKey{tempProjName, std::move(*fusedPath._suffix)},
- req,
- std::distance(reqMap.cbegin(), it));
+ PartialSchemaKey{tempProjName, std::move(*fusedPath._suffix)}, req, index);
}
satisfied = true;
@@ -1396,7 +1417,7 @@ CandidateIndexes computeCandidateIndexes(PrefixId& prefixId,
const ConstFoldFn& constFold) {
// Contains one instance for each unmatched key.
PartialSchemaKeySet unsatisfiedKeysInitial;
- for (const auto& [key, req] : reqMap) {
+ for (const auto& [key, req] : reqMap.conjuncts()) {
if (req.getIsPerfOnly()) {
// Perf only do not need to be necessarily satisfied.
continue;
@@ -1445,7 +1466,7 @@ boost::optional<ScanParams> computeScanParams(PrefixId& prefixId,
auto& fieldProjMap = result._fieldProjectionMap;
size_t entryIndex = 0;
- for (const auto& [key, req] : reqMap) {
+ for (const auto& [key, req] : reqMap.conjuncts()) {
if (req.getIsPerfOnly()) {
// Ignore perf only requirements.
continue;
@@ -2352,7 +2373,7 @@ bool pathEndsInTraverse(const optimizer::ABT& path) {
bool hasProperIntervals(const PartialSchemaRequirements& reqMap) {
// Compute if this node has any proper (not fully open) intervals.
- for (const auto& [key, req] : reqMap) {
+ for (const auto& [key, req] : reqMap.conjuncts()) {
if (!isIntervalReqFullyOpenDNF(req.getIntervals())) {
return true;
}