summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSvilen Mihaylov <svilen.mihaylov@mongodb.com>2023-02-13 19:04:02 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2023-02-14 02:38:56 +0000
commit035ba8def8065ad6e5e27cde7a160431968a7fdc (patch)
tree3743f0a63580fdb2e60ede336e3a62584d8019f9
parent26f1b9d0e4aea57e5c7b47fa56ace80ad629d405 (diff)
downloadmongo-035ba8def8065ad6e5e27cde7a160431968a7fdc.tar.gz
SERVER-73817 [CQF] Simplification around projection renames
-rw-r--r--src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp18
-rw-r--r--src/mongo/db/query/optimizer/defs.h2
-rw-r--r--src/mongo/db/query/optimizer/metadata_factory.cpp7
-rw-r--r--src/mongo/db/query/optimizer/utils/utils.cpp67
-rw-r--r--src/mongo/db/query/optimizer/utils/utils.h18
5 files changed, 62 insertions, 50 deletions
diff --git a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
index 01f2bc84c77..53d666ca2be 100644
--- a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
+++ b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
@@ -554,14 +554,14 @@ static boost::optional<ABT> mergeSargableNodes(
}
PartialSchemaRequirements mergedReqs = belowNode.getReqMap();
- ProjectionRenames projectionRenames;
- if (!intersectPartialSchemaReq(mergedReqs, aboveNode.getReqMap(), projectionRenames)) {
+ if (!intersectPartialSchemaReq(mergedReqs, aboveNode.getReqMap())) {
return {};
}
const ProjectionName& scanProjName = indexingAvailability.getScanProjection();
+ ProjectionRenames projectionRenames;
bool hasEmptyInterval = simplifyPartialSchemaReqPaths(
- scanProjName, multikeynessTrie, mergedReqs, ctx.getConstFold());
+ scanProjName, multikeynessTrie, mergedReqs, projectionRenames, ctx.getConstFold());
if (hasEmptyInterval) {
return createEmptyValueScanNode(ctx);
}
@@ -682,8 +682,16 @@ static void convertFilterToSargableNode(ABT::reference_type node,
return;
}
- bool hasEmptyInterval = simplifyPartialSchemaReqPaths(
- scanProjName, scanDef.getMultikeynessTrie(), conversion->_reqMap, ctx.getConstFold());
+ ProjectionRenames projectionRenames_unused;
+ bool hasEmptyInterval = simplifyPartialSchemaReqPaths(scanProjName,
+ scanDef.getMultikeynessTrie(),
+ conversion->_reqMap,
+ projectionRenames_unused,
+ ctx.getConstFold());
+ tassert(6624156,
+ "We should not be seeing renames from a converted Filter",
+ projectionRenames_unused.empty());
+
if (hasEmptyInterval) {
addEmptyValueScanNode(ctx);
return;
diff --git a/src/mongo/db/query/optimizer/defs.h b/src/mongo/db/query/optimizer/defs.h
index 0d4079fda2e..f157558d515 100644
--- a/src/mongo/db/query/optimizer/defs.h
+++ b/src/mongo/db/query/optimizer/defs.h
@@ -68,6 +68,8 @@ using ProjectionNameVector = std::vector<ProjectionName>;
template <typename T>
using ProjectionNameMap = opt::unordered_map<ProjectionName, T, ProjectionName::Hasher>;
+
+// Key: new/target projection, value: existing/source projection.
using ProjectionRenames = ProjectionNameMap<ProjectionName>;
// Map from scanDefName to rid projection name.
diff --git a/src/mongo/db/query/optimizer/metadata_factory.cpp b/src/mongo/db/query/optimizer/metadata_factory.cpp
index 6eadf67d1cc..1479f056184 100644
--- a/src/mongo/db/query/optimizer/metadata_factory.cpp
+++ b/src/mongo/db/query/optimizer/metadata_factory.cpp
@@ -69,8 +69,13 @@ ScanDefinition createScanDef(ScanDefOptions options,
// Simplify partial filter requirements using the non-multikey paths.
for (auto& [indexDefName, indexDef] : indexDefs) {
+ ProjectionRenames projRenames_unused;
[[maybe_unused]] const bool hasEmptyInterval = simplifyPartialSchemaReqPaths(
- "<root>", multikeynessTrie, indexDef.getPartialReqMap(), constFold);
+ "<root>", multikeynessTrie, indexDef.getPartialReqMap(), projRenames_unused, constFold);
+ tassert(6624157,
+ "We should not be seeing renames from partial index filters",
+ projRenames_unused.empty());
+
// If "hasEmptyInterval" is set, we have a partial filter index with an unsatisfiable
// condition, which is thus guaranteed to never contain any documents.
}
diff --git a/src/mongo/db/query/optimizer/utils/utils.cpp b/src/mongo/db/query/optimizer/utils/utils.cpp
index 3c12f562616..497129e2ffa 100644
--- a/src/mongo/db/query/optimizer/utils/utils.cpp
+++ b/src/mongo/db/query/optimizer/utils/utils.cpp
@@ -248,11 +248,7 @@ public:
auto& leftReqMap = leftResult->_reqMap;
auto& rightReqMap = rightResult->_reqMap;
if constexpr (isMultiplicative) {
- ProjectionRenames projectionRenames;
- if (!intersectPartialSchemaReq(leftReqMap, rightReqMap, projectionRenames)) {
- return {};
- }
- if (!projectionRenames.empty()) {
+ if (!intersectPartialSchemaReq(leftReqMap, rightReqMap)) {
return {};
}
@@ -688,6 +684,7 @@ boost::optional<PartialSchemaReqConversion> convertExprToPartialSchemaReq(
bool simplifyPartialSchemaReqPaths(const ProjectionName& scanProjName,
const MultikeynessTrie& multikeynessTrie,
PartialSchemaRequirements& reqMap,
+ ProjectionRenames& projectionRenames,
const ConstFoldFn& constFold) {
PartialSchemaRequirements result;
boost::optional<std::pair<PartialSchemaKey, PartialSchemaRequirement>> prevEntry;
@@ -704,26 +701,30 @@ bool simplifyPartialSchemaReqPaths(const ProjectionName& scanProjName,
// Simplify paths by eliminating unnecessary Traverse elements.
for (const auto& [key, req] : reqMap.conjuncts()) {
PartialSchemaKey newKey = key;
+
bool simplified = false;
- if (key._projectionName == scanProjName && checkPathContainsTraverse(newKey._path)) {
+ const bool containedTraverse = checkPathContainsTraverse(newKey._path);
+ if (key._projectionName == scanProjName && containedTraverse) {
simplified |= simplifyTraverseNonArray(newKey._path, multikeynessTrie);
}
- // Maintain the invariant that Traverse-less keys appear only once:
- // we can move the conjunction into the intervals and simplify.
+ // Ensure that Traverse-less keys appear only once: we can move the conjunction into the
+ // intervals and simplify.
if (prevEntry) {
- if (simplified && prevEntry->first == newKey) {
+ if ((!containedTraverse || simplified) && prevEntry->first == newKey) {
auto& prevReq = prevEntry->second;
- boost::optional<ProjectionName> resultBoundProjName;
+ boost::optional<ProjectionName> resultBoundProjName =
+ prevReq.getBoundProjectionName();
auto resultIntervals = prevReq.getIntervals();
if (const auto& boundProjName = req.getBoundProjectionName()) {
- tassert(6624168,
- "Should not be seeing more than one bound projection per key",
- !prevReq.getBoundProjectionName());
- resultBoundProjName = boundProjName;
- } else {
- resultBoundProjName = prevReq.getBoundProjectionName();
+ if (resultBoundProjName) {
+ // The existing name wins (stays in 'reqMap'). We tell the caller that the
+ // name "boundProjName" is available under "resultBoundProjName".
+ projectionRenames.emplace(*boundProjName, *resultBoundProjName);
+ } else {
+ resultBoundProjName = boundProjName;
+ }
}
combineIntervalsDNF(true /*intersect*/, resultIntervals, req.getIntervals());
@@ -793,8 +794,7 @@ bool simplifyPartialSchemaReqPaths(const ProjectionName& scanProjName,
*/
static bool intersectPartialSchemaReq(PartialSchemaRequirements& reqMap,
PartialSchemaKey key,
- PartialSchemaRequirement req,
- ProjectionRenames& projectionRenames) {
+ PartialSchemaRequirement req) {
for (;;) {
bool merged = false;
@@ -802,6 +802,8 @@ static bool intersectPartialSchemaReq(PartialSchemaRequirements& reqMap,
const bool pathHasTraverse = !pathIsId && checkPathContainsTraverse(key._path);
{
bool success = false;
+ bool addNewKey = false;
+
// Look for exact match on the path, and if found combine intervals.
for (auto& [existingKey, existingReq] : reqMap.conjuncts()) {
if (existingKey != key) {
@@ -811,8 +813,9 @@ static bool intersectPartialSchemaReq(PartialSchemaRequirements& reqMap,
auto resultIntervals = existingReq.getIntervals();
if (pathHasTraverse) {
combineIntervalsDNF(true /*intersect*/, resultIntervals, req.getIntervals());
- if (resultIntervals == existingReq.getIntervals()) {
- // Existing interval subsumes the new one.
+ if (resultIntervals == existingReq.getIntervals() ||
+ resultIntervals == req.getIntervals()) {
+ // Existing interval subsumes the new one or vice versa.
success = true;
}
} else {
@@ -826,11 +829,9 @@ static bool intersectPartialSchemaReq(PartialSchemaRequirements& reqMap,
existingReq.getBoundProjectionName();
if (const auto& boundProjName = req.getBoundProjectionName()) {
if (resultBoundProjName) {
- // The new and existing projections both bind a name, so:
- // - The existing name wins (stays in 'reqMap').
- // - We tell the caller that the name bound is 'req' is now
- // available by the name in 'existingReq'.
- projectionRenames.emplace(*boundProjName, *resultBoundProjName);
+ // The new and existing projections both bind a name. Add an entry with
+ // the new name. We will attempt to simplify later.
+ addNewKey = true;
} else {
// Only the new projection binds a name, so we'll update 'reqMap' to
// include it.
@@ -849,6 +850,9 @@ static bool intersectPartialSchemaReq(PartialSchemaRequirements& reqMap,
}
if (success) {
+ if (addNewKey) {
+ reqMap.add(std::move(key), std::move(req));
+ }
return true;
}
}
@@ -900,9 +904,8 @@ bool isSubsetOfPartialSchemaReq(const PartialSchemaRequirements& lhs,
!req.getBoundProjectionName());
}
- ProjectionRenames projectionRenames_unused;
PartialSchemaRequirements intersection = lhs;
- if (intersectPartialSchemaReq(intersection, rhs, projectionRenames_unused)) {
+ if (intersectPartialSchemaReq(intersection, rhs)) {
return intersection == lhs;
}
// Intersection was empty-set, and we assume neither input is empty-set.
@@ -911,10 +914,9 @@ bool isSubsetOfPartialSchemaReq(const PartialSchemaRequirements& lhs,
}
bool intersectPartialSchemaReq(PartialSchemaRequirements& target,
- const PartialSchemaRequirements& source,
- ProjectionRenames& projectionRenames) {
+ const PartialSchemaRequirements& source) {
for (const auto& [key, req] : source.conjuncts()) {
- if (!intersectPartialSchemaReq(target, key, req, projectionRenames)) {
+ if (!intersectPartialSchemaReq(target, key, req)) {
return false;
}
}
@@ -1582,13 +1584,10 @@ void sortResidualRequirements(ResidualRequirementsWithCE& residualReq) {
}
}
-void applyProjectionRenames(ProjectionRenames projectionRenames,
- ABT& node,
- const std::function<void(const ABT& node)>& visitor) {
+void applyProjectionRenames(ProjectionRenames projectionRenames, ABT& node) {
for (auto&& [targetProjName, sourceProjName] : projectionRenames) {
node = make<EvaluationNode>(
std::move(targetProjName), make<Variable>(std::move(sourceProjName)), std::move(node));
- visitor(node);
}
}
diff --git a/src/mongo/db/query/optimizer/utils/utils.h b/src/mongo/db/query/optimizer/utils/utils.h
index 65829118544..54a3519625c 100644
--- a/src/mongo/db/query/optimizer/utils/utils.h
+++ b/src/mongo/db/query/optimizer/utils/utils.h
@@ -241,11 +241,15 @@ boost::optional<PartialSchemaReqConversion> convertExprToPartialSchemaReq(
/**
* Given a set of non-multikey paths, remove redundant Traverse elements from paths in a Partial
- * Schema Requirement structure. Returns true if we have an empty result after simplification.
+ * Schema Requirement structure. Following that the intervals of any remaining non-multikey paths
+ * (following simplification) on the same key are intersected. Returns true if we have an empty
+ * result after simplification. Each redundant binding gets an entry in 'projectionRenames', which
+ * maps redundant name to the de-duplicated name.
*/
bool simplifyPartialSchemaReqPaths(const ProjectionName& scanProjName,
const MultikeynessTrie& multikeynessTrie,
PartialSchemaRequirements& reqMap,
+ ProjectionRenames& projectionRenames,
const ConstFoldFn& constFold);
/**
@@ -272,9 +276,7 @@ bool isSubsetOfPartialSchemaReq(const PartialSchemaRequirements& lhs,
* The intersection:
* - is a predicate that matches iff both original predicates match.
* - has all the bindings from 'target' and 'source', but excluding
- * bindings that would be redundant (have the same key). Each
- * redundant binding gets an entry in 'projectionRenames', which maps
- * the redundant name to the de-duplicated name.
+ * bindings that would be redundant (have the same key).
*
* "Failure" means we are unable to represent the result as a PartialSchemaRequirements.
* This can happen when:
@@ -282,8 +284,7 @@ bool isSubsetOfPartialSchemaReq(const PartialSchemaRequirements& lhs,
* - 'source' reads from a projection bound by 'target'.
*/
bool intersectPartialSchemaReq(PartialSchemaRequirements& target,
- const PartialSchemaRequirements& source,
- ProjectionRenames& projectionRenames);
+ const PartialSchemaRequirements& source);
/**
@@ -343,10 +344,7 @@ void lowerPartialSchemaRequirements(CEType scanGroupCE,
void sortResidualRequirements(ResidualRequirementsWithCE& residualReq);
-void applyProjectionRenames(
- ProjectionRenames projectionRenames,
- ABT& node,
- const std::function<void(const ABT& node)>& visitor = [](const ABT&) {});
+void applyProjectionRenames(ProjectionRenames projectionRenames, ABT& node);
void removeRedundantResidualPredicates(const ProjectionNameOrderPreservingSet& requiredProjections,
ResidualRequirements& residualReqs,