summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSvilen Mihaylov <svilen.mihaylov@mongodb.com>2022-03-14 22:51:27 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-03-14 23:27:58 +0000
commit8d7a536c1e60452028501e7871bace126aee1d65 (patch)
tree962cd61a6d3641d08ce592d0309d5e857a545b98
parentebef0c9e6507fae743189cef2f2517016b6873f2 (diff)
downloadmongo-8d7a536c1e60452028501e7871bace126aee1d65.tar.gz
SERVER-64414 Fix invalid reference tracker state during path fusion rewrite
-rw-r--r--src/mongo/db/pipeline/abt/pipeline_test.cpp60
-rw-r--r--src/mongo/db/query/optimizer/rewrites/path.cpp137
-rw-r--r--src/mongo/db/query/optimizer/rewrites/path.h2
-rw-r--r--src/mongo/db/query/optimizer/syntax/expr.h8
4 files changed, 199 insertions, 8 deletions
diff --git a/src/mongo/db/pipeline/abt/pipeline_test.cpp b/src/mongo/db/pipeline/abt/pipeline_test.cpp
index 36dc2235db8..332cb1221ba 100644
--- a/src/mongo/db/pipeline/abt/pipeline_test.cpp
+++ b/src/mongo/db/pipeline/abt/pipeline_test.cpp
@@ -2504,5 +2504,65 @@ TEST(ABTTranslate, CommonExpressionElimination) {
rootNode);
}
+TEST(ABTTranslate, GroupByDependency) {
+ PrefixId prefixId;
+ Metadata metadata = {{{"test", {{}, {}}}}};
+
+ ABT translated =
+ translatePipeline(metadata,
+ "[{$group: {_id: {}, b: {$addToSet: '$a'}}}, {$project: "
+ "{_id: 0, b: {$size: '$b'}}}, {$project: {_id: 0, c: '$b'}}]",
+ "test",
+ prefixId);
+
+ OptPhaseManager phaseManager({OptPhaseManager::OptPhase::ConstEvalPre,
+ OptPhaseManager::OptPhase::PathFuse,
+ OptPhaseManager::OptPhase::MemoSubstitutionPhase,
+ OptPhaseManager::OptPhase::MemoExplorationPhase,
+ OptPhaseManager::OptPhase::MemoImplementationPhase},
+ prefixId,
+ metadata,
+ DebugInfo::kDefaultForTests);
+
+ ABT optimized = translated;
+ ASSERT_TRUE(phaseManager.optimize(optimized));
+
+ // Demonstrate that "c" is set to the array size (not the array itself coming from the group).
+ ASSERT_EXPLAIN_V2(
+ "Root []\n"
+ "| | projections: \n"
+ "| | combinedProjection_1\n"
+ "| RefBlock: \n"
+ "| Variable [combinedProjection_1]\n"
+ "Evaluation []\n"
+ "| BindBlock:\n"
+ "| [combinedProjection_1]\n"
+ "| EvalPath []\n"
+ "| | Const [{}]\n"
+ "| PathComposeM []\n"
+ "| | PathField [c]\n"
+ "| | PathConstant []\n"
+ "| | FunctionCall [getArraySize]\n"
+ "| | Variable [b_agg_0]\n"
+ "| PathKeep []\n"
+ "GroupBy []\n"
+ "| | groupings: \n"
+ "| | RefBlock: \n"
+ "| | Variable [groupByProj_0]\n"
+ "| aggregations: \n"
+ "| [b_agg_0]\n"
+ "| FunctionCall [$addToSet]\n"
+ "| Variable [groupByInputProj_0]\n"
+ "Evaluation []\n"
+ "| BindBlock:\n"
+ "| [groupByProj_0]\n"
+ "| Const [{}]\n"
+ "PhysicalScan [{'a': groupByInputProj_0}, test]\n"
+ " BindBlock:\n"
+ " [groupByInputProj_0]\n"
+ " Source []\n",
+ optimized);
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/query/optimizer/rewrites/path.cpp b/src/mongo/db/query/optimizer/rewrites/path.cpp
index 1378dadb6ae..d9f240a9ad6 100644
--- a/src/mongo/db/query/optimizer/rewrites/path.cpp
+++ b/src/mongo/db/query/optimizer/rewrites/path.cpp
@@ -44,12 +44,13 @@ ABT::reference_type PathFusion::follow(ABT::reference_type n) {
bool PathFusion::fuse(ABT& lhs, const ABT& rhs) {
if (auto rhsComposeM = rhs.cast<PathComposeM>(); rhsComposeM != nullptr) {
- if (_info[rhsComposeM->getPath1().cast<PathSyntaxSort>()]._isConst) {
- if (fuse(lhs, rhsComposeM->getPath1())) {
+ // Try to fuse first with right path, then left.
+ if (_info[rhsComposeM->getPath2().cast<PathSyntaxSort>()]._isConst) {
+ if (fuse(lhs, rhsComposeM->getPath2())) {
return true;
}
- if (_info[rhsComposeM->getPath2().cast<PathSyntaxSort>()]._isConst &&
- fuse(lhs, rhsComposeM->getPath2())) {
+ if (_info[rhsComposeM->getPath1().cast<PathSyntaxSort>()]._isConst &&
+ fuse(lhs, rhsComposeM->getPath1())) {
return true;
}
}
@@ -110,14 +111,20 @@ bool PathFusion::fuse(ABT& lhs, const ABT& rhs) {
}
bool PathFusion::optimize(ABT& root) {
- _changed = false;
- algebra::transport<true>(root, *this);
+ for (;;) {
+ _changed = false;
+ algebra::transport<true>(root, *this);
+
+ if (!_changed) {
+ break;
+ }
- if (_changed) {
_env.rebuild(root);
+ _redundant.clear();
+ _info.clear();
}
- return _changed;
+ return false;
}
void PathFusion::transport(ABT& n, const PathConstant& path, ABT& c) {
@@ -152,6 +159,10 @@ void PathFusion::transport(ABT& n, const PathCompare& path, ABT& c) {
}
void PathFusion::transport(ABT& n, const PathGet& get, ABT& path) {
+ if (_changed) {
+ return;
+ }
+
// Get "a" Const <c> -> Const <c>
if (auto constPath = path.cast<PathConstant>(); constPath) {
// Pull out the constant path
@@ -200,6 +211,10 @@ void PathFusion::transport(ABT& n, const PathTraverse& path, ABT& inner) {
}
void PathFusion::transport(ABT& n, const PathComposeM& path, ABT& p1, ABT& p2) {
+ if (_changed) {
+ return;
+ }
+
if (auto p1Const = p1.cast<PathConstant>(); p1Const != nullptr) {
switch (_kindCtx.back()) {
case Kind::filter:
@@ -279,7 +294,103 @@ void PathFusion::transport(ABT& n, const PathComposeM& path, ABT& p1, ABT& p2) {
}
}
+void PathFusion::tryFuseComposition(ABT& n, const ABT& input) {
+ // Check to see if our flattened composition consists of constant branches containing only
+ // Field and Keep elements. If we have duplicate Field branches then retain only the
+ // latest one. For example:
+ // (Field "a" ConstPath1) * (Field "b" ConstPath2) * Keep "a" -> Field "a" ConstPath1
+ // (Field "a" ConstPath1) * (Field "a" ConstPath2) -> Field "a" ConstPath2
+ // TODO: handle Drop elements.
+
+ // Latest value per field.
+ opt::unordered_map<FieldNameType, ABT> fieldMap;
+ // Used to preserve the relative order in which fields are set on the result.
+ FieldPathType orderedFieldNames;
+ boost::optional<opt::unordered_set<FieldNameType>> toKeep;
+
+ Type inputType = Type::any;
+ if (auto constPtr = input.cast<Constant>(); constPtr != nullptr && constPtr->isObject()) {
+ inputType = Type::object;
+ }
+
+ bool updated = false;
+ for (const auto& branch : collectComposed(n)) {
+ auto info = _info.find(branch.cast<PathSyntaxSort>());
+ if (info == _info.cend()) {
+ return;
+ }
+
+ if (auto fieldPtr = branch.cast<PathField>()) {
+ if (!info->second._isConst) {
+ // Rewrite is valid only with constant paths.
+ return;
+ }
+
+ // Overwrite field with the latest value.
+ if (fieldMap.insert_or_assign(fieldPtr->name(), branch).second) {
+ orderedFieldNames.push_back(fieldPtr->name());
+ } else {
+ updated = true;
+ }
+ } else if (auto keepPtr = branch.cast<PathKeep>()) {
+ for (auto it = fieldMap.begin(); it != fieldMap.cend();) {
+ if (keepPtr->getNames().count(it->first) == 0) {
+ // Field is not kept, erase.
+ fieldMap.erase(it++);
+ updated = true;
+ } else {
+ it++;
+ }
+ }
+
+ auto newKeepSet = keepPtr->getNames();
+ if (toKeep) {
+ for (auto it = newKeepSet.begin(); it != newKeepSet.end();) {
+ if (toKeep->count(*it) == 0) {
+ // Field was not previously kept.
+ newKeepSet.erase(it++);
+ updated = true;
+ } else {
+ it++;
+ }
+ }
+ }
+ toKeep = std::move(newKeepSet);
+ } else if (auto defaultPtr = branch.cast<PathDefault>(); defaultPtr != nullptr &&
+ defaultPtr->getDefault().is<Constant>() &&
+ defaultPtr->getDefault().cast<Constant>()->isObject() &&
+ inputType == Type::object) {
+ // Skip over PathDefault with an empty object since our input is already an object.
+ updated = true;
+ } else {
+ return;
+ };
+ }
+ if (!updated) {
+ return;
+ }
+
+ ABT result = make<PathIdentity>();
+ if (toKeep) {
+ maybeComposePath(result, make<PathKeep>(std::move(*toKeep)));
+ }
+ for (const auto& fieldName : orderedFieldNames) {
+ auto it = fieldMap.find(fieldName);
+ if (it != fieldMap.cend()) {
+ // We may have removed the field name by virtue of not keeping.
+ maybeComposePath(result, it->second);
+ }
+ }
+
+ std::swap(n, result);
+ _changed = true;
+}
+
void PathFusion::transport(ABT& n, const EvalPath& eval, ABT& path, ABT& input) {
+ if (_changed) {
+ return;
+ }
+
auto realInput = follow(input);
// If we are evaluating const path then we can simply replace the whole expression with the
// result.
@@ -308,11 +419,18 @@ void PathFusion::transport(ABT& n, const EvalPath& eval, ABT& path, ABT& input)
_changed = true;
}
+ } else {
+ tryFuseComposition(path, input);
}
+
_kindCtx.pop_back();
}
void PathFusion::transport(ABT& n, const EvalFilter& eval, ABT& path, ABT& input) {
+ if (_changed) {
+ return;
+ }
+
auto realInput = follow(input);
// If we are evaluating const path then we can simply replace the whole expression with the
// result.
@@ -340,7 +458,10 @@ void PathFusion::transport(ABT& n, const EvalFilter& eval, ABT& path, ABT& input
_changed = true;
}
+ } else {
+ tryFuseComposition(path, input);
}
+
_kindCtx.pop_back();
}
} // namespace mongo::optimizer
diff --git a/src/mongo/db/query/optimizer/rewrites/path.h b/src/mongo/db/query/optimizer/rewrites/path.h
index 6e53fe5a7b5..6fa95534360 100644
--- a/src/mongo/db/query/optimizer/rewrites/path.h
+++ b/src/mongo/db/query/optimizer/rewrites/path.h
@@ -83,6 +83,8 @@ private:
}
bool fuse(ABT& lhs, const ABT& rhs);
+ void tryFuseComposition(ABT& n, const ABT& input);
+
VariableEnvironment& _env;
opt::unordered_map<const PathSyntaxSort*, CollectedInfo> _info;
opt::unordered_set<const PathSyntaxSort*> _redundant;
diff --git a/src/mongo/db/query/optimizer/syntax/expr.h b/src/mongo/db/query/optimizer/syntax/expr.h
index 636cf75fda7..217fbac9e8e 100644
--- a/src/mongo/db/query/optimizer/syntax/expr.h
+++ b/src/mongo/db/query/optimizer/syntax/expr.h
@@ -86,6 +86,14 @@ public:
return _tag == sbe::value::TypeTags::Nothing;
}
+ bool isObject() const {
+ return _tag == sbe::value::TypeTags::Object;
+ }
+
+ bool isArray() const {
+ return _tag == sbe::value::TypeTags::Array;
+ }
+
private:
sbe::value::TypeTags _tag;
sbe::value::Value _val;