From 8d7a536c1e60452028501e7871bace126aee1d65 Mon Sep 17 00:00:00 2001 From: Svilen Mihaylov Date: Mon, 14 Mar 2022 22:51:27 +0000 Subject: SERVER-64414 Fix invalid reference tracker state during path fusion rewrite --- src/mongo/db/pipeline/abt/pipeline_test.cpp | 60 +++++++++++ src/mongo/db/query/optimizer/rewrites/path.cpp | 137 +++++++++++++++++++++++-- src/mongo/db/query/optimizer/rewrites/path.h | 2 + src/mongo/db/query/optimizer/syntax/expr.h | 8 ++ 4 files changed, 199 insertions(+), 8 deletions(-) diff --git a/src/mongo/db/pipeline/abt/pipeline_test.cpp b/src/mongo/db/pipeline/abt/pipeline_test.cpp index 36dc2235db8..332cb1221ba 100644 --- a/src/mongo/db/pipeline/abt/pipeline_test.cpp +++ b/src/mongo/db/pipeline/abt/pipeline_test.cpp @@ -2504,5 +2504,65 @@ TEST(ABTTranslate, CommonExpressionElimination) { rootNode); } +TEST(ABTTranslate, GroupByDependency) { + PrefixId prefixId; + Metadata metadata = {{{"test", {{}, {}}}}}; + + ABT translated = + translatePipeline(metadata, + "[{$group: {_id: {}, b: {$addToSet: '$a'}}}, {$project: " + "{_id: 0, b: {$size: '$b'}}}, {$project: {_id: 0, c: '$b'}}]", + "test", + prefixId); + + OptPhaseManager phaseManager({OptPhaseManager::OptPhase::ConstEvalPre, + OptPhaseManager::OptPhase::PathFuse, + OptPhaseManager::OptPhase::MemoSubstitutionPhase, + OptPhaseManager::OptPhase::MemoExplorationPhase, + OptPhaseManager::OptPhase::MemoImplementationPhase}, + prefixId, + metadata, + DebugInfo::kDefaultForTests); + + ABT optimized = translated; + ASSERT_TRUE(phaseManager.optimize(optimized)); + + // Demonstrate that "c" is set to the array size (not the array itself coming from the group). + ASSERT_EXPLAIN_V2( + "Root []\n" + "| | projections: \n" + "| | combinedProjection_1\n" + "| RefBlock: \n" + "| Variable [combinedProjection_1]\n" + "Evaluation []\n" + "| BindBlock:\n" + "| [combinedProjection_1]\n" + "| EvalPath []\n" + "| | Const [{}]\n" + "| PathComposeM []\n" + "| | PathField [c]\n" + "| | PathConstant []\n" + "| | FunctionCall [getArraySize]\n" + "| | Variable [b_agg_0]\n" + "| PathKeep []\n" + "GroupBy []\n" + "| | groupings: \n" + "| | RefBlock: \n" + "| | Variable [groupByProj_0]\n" + "| aggregations: \n" + "| [b_agg_0]\n" + "| FunctionCall [$addToSet]\n" + "| Variable [groupByInputProj_0]\n" + "Evaluation []\n" + "| BindBlock:\n" + "| [groupByProj_0]\n" + "| Const [{}]\n" + "PhysicalScan [{'a': groupByInputProj_0}, test]\n" + " BindBlock:\n" + " [groupByInputProj_0]\n" + " Source []\n", + optimized); +} + } // namespace } // namespace mongo diff --git a/src/mongo/db/query/optimizer/rewrites/path.cpp b/src/mongo/db/query/optimizer/rewrites/path.cpp index 1378dadb6ae..d9f240a9ad6 100644 --- a/src/mongo/db/query/optimizer/rewrites/path.cpp +++ b/src/mongo/db/query/optimizer/rewrites/path.cpp @@ -44,12 +44,13 @@ ABT::reference_type PathFusion::follow(ABT::reference_type n) { bool PathFusion::fuse(ABT& lhs, const ABT& rhs) { if (auto rhsComposeM = rhs.cast(); rhsComposeM != nullptr) { - if (_info[rhsComposeM->getPath1().cast()]._isConst) { - if (fuse(lhs, rhsComposeM->getPath1())) { + // Try to fuse first with right path, then left. + if (_info[rhsComposeM->getPath2().cast()]._isConst) { + if (fuse(lhs, rhsComposeM->getPath2())) { return true; } - if (_info[rhsComposeM->getPath2().cast()]._isConst && - fuse(lhs, rhsComposeM->getPath2())) { + if (_info[rhsComposeM->getPath1().cast()]._isConst && + fuse(lhs, rhsComposeM->getPath1())) { return true; } } @@ -110,14 +111,20 @@ bool PathFusion::fuse(ABT& lhs, const ABT& rhs) { } bool PathFusion::optimize(ABT& root) { - _changed = false; - algebra::transport(root, *this); + for (;;) { + _changed = false; + algebra::transport(root, *this); + + if (!_changed) { + break; + } - if (_changed) { _env.rebuild(root); + _redundant.clear(); + _info.clear(); } - return _changed; + return false; } void PathFusion::transport(ABT& n, const PathConstant& path, ABT& c) { @@ -152,6 +159,10 @@ void PathFusion::transport(ABT& n, const PathCompare& path, ABT& c) { } void PathFusion::transport(ABT& n, const PathGet& get, ABT& path) { + if (_changed) { + return; + } + // Get "a" Const -> Const if (auto constPath = path.cast(); constPath) { // Pull out the constant path @@ -200,6 +211,10 @@ void PathFusion::transport(ABT& n, const PathTraverse& path, ABT& inner) { } void PathFusion::transport(ABT& n, const PathComposeM& path, ABT& p1, ABT& p2) { + if (_changed) { + return; + } + if (auto p1Const = p1.cast(); p1Const != nullptr) { switch (_kindCtx.back()) { case Kind::filter: @@ -279,7 +294,103 @@ void PathFusion::transport(ABT& n, const PathComposeM& path, ABT& p1, ABT& p2) { } } +void PathFusion::tryFuseComposition(ABT& n, const ABT& input) { + // Check to see if our flattened composition consists of constant branches containing only + // Field and Keep elements. If we have duplicate Field branches then retain only the + // latest one. For example: + // (Field "a" ConstPath1) * (Field "b" ConstPath2) * Keep "a" -> Field "a" ConstPath1 + // (Field "a" ConstPath1) * (Field "a" ConstPath2) -> Field "a" ConstPath2 + // TODO: handle Drop elements. + + // Latest value per field. + opt::unordered_map fieldMap; + // Used to preserve the relative order in which fields are set on the result. + FieldPathType orderedFieldNames; + boost::optional> toKeep; + + Type inputType = Type::any; + if (auto constPtr = input.cast(); constPtr != nullptr && constPtr->isObject()) { + inputType = Type::object; + } + + bool updated = false; + for (const auto& branch : collectComposed(n)) { + auto info = _info.find(branch.cast()); + if (info == _info.cend()) { + return; + } + + if (auto fieldPtr = branch.cast()) { + if (!info->second._isConst) { + // Rewrite is valid only with constant paths. + return; + } + + // Overwrite field with the latest value. + if (fieldMap.insert_or_assign(fieldPtr->name(), branch).second) { + orderedFieldNames.push_back(fieldPtr->name()); + } else { + updated = true; + } + } else if (auto keepPtr = branch.cast()) { + for (auto it = fieldMap.begin(); it != fieldMap.cend();) { + if (keepPtr->getNames().count(it->first) == 0) { + // Field is not kept, erase. + fieldMap.erase(it++); + updated = true; + } else { + it++; + } + } + + auto newKeepSet = keepPtr->getNames(); + if (toKeep) { + for (auto it = newKeepSet.begin(); it != newKeepSet.end();) { + if (toKeep->count(*it) == 0) { + // Field was not previously kept. + newKeepSet.erase(it++); + updated = true; + } else { + it++; + } + } + } + toKeep = std::move(newKeepSet); + } else if (auto defaultPtr = branch.cast(); defaultPtr != nullptr && + defaultPtr->getDefault().is() && + defaultPtr->getDefault().cast()->isObject() && + inputType == Type::object) { + // Skip over PathDefault with an empty object since our input is already an object. + updated = true; + } else { + return; + }; + } + if (!updated) { + return; + } + + ABT result = make(); + if (toKeep) { + maybeComposePath(result, make(std::move(*toKeep))); + } + for (const auto& fieldName : orderedFieldNames) { + auto it = fieldMap.find(fieldName); + if (it != fieldMap.cend()) { + // We may have removed the field name by virtue of not keeping. + maybeComposePath(result, it->second); + } + } + + std::swap(n, result); + _changed = true; +} + void PathFusion::transport(ABT& n, const EvalPath& eval, ABT& path, ABT& input) { + if (_changed) { + return; + } + auto realInput = follow(input); // If we are evaluating const path then we can simply replace the whole expression with the // result. @@ -308,11 +419,18 @@ void PathFusion::transport(ABT& n, const EvalPath& eval, ABT& path, ABT& input) _changed = true; } + } else { + tryFuseComposition(path, input); } + _kindCtx.pop_back(); } void PathFusion::transport(ABT& n, const EvalFilter& eval, ABT& path, ABT& input) { + if (_changed) { + return; + } + auto realInput = follow(input); // If we are evaluating const path then we can simply replace the whole expression with the // result. @@ -340,7 +458,10 @@ void PathFusion::transport(ABT& n, const EvalFilter& eval, ABT& path, ABT& input _changed = true; } + } else { + tryFuseComposition(path, input); } + _kindCtx.pop_back(); } } // namespace mongo::optimizer diff --git a/src/mongo/db/query/optimizer/rewrites/path.h b/src/mongo/db/query/optimizer/rewrites/path.h index 6e53fe5a7b5..6fa95534360 100644 --- a/src/mongo/db/query/optimizer/rewrites/path.h +++ b/src/mongo/db/query/optimizer/rewrites/path.h @@ -83,6 +83,8 @@ private: } bool fuse(ABT& lhs, const ABT& rhs); + void tryFuseComposition(ABT& n, const ABT& input); + VariableEnvironment& _env; opt::unordered_map _info; opt::unordered_set _redundant; diff --git a/src/mongo/db/query/optimizer/syntax/expr.h b/src/mongo/db/query/optimizer/syntax/expr.h index 636cf75fda7..217fbac9e8e 100644 --- a/src/mongo/db/query/optimizer/syntax/expr.h +++ b/src/mongo/db/query/optimizer/syntax/expr.h @@ -86,6 +86,14 @@ public: return _tag == sbe::value::TypeTags::Nothing; } + bool isObject() const { + return _tag == sbe::value::TypeTags::Object; + } + + bool isArray() const { + return _tag == sbe::value::TypeTags::Array; + } + private: sbe::value::TypeTags _tag; sbe::value::Value _val; -- cgit v1.2.1