diff options
-rw-r--r-- | jstests/cqf/project_expr_dependency.js | 26 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/rewrites/path.cpp | 50 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/rewrites/path.h | 9 | ||||
-rw-r--r-- | src/mongo/db/query/optimizer/rewrites/path_optimizer_test.cpp | 90 |
4 files changed, 159 insertions, 16 deletions
diff --git a/jstests/cqf/project_expr_dependency.js b/jstests/cqf/project_expr_dependency.js new file mode 100644 index 00000000000..0c3d1510d1b --- /dev/null +++ b/jstests/cqf/project_expr_dependency.js @@ -0,0 +1,26 @@ +(function() { +"use strict"; + +load("jstests/libs/optimizer_utils.js"); // For checkCascadesOptimizerEnabled. +if (!checkCascadesOptimizerEnabled(db)) { + jsTestLog("Skipping test because the optimizer is not enabled"); + return; +} + +const t = db.cqf_project_expr_dependency; +t.drop(); + +for (let i = 0; i < 100; i++) { + assert.commandWorked(t.insert({a1: i, b1: i, c1: i})); +} + +const res = t.explain("executionStats").aggregate([ + {$addFields: {x: '$a1', y: '$b1', z: '$c1'}}, + {$addFields: {a: {$add: ["$x", "$y"]}, b: {$add: ["$y", "$z"]}}}, + {$project: {_id: 0, out: "$b"}} +]); + +// Demonstrate we only need to read "b1" and "c1" from the collection. +const scanNodeProjFieldMap = navigateToPlanPath(res, "child.child.fieldProjectionMap"); +assert.eq(["b1", "c1"], Object.keys(scanNodeProjFieldMap)); +}()); diff --git a/src/mongo/db/query/optimizer/rewrites/path.cpp b/src/mongo/db/query/optimizer/rewrites/path.cpp index 2752b72c752..58c5b34a68b 100644 --- a/src/mongo/db/query/optimizer/rewrites/path.cpp +++ b/src/mongo/db/query/optimizer/rewrites/path.cpp @@ -44,13 +44,8 @@ ABT::reference_type PathFusion::follow(ABT::reference_type n) { bool PathFusion::fuse(ABT& lhs, const ABT& rhs) { if (auto rhsComposeM = rhs.cast<PathComposeM>(); rhsComposeM != nullptr) { - // Try to fuse first with right path, then left. - if (_info[rhsComposeM->getPath2().cast<PathSyntaxSort>()]._isConst) { - if (fuse(lhs, rhsComposeM->getPath2())) { - return true; - } - if (_info[rhsComposeM->getPath1().cast<PathSyntaxSort>()]._isConst && - fuse(lhs, rhsComposeM->getPath1())) { + for (const auto& branch : collectComposed(rhs)) { + if (_info[branch.cast<PathSyntaxSort>()]._isConst && fuse(lhs, branch)) { return true; } } @@ -307,7 +302,7 @@ void PathFusion::transport(ABT& n, const PathComposeM& path, ABT& p1, ABT& p2) { } } -void PathFusion::tryFuseComposition(ABT& n, const ABT& input) { +void PathFusion::tryFuseComposition(ABT& n, ABT& input) { // Check to see if our flattened composition consists of constant branches containing only // Field and Keep elements. If we have duplicate Field branches then retain only the // latest one. For example: @@ -327,6 +322,7 @@ void PathFusion::tryFuseComposition(ABT& n, const ABT& input) { } bool updated = false; + bool hasDefault = false; for (const auto& branch : collectComposed(n)) { auto info = _info.find(branch.cast<PathSyntaxSort>()); if (info == _info.cend()) { @@ -369,19 +365,43 @@ void PathFusion::tryFuseComposition(ABT& n, const ABT& input) { } } toKeep = std::move(newKeepSet); - } else if (auto defaultPtr = branch.cast<PathDefault>(); defaultPtr != nullptr && - defaultPtr->getDefault().is<Constant>() && - defaultPtr->getDefault().cast<Constant>()->isObject() && - inputType == Type::object) { - // Skip over PathDefault with an empty object since our input is already an object. - updated = true; + } else if (auto defaultPtr = branch.cast<PathDefault>(); defaultPtr != nullptr) { + if (auto constPtr = defaultPtr->getDefault().cast<Constant>(); + constPtr != nullptr && constPtr->isObject() && inputType == Type::object) { + // Skip over PathDefault with an empty object since our input is already an object. + updated = true; + } else { + // Skip over other PathDefaults but remember we had one. + hasDefault = true; + } } else { return; - }; + } + } + + if (toKeep && input != Constant::emptyObject()) { + // Check if we assign to every field we keep. If so, drop dependence on input. + bool assignToAllKeptFields = true; + for (const auto& fieldName : *toKeep) { + if (fieldMap.count(fieldName) == 0) { + assignToAllKeptFields = false; + break; + } + } + if (assignToAllKeptFields) { + // Do not need the input, do not keep fields, and ignore defaults. + input = Constant::emptyObject(); + toKeep = boost::none; + updated = true; + hasDefault = false; + } } if (!updated) { return; } + if (hasDefault) { + return; + } ABT result = make<PathIdentity>(); if (toKeep) { diff --git a/src/mongo/db/query/optimizer/rewrites/path.h b/src/mongo/db/query/optimizer/rewrites/path.h index 6fa95534360..de1d4835351 100644 --- a/src/mongo/db/query/optimizer/rewrites/path.h +++ b/src/mongo/db/query/optimizer/rewrites/path.h @@ -32,6 +32,10 @@ #include "mongo/db/query/optimizer/reference_tracker.h" namespace mongo::optimizer { + +/** + * Performs fusion rewrites over paths. Those attempt to simplify complex paths. + */ class PathFusion { enum class Type { unknown, nothing, object, array, boolean, any }; enum class Kind { project, filter }; @@ -83,7 +87,10 @@ private: } bool fuse(ABT& lhs, const ABT& rhs); - void tryFuseComposition(ABT& n, const ABT& input); + /** + * Attempt to eliminate a chain of Fields with constant children. + */ + void tryFuseComposition(ABT& n, ABT& input); VariableEnvironment& _env; opt::unordered_map<const PathSyntaxSort*, CollectedInfo> _info; diff --git a/src/mongo/db/query/optimizer/rewrites/path_optimizer_test.cpp b/src/mongo/db/query/optimizer/rewrites/path_optimizer_test.cpp index 69e97412617..9a373a826f2 100644 --- a/src/mongo/db/query/optimizer/rewrites/path_optimizer_test.cpp +++ b/src/mongo/db/query/optimizer/rewrites/path_optimizer_test.cpp @@ -564,6 +564,96 @@ TEST(Path, Fuse6) { tree); } +TEST(Path, Fuse7) { + auto scanNode = make<ScanNode>("root", "test"); + + auto project1 = make<EvaluationNode>( + "px", + make<EvalPath>(make<PathGet>("x", make<PathIdentity>()), make<Variable>("root")), + std::move(scanNode)); + + auto project2 = make<EvaluationNode>( + "py", + make<EvalPath>( + make<PathComposeM>( + make<PathComposeM>(make<PathKeep>(PathKeep::NameSet{"a"}), + make<PathField>("a", make<PathConstant>(make<Variable>("px")))), + make<PathDefault>(Constant::emptyObject())), + make<Variable>("root")), + std::move(project1)); + + auto tree = make<RootNode>(properties::ProjectionRequirement{ProjectionNameVector{"py"}}, + std::move(project2)); + + ASSERT_EXPLAIN( + "Root []\n" + " projections: \n" + " py\n" + " RefBlock: \n" + " Variable [py]\n" + " Evaluation []\n" + " BindBlock:\n" + " [py]\n" + " EvalPath []\n" + " PathComposeM []\n" + " PathComposeM []\n" + " PathKeep [a]\n" + " PathField [a]\n" + " PathConstant []\n" + " Variable [px]\n" + " PathDefault []\n" + " Const [{}]\n" + " Variable [root]\n" + " Evaluation []\n" + " BindBlock:\n" + " [px]\n" + " EvalPath []\n" + " PathGet [x]\n" + " PathIdentity []\n" + " Variable [root]\n" + " Scan [test]\n" + " BindBlock:\n" + " [root]\n" + " Source []\n", + tree); + + auto env = VariableEnvironment::build(tree); + bool changed = false; + do { + changed = false; + if (PathFusion{env}.optimize(tree)) { + changed = true; + } + if (ConstEval{env}.optimize(tree)) { + changed = true; + } + } while (changed); + + // Obtain "x" and directly assign at "a". + ASSERT_EXPLAIN( + "Root []\n" + " projections: \n" + " py\n" + " RefBlock: \n" + " Variable [py]\n" + " Evaluation []\n" + " BindBlock:\n" + " [py]\n" + " EvalPath []\n" + " PathField [a]\n" + " PathConstant []\n" + " EvalPath []\n" + " PathGet [x]\n" + " PathIdentity []\n" + " Variable [root]\n" + " Const [{}]\n" + " Scan [test]\n" + " BindBlock:\n" + " [root]\n" + " Source []\n", + tree); +} + TEST(Path, Lower1) { PrefixId prefixId; |