summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSvilen Mihaylov <svilen.mihaylov@mongodb.com>2022-09-01 18:13:11 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-09-01 20:13:33 +0000
commitd2979f3ee500ed4dfd5f67fe2c9412f44c9ceb62 (patch)
treee53e74f3cfddc74396f6373f4a493697e9ab4ab7
parent04ea05b120dfbecccb01cf1d1363f25d9164abaf (diff)
downloadmongo-d2979f3ee500ed4dfd5f67fe2c9412f44c9ceb62.tar.gz
SERVER-69361 [CQF] Extend path fusion to better handle field dependencies
-rw-r--r--jstests/cqf/project_expr_dependency.js26
-rw-r--r--src/mongo/db/query/optimizer/rewrites/path.cpp50
-rw-r--r--src/mongo/db/query/optimizer/rewrites/path.h9
-rw-r--r--src/mongo/db/query/optimizer/rewrites/path_optimizer_test.cpp90
4 files changed, 159 insertions, 16 deletions
diff --git a/jstests/cqf/project_expr_dependency.js b/jstests/cqf/project_expr_dependency.js
new file mode 100644
index 00000000000..0c3d1510d1b
--- /dev/null
+++ b/jstests/cqf/project_expr_dependency.js
@@ -0,0 +1,26 @@
+(function() {
+"use strict";
+
+load("jstests/libs/optimizer_utils.js"); // For checkCascadesOptimizerEnabled.
+if (!checkCascadesOptimizerEnabled(db)) {
+ jsTestLog("Skipping test because the optimizer is not enabled");
+ return;
+}
+
+const t = db.cqf_project_expr_dependency;
+t.drop();
+
+for (let i = 0; i < 100; i++) {
+ assert.commandWorked(t.insert({a1: i, b1: i, c1: i}));
+}
+
+const res = t.explain("executionStats").aggregate([
+ {$addFields: {x: '$a1', y: '$b1', z: '$c1'}},
+ {$addFields: {a: {$add: ["$x", "$y"]}, b: {$add: ["$y", "$z"]}}},
+ {$project: {_id: 0, out: "$b"}}
+]);
+
+// Demonstrate we only need to read "b1" and "c1" from the collection.
+const scanNodeProjFieldMap = navigateToPlanPath(res, "child.child.fieldProjectionMap");
+assert.eq(["b1", "c1"], Object.keys(scanNodeProjFieldMap));
+}());
diff --git a/src/mongo/db/query/optimizer/rewrites/path.cpp b/src/mongo/db/query/optimizer/rewrites/path.cpp
index 2752b72c752..58c5b34a68b 100644
--- a/src/mongo/db/query/optimizer/rewrites/path.cpp
+++ b/src/mongo/db/query/optimizer/rewrites/path.cpp
@@ -44,13 +44,8 @@ ABT::reference_type PathFusion::follow(ABT::reference_type n) {
bool PathFusion::fuse(ABT& lhs, const ABT& rhs) {
if (auto rhsComposeM = rhs.cast<PathComposeM>(); rhsComposeM != nullptr) {
- // Try to fuse first with right path, then left.
- if (_info[rhsComposeM->getPath2().cast<PathSyntaxSort>()]._isConst) {
- if (fuse(lhs, rhsComposeM->getPath2())) {
- return true;
- }
- if (_info[rhsComposeM->getPath1().cast<PathSyntaxSort>()]._isConst &&
- fuse(lhs, rhsComposeM->getPath1())) {
+ for (const auto& branch : collectComposed(rhs)) {
+ if (_info[branch.cast<PathSyntaxSort>()]._isConst && fuse(lhs, branch)) {
return true;
}
}
@@ -307,7 +302,7 @@ void PathFusion::transport(ABT& n, const PathComposeM& path, ABT& p1, ABT& p2) {
}
}
-void PathFusion::tryFuseComposition(ABT& n, const ABT& input) {
+void PathFusion::tryFuseComposition(ABT& n, ABT& input) {
// Check to see if our flattened composition consists of constant branches containing only
// Field and Keep elements. If we have duplicate Field branches then retain only the
// latest one. For example:
@@ -327,6 +322,7 @@ void PathFusion::tryFuseComposition(ABT& n, const ABT& input) {
}
bool updated = false;
+ bool hasDefault = false;
for (const auto& branch : collectComposed(n)) {
auto info = _info.find(branch.cast<PathSyntaxSort>());
if (info == _info.cend()) {
@@ -369,19 +365,43 @@ void PathFusion::tryFuseComposition(ABT& n, const ABT& input) {
}
}
toKeep = std::move(newKeepSet);
- } else if (auto defaultPtr = branch.cast<PathDefault>(); defaultPtr != nullptr &&
- defaultPtr->getDefault().is<Constant>() &&
- defaultPtr->getDefault().cast<Constant>()->isObject() &&
- inputType == Type::object) {
- // Skip over PathDefault with an empty object since our input is already an object.
- updated = true;
+ } else if (auto defaultPtr = branch.cast<PathDefault>(); defaultPtr != nullptr) {
+ if (auto constPtr = defaultPtr->getDefault().cast<Constant>();
+ constPtr != nullptr && constPtr->isObject() && inputType == Type::object) {
+ // Skip over PathDefault with an empty object since our input is already an object.
+ updated = true;
+ } else {
+ // Skip over other PathDefaults but remember we had one.
+ hasDefault = true;
+ }
} else {
return;
- };
+ }
+ }
+
+ if (toKeep && input != Constant::emptyObject()) {
+ // Check if we assign to every field we keep. If so, drop dependence on input.
+ bool assignToAllKeptFields = true;
+ for (const auto& fieldName : *toKeep) {
+ if (fieldMap.count(fieldName) == 0) {
+ assignToAllKeptFields = false;
+ break;
+ }
+ }
+ if (assignToAllKeptFields) {
+ // Do not need the input, do not keep fields, and ignore defaults.
+ input = Constant::emptyObject();
+ toKeep = boost::none;
+ updated = true;
+ hasDefault = false;
+ }
}
if (!updated) {
return;
}
+ if (hasDefault) {
+ return;
+ }
ABT result = make<PathIdentity>();
if (toKeep) {
diff --git a/src/mongo/db/query/optimizer/rewrites/path.h b/src/mongo/db/query/optimizer/rewrites/path.h
index 6fa95534360..de1d4835351 100644
--- a/src/mongo/db/query/optimizer/rewrites/path.h
+++ b/src/mongo/db/query/optimizer/rewrites/path.h
@@ -32,6 +32,10 @@
#include "mongo/db/query/optimizer/reference_tracker.h"
namespace mongo::optimizer {
+
+/**
+ * Performs fusion rewrites over paths. Those attempt to simplify complex paths.
+ */
class PathFusion {
enum class Type { unknown, nothing, object, array, boolean, any };
enum class Kind { project, filter };
@@ -83,7 +87,10 @@ private:
}
bool fuse(ABT& lhs, const ABT& rhs);
- void tryFuseComposition(ABT& n, const ABT& input);
+ /**
+ * Attempt to eliminate a chain of Fields with constant children.
+ */
+ void tryFuseComposition(ABT& n, ABT& input);
VariableEnvironment& _env;
opt::unordered_map<const PathSyntaxSort*, CollectedInfo> _info;
diff --git a/src/mongo/db/query/optimizer/rewrites/path_optimizer_test.cpp b/src/mongo/db/query/optimizer/rewrites/path_optimizer_test.cpp
index 69e97412617..9a373a826f2 100644
--- a/src/mongo/db/query/optimizer/rewrites/path_optimizer_test.cpp
+++ b/src/mongo/db/query/optimizer/rewrites/path_optimizer_test.cpp
@@ -564,6 +564,96 @@ TEST(Path, Fuse6) {
tree);
}
+TEST(Path, Fuse7) {
+ auto scanNode = make<ScanNode>("root", "test");
+
+ auto project1 = make<EvaluationNode>(
+ "px",
+ make<EvalPath>(make<PathGet>("x", make<PathIdentity>()), make<Variable>("root")),
+ std::move(scanNode));
+
+ auto project2 = make<EvaluationNode>(
+ "py",
+ make<EvalPath>(
+ make<PathComposeM>(
+ make<PathComposeM>(make<PathKeep>(PathKeep::NameSet{"a"}),
+ make<PathField>("a", make<PathConstant>(make<Variable>("px")))),
+ make<PathDefault>(Constant::emptyObject())),
+ make<Variable>("root")),
+ std::move(project1));
+
+ auto tree = make<RootNode>(properties::ProjectionRequirement{ProjectionNameVector{"py"}},
+ std::move(project2));
+
+ ASSERT_EXPLAIN(
+ "Root []\n"
+ " projections: \n"
+ " py\n"
+ " RefBlock: \n"
+ " Variable [py]\n"
+ " Evaluation []\n"
+ " BindBlock:\n"
+ " [py]\n"
+ " EvalPath []\n"
+ " PathComposeM []\n"
+ " PathComposeM []\n"
+ " PathKeep [a]\n"
+ " PathField [a]\n"
+ " PathConstant []\n"
+ " Variable [px]\n"
+ " PathDefault []\n"
+ " Const [{}]\n"
+ " Variable [root]\n"
+ " Evaluation []\n"
+ " BindBlock:\n"
+ " [px]\n"
+ " EvalPath []\n"
+ " PathGet [x]\n"
+ " PathIdentity []\n"
+ " Variable [root]\n"
+ " Scan [test]\n"
+ " BindBlock:\n"
+ " [root]\n"
+ " Source []\n",
+ tree);
+
+ auto env = VariableEnvironment::build(tree);
+ bool changed = false;
+ do {
+ changed = false;
+ if (PathFusion{env}.optimize(tree)) {
+ changed = true;
+ }
+ if (ConstEval{env}.optimize(tree)) {
+ changed = true;
+ }
+ } while (changed);
+
+ // Obtain "x" and directly assign at "a".
+ ASSERT_EXPLAIN(
+ "Root []\n"
+ " projections: \n"
+ " py\n"
+ " RefBlock: \n"
+ " Variable [py]\n"
+ " Evaluation []\n"
+ " BindBlock:\n"
+ " [py]\n"
+ " EvalPath []\n"
+ " PathField [a]\n"
+ " PathConstant []\n"
+ " EvalPath []\n"
+ " PathGet [x]\n"
+ " PathIdentity []\n"
+ " Variable [root]\n"
+ " Const [{}]\n"
+ " Scan [test]\n"
+ " BindBlock:\n"
+ " [root]\n"
+ " Source []\n",
+ tree);
+}
+
TEST(Path, Lower1) {
PrefixId prefixId;