summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHana Pearlman <hana.pearlman@mongodb.com>2022-11-14 21:23:22 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-11-14 22:23:42 +0000
commit0ec7e6199432904901a13bf82856074b1e4f4ab4 (patch)
tree20b679144fd779f3d6e376b74b5e7e33b7077cce
parent607e613725fffbab18c372627d74e8607077ee82 (diff)
downloadmongo-0ec7e6199432904901a13bf82856074b1e4f4ab4.tar.gz
SERVER-66570: Ensure that pushed down projections computed from the metadata do not hide dependent subfields referenced in the same project
-rw-r--r--jstests/core/timeseries/timeseries_project.js11
-rw-r--r--src/mongo/db/exec/add_fields_projection_executor_test.cpp57
-rw-r--r--src/mongo/db/exec/inclusion_projection_executor.cpp104
-rw-r--r--src/mongo/db/exec/inclusion_projection_executor_test.cpp56
4 files changed, 197 insertions, 31 deletions
diff --git a/jstests/core/timeseries/timeseries_project.js b/jstests/core/timeseries/timeseries_project.js
index b5d615ebde5..1352501d221 100644
--- a/jstests/core/timeseries/timeseries_project.js
+++ b/jstests/core/timeseries/timeseries_project.js
@@ -97,6 +97,7 @@ const doc = {
time: new Date("2019-10-11T14:39:18.670Z"),
x: 5,
a: 3,
+ obj: {a: 3},
};
assert.commandWorked(tsColl.insert(doc));
assert.commandWorked(regColl.insert(doc));
@@ -107,12 +108,22 @@ let tsDoc = tsColl.aggregate(pipeline).toArray();
let regDoc = regColl.aggregate(pipeline).toArray();
assert.docEq(tsDoc, regDoc);
+pipeline = [{$project: {_id: 0, obj: "$x", b: {$add: ["$obj.a", 1]}}}];
+tsDoc = tsColl.aggregate(pipeline).toArray();
+regDoc = regColl.aggregate(pipeline).toArray();
+assert.docEq(tsDoc, regDoc);
+
// Test $addFields.
pipeline = [{$addFields: {a: "$x", b: "$a"}}, {$project: {_id: 0}}];
tsDoc = tsColl.aggregate(pipeline).toArray();
regDoc = regColl.aggregate(pipeline).toArray();
assert.docEq(tsDoc, regDoc);
+pipeline = [{$addFields: {obj: "$x", b: {$add: ["$obj.a", 1]}}}, {$project: {_id: 0}}];
+tsDoc = tsColl.aggregate(pipeline).toArray();
+regDoc = regColl.aggregate(pipeline).toArray();
+assert.docEq(tsDoc, regDoc);
+
pipeline = [{$project: {a: 1, _id: 0}}, {$project: {newMeta: "$x"}}];
tsDoc = tsColl.aggregate(pipeline).toArray();
regDoc = regColl.aggregate(pipeline).toArray();
diff --git a/src/mongo/db/exec/add_fields_projection_executor_test.cpp b/src/mongo/db/exec/add_fields_projection_executor_test.cpp
index 0c5fd22c5f6..7230c6495c8 100644
--- a/src/mongo/db/exec/add_fields_projection_executor_test.cpp
+++ b/src/mongo/db/exec/add_fields_projection_executor_test.cpp
@@ -681,5 +681,62 @@ TEST(AddFieldsProjectionExecutorExecutionTest, DoNotExtractComputedProjectionWit
addFields.serializeTransformation(boost::none));
}
+TEST(AddFieldsProjectionExecutorExecutionTest,
+ ExtractComputedProjectionShouldNotHideDependentSubFields) {
+ boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
+ AddFieldsProjectionExecutor addFields(expCtx);
+ addFields.parse(BSON("obj"
+ << "$myMeta"
+ << "b" << BSON("$add" << BSON_ARRAY("$obj.a" << 1))));
+
+ const std::set<StringData> reservedNames{};
+ auto [extractedAddFields, deleteFlag] =
+ addFields.extractComputedProjections("myMeta", "meta", reservedNames);
+
+ ASSERT_EQ(extractedAddFields.nFields(), 0);
+ ASSERT_EQ(deleteFlag, false);
+
+ auto expectedProjection =
+ Document(fromjson("{obj: '$myMeta', b: {$add: ['$obj.a', {$const: 1}]}}"));
+ ASSERT_DOCUMENT_EQ(expectedProjection, addFields.serializeTransformation(boost::none));
+}
+
+TEST(AddFieldsProjectionExecutorExecutionTest,
+ ExtractComputedProjectionShouldNotHideDependentFieldsWithDottedSibling) {
+ boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
+ AddFieldsProjectionExecutor addFields(expCtx);
+ addFields.parse(BSON("a"
+ << "$myMeta"
+ << "c.b"
+ << "$a.x"));
+
+ const std::set<StringData> reservedNames{};
+ auto [extractedAddFields, deleteFlag] =
+ addFields.extractComputedProjections("myMeta", "meta", reservedNames);
+
+ ASSERT_EQ(extractedAddFields.nFields(), 0);
+ ASSERT_EQ(deleteFlag, false);
+
+ auto expectedProjection = Document(fromjson("{a: '$myMeta', c: {b: '$a.x'}}"));
+ ASSERT_DOCUMENT_EQ(expectedProjection, addFields.serializeTransformation(boost::none));
+}
+
+TEST(AddFieldsProjectionExecutorExecutionTest, ExtractComputedProjectionShouldNotIncludeId) {
+ boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
+ AddFieldsProjectionExecutor addFields(expCtx);
+ addFields.parse(BSON("a" << BSON("$sum" << BSON_ARRAY("$myMeta"
+ << "$_id"))));
+
+ const std::set<StringData> reservedNames{};
+ auto [extractedAddFields, deleteFlag] =
+ addFields.extractComputedProjections("myMeta", "meta", reservedNames);
+
+ ASSERT_EQ(extractedAddFields.nFields(), 0);
+ ASSERT_EQ(deleteFlag, false);
+
+ auto expectedProjection = Document(fromjson("{a: {$sum: ['$myMeta', '$_id']}}"));
+ ASSERT_DOCUMENT_EQ(expectedProjection, addFields.serializeTransformation(boost::none));
+}
+
} // namespace
} // namespace mongo::projection_executor
diff --git a/src/mongo/db/exec/inclusion_projection_executor.cpp b/src/mongo/db/exec/inclusion_projection_executor.cpp
index 30e06a76d82..594de7bcb26 100644
--- a/src/mongo/db/exec/inclusion_projection_executor.cpp
+++ b/src/mongo/db/exec/inclusion_projection_executor.cpp
@@ -100,6 +100,49 @@ boost::intrusive_ptr<Expression> substituteInExpr(boost::intrusive_ptr<Expressio
}
return ex;
};
+
+/**
+ * Returns a vector of top-level dependencies where each index i in the vector corresponds to the
+ * dependencies from the ith expression according to 'orderToProcess'.
+ */
+std::vector<OrderedPathSet> getTopLevelDeps(
+ const std::vector<std::string>& orderToProcess,
+ const StringMap<boost::intrusive_ptr<Expression>>& expressions,
+ const StringMap<std::unique_ptr<ProjectionNode>>& children) {
+ std::vector<OrderedPathSet> topLevelDeps;
+ for (const auto& field : orderToProcess) {
+ DepsTracker deps;
+ if (auto exprIt = expressions.find(field); exprIt != expressions.end()) {
+ expression::addDependencies(exprIt->second.get(), &deps);
+ } else {
+ // Each expression in orderToProcess should either be in expressions or children.
+ auto childIt = children.find(field);
+ tassert(6657000, "Unable to calculate dependencies", childIt != children.end());
+ childIt->second->reportDependencies(&deps);
+ }
+
+ topLevelDeps.push_back(
+ DepsTracker::simplifyDependencies(deps.fields, DepsTracker::TruncateToRootLevel::yes));
+ }
+ return topLevelDeps;
+}
+
+/**
+ * Returns whether or not there is an expression in the projection which depends on 'field' other
+ * than the expression which computes 'field'. For example, given field "a" and projection
+ * {a: "$b", c: {$sum: ["$a", 5]}}, return true. Given field "a" and projection
+ * {a: {$sum: ["$a", 5]}, c: "$b"}, return false. 'field' should be a top level path.
+ */
+bool computedExprDependsOnField(const std::vector<OrderedPathSet>& topLevelDeps,
+ const std::string& field,
+ const size_t fieldIndex) {
+ for (size_t i = 0; i < topLevelDeps.size(); i++) {
+ if (i != fieldIndex && topLevelDeps[i].count(field) > 0) {
+ return true;
+ }
+ }
+ return false;
+}
} // namespace
std::pair<BSONObj, bool> InclusionNode::extractComputedProjectionsInProject(
@@ -110,8 +153,8 @@ std::pair<BSONObj, bool> InclusionNode::extractComputedProjectionsInProject(
return {BSONObj{}, false};
}
- DepsTracker allDeps;
- reportDependencies(&allDeps);
+ std::vector<OrderedPathSet> topLevelDeps =
+ getTopLevelDeps(_orderToProcessAdditionsAndChildren, _expressions, _children);
// Auxiliary vector with extracted computed projections: <name, expression, replacement
// strategy>. If the replacement strategy flag is true, the expression is replaced with a
@@ -119,19 +162,15 @@ std::pair<BSONObj, bool> InclusionNode::extractComputedProjectionsInProject(
std::vector<std::tuple<StringData, boost::intrusive_ptr<Expression>, bool>>
addFieldsExpressions;
bool replaceWithProjField = true;
- for (auto&& field : _orderToProcessAdditionsAndChildren) {
+ for (size_t i = 0; i < _orderToProcessAdditionsAndChildren.size(); i++) {
+ auto&& field = _orderToProcessAdditionsAndChildren[i];
+
if (reservedNames.count(field) > 0) {
// Do not pushdown computed projection with reserved name.
replaceWithProjField = false;
continue;
}
- if (allDeps.fields.count(field) > 0) {
- // Do not extract a computed projection if its name is the same as a dependent field. If
- // the extracted $addFields were to be placed before this projection, the dependency
- // with the common name would be shadowed by the computed projection.
- replaceWithProjField = false;
- continue;
- }
+
auto expressionIt = _expressions.find(field);
if (expressionIt == _expressions.end()) {
// After seeing the first dotted path expression we need to replace computed
@@ -139,13 +178,17 @@ std::pair<BSONObj, bool> InclusionNode::extractComputedProjectionsInProject(
replaceWithProjField = false;
continue;
}
- DepsTracker deps;
- expression::addDependencies(expressionIt->second.get(), &deps);
- auto topLevelFieldNames =
- deps.toProjectionWithoutMetadata(DepsTracker::TruncateToRootLevel::yes)
- .getFieldNames<std::set<std::string>>();
- topLevelFieldNames.erase("_id");
+ // Do not extract a computed projection if it is computing a value that other fields in the
+ // same projection depend on. If the extracted $addFields were to be placed before this
+ // projection, the dependency with the common name would be shadowed by the computed
+ // projection.
+ if (computedExprDependsOnField(topLevelDeps, field, i)) {
+ replaceWithProjField = false;
+ continue;
+ }
+
+ const auto& topLevelFieldNames = topLevelDeps[i];
if (topLevelFieldNames.size() == 1 && topLevelFieldNames.count(oldName.toString()) == 1) {
// Substitute newName for oldName in the expression.
StringMap<std::string> renames;
@@ -199,35 +242,34 @@ std::pair<BSONObj, bool> InclusionNode::extractComputedProjectionsInAddFields(
return {BSONObj{}, false};
}
- DepsTracker allDeps;
- reportDependencies(&allDeps);
+ std::vector<OrderedPathSet> topLevelDeps =
+ getTopLevelDeps(_orderToProcessAdditionsAndChildren, _expressions, _children);
// Auxiliary vector with extracted computed projections: <name, expression>.
// To preserve the original fields order, only projections at the beginning of the
// _orderToProcessAdditionsAndChildren list can be extracted for pushdown.
std::vector<std::pair<StringData, boost::intrusive_ptr<Expression>>> addFieldsExpressions;
- for (auto&& field : _orderToProcessAdditionsAndChildren) {
+ for (size_t i = 0; i < _orderToProcessAdditionsAndChildren.size(); i++) {
+ auto&& field = _orderToProcessAdditionsAndChildren[i];
// Do not extract for pushdown computed projection with reserved name.
if (reservedNames.count(field) > 0) {
break;
}
- if (allDeps.fields.count(field) > 0) {
- // Do not extract a computed projection if its name is the same as a dependent field. If
- // the extracted $addFields were to be placed before this $addFields, the dependency
- // with the common name would be shadowed by the computed projection.
- break;
- }
+
auto expressionIt = _expressions.find(field);
if (expressionIt == _expressions.end()) {
break;
}
- DepsTracker deps;
- expression::addDependencies(expressionIt->second.get(), &deps);
- auto topLevelFieldNames =
- deps.toProjectionWithoutMetadata(DepsTracker::TruncateToRootLevel::yes)
- .getFieldNames<std::set<std::string>>();
- topLevelFieldNames.erase("_id");
+ // Do not extract a computed projection if it is computing a value that other fields in the
+ // same projection depend on. If the extracted $addFields were to be placed before this
+ // projection, the dependency with the common name would be shadowed by the computed
+ // projection.
+ if (computedExprDependsOnField(topLevelDeps, field, i)) {
+ break;
+ }
+
+ auto& topLevelFieldNames = topLevelDeps[i];
if (topLevelFieldNames.size() == 1 && topLevelFieldNames.count(oldName.toString()) == 1) {
// Substitute newName for oldName in the expression.
StringMap<std::string> renames;
diff --git a/src/mongo/db/exec/inclusion_projection_executor_test.cpp b/src/mongo/db/exec/inclusion_projection_executor_test.cpp
index efd20cda382..356cced5d60 100644
--- a/src/mongo/db/exec/inclusion_projection_executor_test.cpp
+++ b/src/mongo/db/exec/inclusion_projection_executor_test.cpp
@@ -1092,6 +1092,62 @@ TEST_F(InclusionProjectionExecutionTestWithFallBackToDefault,
ASSERT_DOCUMENT_EQ(expectedProjection, inclusion->serializeTransformation(boost::none));
}
+TEST_F(InclusionProjectionExecutionTestWithFallBackToDefault,
+ ExtractComputedProjectionInProjectShouldNotIncludeId) {
+ auto inclusion = makeInclusionProjectionWithDefaultPolicies(
+ BSON("a" << BSON("$sum" << BSON_ARRAY("$myMeta"
+ << "$_id"))));
+
+ auto r = static_cast<InclusionProjectionExecutor*>(inclusion.get())->getRoot();
+ const std::set<StringData> reservedNames{};
+ auto [addFields, deleteFlag] =
+ r->extractComputedProjectionsInProject("myMeta", "meta", reservedNames);
+
+ ASSERT_EQ(addFields.nFields(), 0);
+ ASSERT_EQ(deleteFlag, false);
+
+ auto expectedProjection = Document(fromjson("{_id: true, a: {$sum: ['$myMeta', '$_id']}}"));
+ ASSERT_DOCUMENT_EQ(expectedProjection, inclusion->serializeTransformation(boost::none));
+}
+
+TEST_F(InclusionProjectionExecutionTestWithFallBackToDefault,
+ ExtractComputedProjectionInProjectShouldNotHideDependentSubFields) {
+ auto inclusion = makeInclusionProjectionWithDefaultPolicies(BSON("a"
+ << "$myMeta"
+ << "b"
+ << "$a.x"));
+
+ auto r = static_cast<InclusionProjectionExecutor*>(inclusion.get())->getRoot();
+ const std::set<StringData> reservedNames{};
+ auto [addFields, deleteFlag] =
+ r->extractComputedProjectionsInProject("myMeta", "meta", reservedNames);
+
+ ASSERT_EQ(addFields.nFields(), 0);
+ ASSERT_EQ(deleteFlag, false);
+
+ auto expectedProjection = Document(fromjson("{_id: true, a: '$myMeta', b: '$a.x'}"));
+ ASSERT_DOCUMENT_EQ(expectedProjection, inclusion->serializeTransformation(boost::none));
+}
+
+TEST_F(InclusionProjectionExecutionTestWithFallBackToDefault,
+ ExtractComputedProjectionInProjectShouldNotHideDependentSubFieldsWithDottedSibling) {
+ auto inclusion = makeInclusionProjectionWithDefaultPolicies(BSON("a"
+ << "$myMeta"
+ << "c.b"
+ << "$a.x"));
+
+ auto r = static_cast<InclusionProjectionExecutor*>(inclusion.get())->getRoot();
+ const std::set<StringData> reservedNames{};
+ auto [addFields, deleteFlag] =
+ r->extractComputedProjectionsInProject("myMeta", "meta", reservedNames);
+
+ ASSERT_EQ(addFields.nFields(), 0);
+ ASSERT_EQ(deleteFlag, false);
+
+ auto expectedProjection = Document(fromjson("{_id: true, a: '$myMeta', c: {b: '$a.x'}}"));
+ ASSERT_DOCUMENT_EQ(expectedProjection, inclusion->serializeTransformation(boost::none));
+}
+
TEST_F(InclusionProjectionExecutionTestWithFallBackToDefault, ApplyProjectionAfterSplit) {
auto inclusion = makeInclusionProjectionWithDefaultPolicies(
BSON("a" << true << "computedMeta1"