summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHana Pearlman <hana.pearlman@mongodb.com>2023-01-05 16:28:19 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2023-01-05 17:04:41 +0000
commita3657539f4340d38b8d77058f588140904d2f81e (patch)
treeea4c896d09d9f8e4d2a5f339cfd37b462b1f2329
parent8ce31326668592d9121897bd777d017dd24a6fee (diff)
downloadmongo-a3657539f4340d38b8d77058f588140904d2f81e.tar.gz
SERVER-66570: Ensure that pushed down projections computed from the metadata do not hide dependent subfields referenced in the same project
-rw-r--r--jstests/core/timeseries/timeseries_project.js11
-rw-r--r--src/mongo/db/exec/add_fields_projection_executor_test.cpp57
-rw-r--r--src/mongo/db/exec/inclusion_projection_executor.cpp105
-rw-r--r--src/mongo/db/exec/inclusion_projection_executor_test.cpp56
-rw-r--r--src/mongo/db/pipeline/dependencies.cpp28
-rw-r--r--src/mongo/db/pipeline/dependencies.h31
6 files changed, 249 insertions, 39 deletions
diff --git a/jstests/core/timeseries/timeseries_project.js b/jstests/core/timeseries/timeseries_project.js
index 44c42b03e1b..11ca90f2efe 100644
--- a/jstests/core/timeseries/timeseries_project.js
+++ b/jstests/core/timeseries/timeseries_project.js
@@ -97,6 +97,7 @@ const doc = {
time: new Date("2019-10-11T14:39:18.670Z"),
x: 5,
a: 3,
+ obj: {a: 3},
};
assert.commandWorked(tsColl.insert(doc));
assert.commandWorked(regColl.insert(doc));
@@ -107,12 +108,22 @@ let tsDoc = tsColl.aggregate(pipeline).toArray();
let regDoc = regColl.aggregate(pipeline).toArray();
assert.docEq(tsDoc, regDoc);
+pipeline = [{$project: {_id: 0, obj: "$x", b: {$add: ["$obj.a", 1]}}}];
+tsDoc = tsColl.aggregate(pipeline).toArray();
+regDoc = regColl.aggregate(pipeline).toArray();
+assert.docEq(tsDoc, regDoc);
+
// Test $addFields.
pipeline = [{$addFields: {a: "$x", b: "$a"}}, {$project: {_id: 0}}];
tsDoc = tsColl.aggregate(pipeline).toArray();
regDoc = regColl.aggregate(pipeline).toArray();
assert.docEq(tsDoc, regDoc);
+pipeline = [{$addFields: {obj: "$x", b: {$add: ["$obj.a", 1]}}}, {$project: {_id: 0}}];
+tsDoc = tsColl.aggregate(pipeline).toArray();
+regDoc = regColl.aggregate(pipeline).toArray();
+assert.docEq(tsDoc, regDoc);
+
pipeline = [{$project: {a: 1, _id: 0}}, {$project: {newMeta: "$x"}}];
tsDoc = tsColl.aggregate(pipeline).toArray();
regDoc = regColl.aggregate(pipeline).toArray();
diff --git a/src/mongo/db/exec/add_fields_projection_executor_test.cpp b/src/mongo/db/exec/add_fields_projection_executor_test.cpp
index 007e5cc1ffb..0870c53751a 100644
--- a/src/mongo/db/exec/add_fields_projection_executor_test.cpp
+++ b/src/mongo/db/exec/add_fields_projection_executor_test.cpp
@@ -685,5 +685,62 @@ TEST(AddFieldsProjectionExecutorExecutionTest, DoNotExtractComputedProjectionWit
addFields.serializeTransformation(boost::none));
}
+TEST(AddFieldsProjectionExecutorExecutionTest,
+ ExtractComputedProjectionShouldNotHideDependentSubFields) {
+ boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
+ AddFieldsProjectionExecutor addFields(expCtx);
+ addFields.parse(BSON("obj"
+ << "$myMeta"
+ << "b" << BSON("$add" << BSON_ARRAY("$obj.a" << 1))));
+
+ const std::set<StringData> reservedNames{};
+ auto [extractedAddFields, deleteFlag] =
+ addFields.extractComputedProjections("myMeta", "meta", reservedNames);
+
+ ASSERT_EQ(extractedAddFields.nFields(), 0);
+ ASSERT_EQ(deleteFlag, false);
+
+ auto expectedProjection =
+ Document(fromjson("{obj: '$myMeta', b: {$add: ['$obj.a', {$const: 1}]}}"));
+ ASSERT_DOCUMENT_EQ(expectedProjection, addFields.serializeTransformation(boost::none));
+}
+
+TEST(AddFieldsProjectionExecutorExecutionTest,
+ ExtractComputedProjectionShouldNotHideDependentFieldsWithDottedSibling) {
+ boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
+ AddFieldsProjectionExecutor addFields(expCtx);
+ addFields.parse(BSON("a"
+ << "$myMeta"
+ << "c.b"
+ << "$a.x"));
+
+ const std::set<StringData> reservedNames{};
+ auto [extractedAddFields, deleteFlag] =
+ addFields.extractComputedProjections("myMeta", "meta", reservedNames);
+
+ ASSERT_EQ(extractedAddFields.nFields(), 0);
+ ASSERT_EQ(deleteFlag, false);
+
+ auto expectedProjection = Document(fromjson("{a: '$myMeta', c: {b: '$a.x'}}"));
+ ASSERT_DOCUMENT_EQ(expectedProjection, addFields.serializeTransformation(boost::none));
+}
+
+TEST(AddFieldsProjectionExecutorExecutionTest, ExtractComputedProjectionShouldNotIncludeId) {
+ boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest());
+ AddFieldsProjectionExecutor addFields(expCtx);
+ addFields.parse(BSON("a" << BSON("$sum" << BSON_ARRAY("$myMeta"
+ << "$_id"))));
+
+ const std::set<StringData> reservedNames{};
+ auto [extractedAddFields, deleteFlag] =
+ addFields.extractComputedProjections("myMeta", "meta", reservedNames);
+
+ ASSERT_EQ(extractedAddFields.nFields(), 0);
+ ASSERT_EQ(deleteFlag, false);
+
+ auto expectedProjection = Document(fromjson("{a: {$sum: ['$myMeta', '$_id']}}"));
+ ASSERT_DOCUMENT_EQ(expectedProjection, addFields.serializeTransformation(boost::none));
+}
+
} // namespace
} // namespace mongo::projection_executor
diff --git a/src/mongo/db/exec/inclusion_projection_executor.cpp b/src/mongo/db/exec/inclusion_projection_executor.cpp
index d7384be8109..c2a759be998 100644
--- a/src/mongo/db/exec/inclusion_projection_executor.cpp
+++ b/src/mongo/db/exec/inclusion_projection_executor.cpp
@@ -99,6 +99,50 @@ boost::intrusive_ptr<Expression> substituteInExpr(boost::intrusive_ptr<Expressio
}
return ex;
};
+
+/**
+ * Returns a vector of top-level dependencies where each index i in the vector corresponds to the
+ * dependencies from the ith expression according to 'orderToProcess'.
+ */
+std::vector<OrderedPathSet> getTopLevelDeps(
+ const std::vector<std::string>& orderToProcess,
+ const StringMap<boost::intrusive_ptr<Expression>>& expressions,
+ const StringMap<std::unique_ptr<ProjectionNode>>& children) {
+ std::vector<OrderedPathSet> topLevelDeps;
+ for (const auto& field : orderToProcess) {
+ DepsTracker deps;
+ if (auto exprIt = expressions.find(field); exprIt != expressions.end()) {
+ exprIt->second->addDependencies(&deps);
+ } else {
+ // Each expression in orderToProcess should either be in expressions or children.
+ auto childIt = children.find(field);
+ tassert(6657000, "Unable to calculate dependencies", childIt != children.end());
+ childIt->second->reportDependencies(&deps);
+ }
+
+ OrderedPathSet ops{deps.fields.begin(), deps.fields.end()};
+ topLevelDeps.push_back(
+ DepsTracker::simplifyDependencies(ops, DepsTracker::TruncateToRootLevel::yes));
+ }
+ return topLevelDeps;
+}
+
+/**
+ * Returns whether or not there is an expression in the projection which depends on 'field' other
+ * than the expression which computes 'field'. For example, given field "a" and projection
+ * {a: "$b", c: {$sum: ["$a", 5]}}, return true. Given field "a" and projection
+ * {a: {$sum: ["$a", 5]}, c: "$b"}, return false. 'field' should be a top level path.
+ */
+bool computedExprDependsOnField(const std::vector<OrderedPathSet>& topLevelDeps,
+ const std::string& field,
+ const size_t fieldIndex) {
+ for (size_t i = 0; i < topLevelDeps.size(); i++) {
+ if (i != fieldIndex && topLevelDeps[i].count(field) > 0) {
+ return true;
+ }
+ }
+ return false;
+}
} // namespace
std::pair<BSONObj, bool> InclusionNode::extractComputedProjectionsInProject(
@@ -109,8 +153,8 @@ std::pair<BSONObj, bool> InclusionNode::extractComputedProjectionsInProject(
return {BSONObj{}, false};
}
- DepsTracker allDeps;
- reportDependencies(&allDeps);
+ std::vector<OrderedPathSet> topLevelDeps =
+ getTopLevelDeps(_orderToProcessAdditionsAndChildren, _expressions, _children);
// Auxiliary vector with extracted computed projections: <name, expression, replacement
// strategy>. If the replacement strategy flag is true, the expression is replaced with a
@@ -118,19 +162,15 @@ std::pair<BSONObj, bool> InclusionNode::extractComputedProjectionsInProject(
std::vector<std::tuple<StringData, boost::intrusive_ptr<Expression>, bool>>
addFieldsExpressions;
bool replaceWithProjField = true;
- for (auto&& field : _orderToProcessAdditionsAndChildren) {
+ for (size_t i = 0; i < _orderToProcessAdditionsAndChildren.size(); i++) {
+ auto&& field = _orderToProcessAdditionsAndChildren[i];
+
if (reservedNames.count(field) > 0) {
// Do not pushdown computed projection with reserved name.
replaceWithProjField = false;
continue;
}
- if (allDeps.fields.count(field) > 0) {
- // Do not extract a computed projection if its name is the same as a dependent field. If
- // the extracted $addFields were to be placed before this projection, the dependency
- // with the common name would be shadowed by the computed projection.
- replaceWithProjField = false;
- continue;
- }
+
auto expressionIt = _expressions.find(field);
if (expressionIt == _expressions.end()) {
// After seeing the first dotted path expression we need to replace computed
@@ -138,13 +178,17 @@ std::pair<BSONObj, bool> InclusionNode::extractComputedProjectionsInProject(
replaceWithProjField = false;
continue;
}
- DepsTracker deps;
- expressionIt->second->addDependencies(&deps);
- auto topLevelFieldNames =
- deps.toProjectionWithoutMetadata(DepsTracker::TruncateToRootLevel::yes)
- .getFieldNames<std::set<std::string>>();
- topLevelFieldNames.erase("_id");
+ // Do not extract a computed projection if it is computing a value that other fields in the
+ // same projection depend on. If the extracted $addFields were to be placed before this
+ // projection, the dependency with the common name would be shadowed by the computed
+ // projection.
+ if (computedExprDependsOnField(topLevelDeps, field, i)) {
+ replaceWithProjField = false;
+ continue;
+ }
+
+ const auto& topLevelFieldNames = topLevelDeps[i];
if (topLevelFieldNames.size() == 1 && topLevelFieldNames.count(oldName.toString()) == 1) {
// Substitute newName for oldName in the expression.
StringMap<std::string> renames;
@@ -197,35 +241,34 @@ std::pair<BSONObj, bool> InclusionNode::extractComputedProjectionsInAddFields(
return {BSONObj{}, false};
}
- DepsTracker allDeps;
- reportDependencies(&allDeps);
+ std::vector<OrderedPathSet> topLevelDeps =
+ getTopLevelDeps(_orderToProcessAdditionsAndChildren, _expressions, _children);
// Auxiliary vector with extracted computed projections: <name, expression>.
// To preserve the original fields order, only projections at the beginning of the
// _orderToProcessAdditionsAndChildren list can be extracted for pushdown.
std::vector<std::pair<StringData, boost::intrusive_ptr<Expression>>> addFieldsExpressions;
- for (auto&& field : _orderToProcessAdditionsAndChildren) {
+ for (size_t i = 0; i < _orderToProcessAdditionsAndChildren.size(); i++) {
+ auto&& field = _orderToProcessAdditionsAndChildren[i];
// Do not extract for pushdown computed projection with reserved name.
if (reservedNames.count(field) > 0) {
break;
}
- if (allDeps.fields.count(field) > 0) {
- // Do not extract a computed projection if its name is the same as a dependent field. If
- // the extracted $addFields were to be placed before this $addFields, the dependency
- // with the common name would be shadowed by the computed projection.
- break;
- }
+
auto expressionIt = _expressions.find(field);
if (expressionIt == _expressions.end()) {
break;
}
- DepsTracker deps;
- expressionIt->second->addDependencies(&deps);
- auto topLevelFieldNames =
- deps.toProjectionWithoutMetadata(DepsTracker::TruncateToRootLevel::yes)
- .getFieldNames<std::set<std::string>>();
- topLevelFieldNames.erase("_id");
+ // Do not extract a computed projection if it is computing a value that other fields in the
+ // same projection depend on. If the extracted $addFields were to be placed before this
+ // projection, the dependency with the common name would be shadowed by the computed
+ // projection.
+ if (computedExprDependsOnField(topLevelDeps, field, i)) {
+ break;
+ }
+
+ auto& topLevelFieldNames = topLevelDeps[i];
if (topLevelFieldNames.size() == 1 && topLevelFieldNames.count(oldName.toString()) == 1) {
// Substitute newName for oldName in the expression.
StringMap<std::string> renames;
diff --git a/src/mongo/db/exec/inclusion_projection_executor_test.cpp b/src/mongo/db/exec/inclusion_projection_executor_test.cpp
index 84ed31a9bb4..53ebf3af4a3 100644
--- a/src/mongo/db/exec/inclusion_projection_executor_test.cpp
+++ b/src/mongo/db/exec/inclusion_projection_executor_test.cpp
@@ -1090,6 +1090,62 @@ TEST_F(InclusionProjectionExecutionTestWithFallBackToDefault,
ASSERT_DOCUMENT_EQ(expectedProjection, inclusion->serializeTransformation(boost::none));
}
+TEST_F(InclusionProjectionExecutionTestWithFallBackToDefault,
+ ExtractComputedProjectionInProjectShouldNotIncludeId) {
+ auto inclusion = makeInclusionProjectionWithDefaultPolicies(
+ BSON("a" << BSON("$sum" << BSON_ARRAY("$myMeta"
+ << "$_id"))));
+
+ auto r = static_cast<InclusionProjectionExecutor*>(inclusion.get())->getRoot();
+ const std::set<StringData> reservedNames{};
+ auto [addFields, deleteFlag] =
+ r->extractComputedProjectionsInProject("myMeta", "meta", reservedNames);
+
+ ASSERT_EQ(addFields.nFields(), 0);
+ ASSERT_EQ(deleteFlag, false);
+
+ auto expectedProjection = Document(fromjson("{_id: true, a: {$sum: ['$myMeta', '$_id']}}"));
+ ASSERT_DOCUMENT_EQ(expectedProjection, inclusion->serializeTransformation(boost::none));
+}
+
+TEST_F(InclusionProjectionExecutionTestWithFallBackToDefault,
+ ExtractComputedProjectionInProjectShouldNotHideDependentSubFields) {
+ auto inclusion = makeInclusionProjectionWithDefaultPolicies(BSON("a"
+ << "$myMeta"
+ << "b"
+ << "$a.x"));
+
+ auto r = static_cast<InclusionProjectionExecutor*>(inclusion.get())->getRoot();
+ const std::set<StringData> reservedNames{};
+ auto [addFields, deleteFlag] =
+ r->extractComputedProjectionsInProject("myMeta", "meta", reservedNames);
+
+ ASSERT_EQ(addFields.nFields(), 0);
+ ASSERT_EQ(deleteFlag, false);
+
+ auto expectedProjection = Document(fromjson("{_id: true, a: '$myMeta', b: '$a.x'}"));
+ ASSERT_DOCUMENT_EQ(expectedProjection, inclusion->serializeTransformation(boost::none));
+}
+
+TEST_F(InclusionProjectionExecutionTestWithFallBackToDefault,
+ ExtractComputedProjectionInProjectShouldNotHideDependentSubFieldsWithDottedSibling) {
+ auto inclusion = makeInclusionProjectionWithDefaultPolicies(BSON("a"
+ << "$myMeta"
+ << "c.b"
+ << "$a.x"));
+
+ auto r = static_cast<InclusionProjectionExecutor*>(inclusion.get())->getRoot();
+ const std::set<StringData> reservedNames{};
+ auto [addFields, deleteFlag] =
+ r->extractComputedProjectionsInProject("myMeta", "meta", reservedNames);
+
+ ASSERT_EQ(addFields.nFields(), 0);
+ ASSERT_EQ(deleteFlag, false);
+
+ auto expectedProjection = Document(fromjson("{_id: true, a: '$myMeta', c: {b: '$a.x'}}"));
+ ASSERT_DOCUMENT_EQ(expectedProjection, inclusion->serializeTransformation(boost::none));
+}
+
TEST_F(InclusionProjectionExecutionTestWithFallBackToDefault, ApplyProjectionAfterSplit) {
auto inclusion = makeInclusionProjectionWithDefaultPolicies(
BSON("a" << true << "computedMeta1"
diff --git a/src/mongo/db/pipeline/dependencies.cpp b/src/mongo/db/pipeline/dependencies.cpp
index d2a5563c7c7..479c1876471 100644
--- a/src/mongo/db/pipeline/dependencies.cpp
+++ b/src/mongo/db/pipeline/dependencies.cpp
@@ -37,6 +37,34 @@
namespace mongo {
+OrderedPathSet DepsTracker::simplifyDependencies(OrderedPathSet dependencies,
+ TruncateToRootLevel truncateToRootLevel) {
+ // The key operation here is folding dependencies into ancestor dependencies, wherever possible.
+ // This is assisted by a special sort in OrderedPathSet that treats '.'
+ // as the first char and thus places parent paths directly before their children.
+ OrderedPathSet returnSet;
+ std::string last;
+ for (const auto& path : dependencies) {
+ if (!last.empty() && str::startsWith(path, last)) {
+ // We are including a parent of this field, so we can skip this field.
+ continue;
+ }
+
+ // Check that the field requested is a valid field name in the agg language. This
+ // constructor will throw if it isn't.
+ FieldPath fp(path);
+
+ if (truncateToRootLevel == TruncateToRootLevel::yes) {
+ last = fp.front().toString() + '.';
+ returnSet.insert(fp.front().toString());
+ } else {
+ last = path + '.';
+ returnSet.insert(path);
+ }
+ }
+ return returnSet;
+}
+
std::list<std::string> DepsTracker::sortedFields() const {
// Use a special comparator to put parent fieldpaths before their children.
std::list<std::string> sortedFields(fields.begin(), fields.end());
diff --git a/src/mongo/db/pipeline/dependencies.h b/src/mongo/db/pipeline/dependencies.h
index 3c892de8181..94f4489b71f 100644
--- a/src/mongo/db/pipeline/dependencies.h
+++ b/src/mongo/db/pipeline/dependencies.h
@@ -38,6 +38,19 @@
namespace mongo {
+/** Custom comparator that orders fieldpath strings by path prefix first, then by field.
+ * This ensures that a parent field is ordered directly before its children.
+ */
+struct PathPrefixComparator {
+ /* Returns true if the lhs value should sort before the rhs, false otherwise. */
+ bool operator()(const std::string& lhs, const std::string& rhs) const;
+};
+/**
+ * Set of field paths strings. When iterated over, a parent path is seen directly before its
+ * children (or descendants, more generally). Eg., "a", "a.a", "a.b", "a-plus", "b".
+ */
+typedef std::set<std::string, PathPrefixComparator> OrderedPathSet;
+
/**
* This struct allows components in an agg pipeline to report what they need from their input.
*/
@@ -104,6 +117,16 @@ struct DepsTracker {
enum class TruncateToRootLevel : bool { no, yes };
/**
+ * Return the set of dependencies with descendant paths removed.
+ * For example ["a.b", "a.b.f", "c"] --> ["a.b", "c"].
+ *
+ * TruncateToRootLevel::yes requires all dependencies to be top-level.
+ * The example above would return ["a", "c"]
+ */
+ static OrderedPathSet simplifyDependencies(OrderedPathSet dependencies,
+ TruncateToRootLevel truncation);
+
+ /**
* Returns a projection object covering the non-metadata dependencies tracked by this class, or
* empty BSONObj if the entire document is required. By default, the resulting project will
* include the full, dotted field names of the dependencies. If 'truncationBehavior' is set to
@@ -207,12 +230,4 @@ private:
QueryMetadataBitSet _metadataDeps;
};
-
-/** Custom comparator that orders fieldpath strings by path prefix first, then by field.
- * This ensures that a parent field is ordered directly before its children.
- */
-struct PathPrefixComparator {
- /* Returns true if the lhs value should sort before the rhs, false otherwise. */
- bool operator()(const std::string& lhs, const std::string& rhs) const;
-};
} // namespace mongo