diff options
author | Hana Pearlman <hana.pearlman@mongodb.com> | 2023-01-05 16:28:19 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-01-05 17:04:41 +0000 |
commit | a3657539f4340d38b8d77058f588140904d2f81e (patch) | |
tree | ea4c896d09d9f8e4d2a5f339cfd37b462b1f2329 | |
parent | 8ce31326668592d9121897bd777d017dd24a6fee (diff) | |
download | mongo-a3657539f4340d38b8d77058f588140904d2f81e.tar.gz |
SERVER-66570: Ensure that pushed down projections computed from the metadata do not hide dependent subfields referenced in the same project
-rw-r--r-- | jstests/core/timeseries/timeseries_project.js | 11 | ||||
-rw-r--r-- | src/mongo/db/exec/add_fields_projection_executor_test.cpp | 57 | ||||
-rw-r--r-- | src/mongo/db/exec/inclusion_projection_executor.cpp | 105 | ||||
-rw-r--r-- | src/mongo/db/exec/inclusion_projection_executor_test.cpp | 56 | ||||
-rw-r--r-- | src/mongo/db/pipeline/dependencies.cpp | 28 | ||||
-rw-r--r-- | src/mongo/db/pipeline/dependencies.h | 31 |
6 files changed, 249 insertions, 39 deletions
diff --git a/jstests/core/timeseries/timeseries_project.js b/jstests/core/timeseries/timeseries_project.js index 44c42b03e1b..11ca90f2efe 100644 --- a/jstests/core/timeseries/timeseries_project.js +++ b/jstests/core/timeseries/timeseries_project.js @@ -97,6 +97,7 @@ const doc = { time: new Date("2019-10-11T14:39:18.670Z"), x: 5, a: 3, + obj: {a: 3}, }; assert.commandWorked(tsColl.insert(doc)); assert.commandWorked(regColl.insert(doc)); @@ -107,12 +108,22 @@ let tsDoc = tsColl.aggregate(pipeline).toArray(); let regDoc = regColl.aggregate(pipeline).toArray(); assert.docEq(tsDoc, regDoc); +pipeline = [{$project: {_id: 0, obj: "$x", b: {$add: ["$obj.a", 1]}}}]; +tsDoc = tsColl.aggregate(pipeline).toArray(); +regDoc = regColl.aggregate(pipeline).toArray(); +assert.docEq(tsDoc, regDoc); + // Test $addFields. pipeline = [{$addFields: {a: "$x", b: "$a"}}, {$project: {_id: 0}}]; tsDoc = tsColl.aggregate(pipeline).toArray(); regDoc = regColl.aggregate(pipeline).toArray(); assert.docEq(tsDoc, regDoc); +pipeline = [{$addFields: {obj: "$x", b: {$add: ["$obj.a", 1]}}}, {$project: {_id: 0}}]; +tsDoc = tsColl.aggregate(pipeline).toArray(); +regDoc = regColl.aggregate(pipeline).toArray(); +assert.docEq(tsDoc, regDoc); + pipeline = [{$project: {a: 1, _id: 0}}, {$project: {newMeta: "$x"}}]; tsDoc = tsColl.aggregate(pipeline).toArray(); regDoc = regColl.aggregate(pipeline).toArray(); diff --git a/src/mongo/db/exec/add_fields_projection_executor_test.cpp b/src/mongo/db/exec/add_fields_projection_executor_test.cpp index 007e5cc1ffb..0870c53751a 100644 --- a/src/mongo/db/exec/add_fields_projection_executor_test.cpp +++ b/src/mongo/db/exec/add_fields_projection_executor_test.cpp @@ -685,5 +685,62 @@ TEST(AddFieldsProjectionExecutorExecutionTest, DoNotExtractComputedProjectionWit addFields.serializeTransformation(boost::none)); } +TEST(AddFieldsProjectionExecutorExecutionTest, + ExtractComputedProjectionShouldNotHideDependentSubFields) { + boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest()); + AddFieldsProjectionExecutor addFields(expCtx); + addFields.parse(BSON("obj" + << "$myMeta" + << "b" << BSON("$add" << BSON_ARRAY("$obj.a" << 1)))); + + const std::set<StringData> reservedNames{}; + auto [extractedAddFields, deleteFlag] = + addFields.extractComputedProjections("myMeta", "meta", reservedNames); + + ASSERT_EQ(extractedAddFields.nFields(), 0); + ASSERT_EQ(deleteFlag, false); + + auto expectedProjection = + Document(fromjson("{obj: '$myMeta', b: {$add: ['$obj.a', {$const: 1}]}}")); + ASSERT_DOCUMENT_EQ(expectedProjection, addFields.serializeTransformation(boost::none)); +} + +TEST(AddFieldsProjectionExecutorExecutionTest, + ExtractComputedProjectionShouldNotHideDependentFieldsWithDottedSibling) { + boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest()); + AddFieldsProjectionExecutor addFields(expCtx); + addFields.parse(BSON("a" + << "$myMeta" + << "c.b" + << "$a.x")); + + const std::set<StringData> reservedNames{}; + auto [extractedAddFields, deleteFlag] = + addFields.extractComputedProjections("myMeta", "meta", reservedNames); + + ASSERT_EQ(extractedAddFields.nFields(), 0); + ASSERT_EQ(deleteFlag, false); + + auto expectedProjection = Document(fromjson("{a: '$myMeta', c: {b: '$a.x'}}")); + ASSERT_DOCUMENT_EQ(expectedProjection, addFields.serializeTransformation(boost::none)); +} + +TEST(AddFieldsProjectionExecutorExecutionTest, ExtractComputedProjectionShouldNotIncludeId) { + boost::intrusive_ptr<ExpressionContextForTest> expCtx(new ExpressionContextForTest()); + AddFieldsProjectionExecutor addFields(expCtx); + addFields.parse(BSON("a" << BSON("$sum" << BSON_ARRAY("$myMeta" + << "$_id")))); + + const std::set<StringData> reservedNames{}; + auto [extractedAddFields, deleteFlag] = + addFields.extractComputedProjections("myMeta", "meta", reservedNames); + + ASSERT_EQ(extractedAddFields.nFields(), 0); + ASSERT_EQ(deleteFlag, false); + + auto expectedProjection = Document(fromjson("{a: {$sum: ['$myMeta', '$_id']}}")); + ASSERT_DOCUMENT_EQ(expectedProjection, addFields.serializeTransformation(boost::none)); +} + } // namespace } // namespace mongo::projection_executor diff --git a/src/mongo/db/exec/inclusion_projection_executor.cpp b/src/mongo/db/exec/inclusion_projection_executor.cpp index d7384be8109..c2a759be998 100644 --- a/src/mongo/db/exec/inclusion_projection_executor.cpp +++ b/src/mongo/db/exec/inclusion_projection_executor.cpp @@ -99,6 +99,50 @@ boost::intrusive_ptr<Expression> substituteInExpr(boost::intrusive_ptr<Expressio } return ex; }; + +/** + * Returns a vector of top-level dependencies where each index i in the vector corresponds to the + * dependencies from the ith expression according to 'orderToProcess'. + */ +std::vector<OrderedPathSet> getTopLevelDeps( + const std::vector<std::string>& orderToProcess, + const StringMap<boost::intrusive_ptr<Expression>>& expressions, + const StringMap<std::unique_ptr<ProjectionNode>>& children) { + std::vector<OrderedPathSet> topLevelDeps; + for (const auto& field : orderToProcess) { + DepsTracker deps; + if (auto exprIt = expressions.find(field); exprIt != expressions.end()) { + exprIt->second->addDependencies(&deps); + } else { + // Each expression in orderToProcess should either be in expressions or children. + auto childIt = children.find(field); + tassert(6657000, "Unable to calculate dependencies", childIt != children.end()); + childIt->second->reportDependencies(&deps); + } + + OrderedPathSet ops{deps.fields.begin(), deps.fields.end()}; + topLevelDeps.push_back( + DepsTracker::simplifyDependencies(ops, DepsTracker::TruncateToRootLevel::yes)); + } + return topLevelDeps; +} + +/** + * Returns whether or not there is an expression in the projection which depends on 'field' other + * than the expression which computes 'field'. For example, given field "a" and projection + * {a: "$b", c: {$sum: ["$a", 5]}}, return true. Given field "a" and projection + * {a: {$sum: ["$a", 5]}, c: "$b"}, return false. 'field' should be a top level path. + */ +bool computedExprDependsOnField(const std::vector<OrderedPathSet>& topLevelDeps, + const std::string& field, + const size_t fieldIndex) { + for (size_t i = 0; i < topLevelDeps.size(); i++) { + if (i != fieldIndex && topLevelDeps[i].count(field) > 0) { + return true; + } + } + return false; +} } // namespace std::pair<BSONObj, bool> InclusionNode::extractComputedProjectionsInProject( @@ -109,8 +153,8 @@ std::pair<BSONObj, bool> InclusionNode::extractComputedProjectionsInProject( return {BSONObj{}, false}; } - DepsTracker allDeps; - reportDependencies(&allDeps); + std::vector<OrderedPathSet> topLevelDeps = + getTopLevelDeps(_orderToProcessAdditionsAndChildren, _expressions, _children); // Auxiliary vector with extracted computed projections: <name, expression, replacement // strategy>. If the replacement strategy flag is true, the expression is replaced with a @@ -118,19 +162,15 @@ std::pair<BSONObj, bool> InclusionNode::extractComputedProjectionsInProject( std::vector<std::tuple<StringData, boost::intrusive_ptr<Expression>, bool>> addFieldsExpressions; bool replaceWithProjField = true; - for (auto&& field : _orderToProcessAdditionsAndChildren) { + for (size_t i = 0; i < _orderToProcessAdditionsAndChildren.size(); i++) { + auto&& field = _orderToProcessAdditionsAndChildren[i]; + if (reservedNames.count(field) > 0) { // Do not pushdown computed projection with reserved name. replaceWithProjField = false; continue; } - if (allDeps.fields.count(field) > 0) { - // Do not extract a computed projection if its name is the same as a dependent field. If - // the extracted $addFields were to be placed before this projection, the dependency - // with the common name would be shadowed by the computed projection. - replaceWithProjField = false; - continue; - } + auto expressionIt = _expressions.find(field); if (expressionIt == _expressions.end()) { // After seeing the first dotted path expression we need to replace computed @@ -138,13 +178,17 @@ std::pair<BSONObj, bool> InclusionNode::extractComputedProjectionsInProject( replaceWithProjField = false; continue; } - DepsTracker deps; - expressionIt->second->addDependencies(&deps); - auto topLevelFieldNames = - deps.toProjectionWithoutMetadata(DepsTracker::TruncateToRootLevel::yes) - .getFieldNames<std::set<std::string>>(); - topLevelFieldNames.erase("_id"); + // Do not extract a computed projection if it is computing a value that other fields in the + // same projection depend on. If the extracted $addFields were to be placed before this + // projection, the dependency with the common name would be shadowed by the computed + // projection. + if (computedExprDependsOnField(topLevelDeps, field, i)) { + replaceWithProjField = false; + continue; + } + + const auto& topLevelFieldNames = topLevelDeps[i]; if (topLevelFieldNames.size() == 1 && topLevelFieldNames.count(oldName.toString()) == 1) { // Substitute newName for oldName in the expression. StringMap<std::string> renames; @@ -197,35 +241,34 @@ std::pair<BSONObj, bool> InclusionNode::extractComputedProjectionsInAddFields( return {BSONObj{}, false}; } - DepsTracker allDeps; - reportDependencies(&allDeps); + std::vector<OrderedPathSet> topLevelDeps = + getTopLevelDeps(_orderToProcessAdditionsAndChildren, _expressions, _children); // Auxiliary vector with extracted computed projections: <name, expression>. // To preserve the original fields order, only projections at the beginning of the // _orderToProcessAdditionsAndChildren list can be extracted for pushdown. std::vector<std::pair<StringData, boost::intrusive_ptr<Expression>>> addFieldsExpressions; - for (auto&& field : _orderToProcessAdditionsAndChildren) { + for (size_t i = 0; i < _orderToProcessAdditionsAndChildren.size(); i++) { + auto&& field = _orderToProcessAdditionsAndChildren[i]; // Do not extract for pushdown computed projection with reserved name. if (reservedNames.count(field) > 0) { break; } - if (allDeps.fields.count(field) > 0) { - // Do not extract a computed projection if its name is the same as a dependent field. If - // the extracted $addFields were to be placed before this $addFields, the dependency - // with the common name would be shadowed by the computed projection. - break; - } + auto expressionIt = _expressions.find(field); if (expressionIt == _expressions.end()) { break; } - DepsTracker deps; - expressionIt->second->addDependencies(&deps); - auto topLevelFieldNames = - deps.toProjectionWithoutMetadata(DepsTracker::TruncateToRootLevel::yes) - .getFieldNames<std::set<std::string>>(); - topLevelFieldNames.erase("_id"); + // Do not extract a computed projection if it is computing a value that other fields in the + // same projection depend on. If the extracted $addFields were to be placed before this + // projection, the dependency with the common name would be shadowed by the computed + // projection. + if (computedExprDependsOnField(topLevelDeps, field, i)) { + break; + } + + auto& topLevelFieldNames = topLevelDeps[i]; if (topLevelFieldNames.size() == 1 && topLevelFieldNames.count(oldName.toString()) == 1) { // Substitute newName for oldName in the expression. StringMap<std::string> renames; diff --git a/src/mongo/db/exec/inclusion_projection_executor_test.cpp b/src/mongo/db/exec/inclusion_projection_executor_test.cpp index 84ed31a9bb4..53ebf3af4a3 100644 --- a/src/mongo/db/exec/inclusion_projection_executor_test.cpp +++ b/src/mongo/db/exec/inclusion_projection_executor_test.cpp @@ -1090,6 +1090,62 @@ TEST_F(InclusionProjectionExecutionTestWithFallBackToDefault, ASSERT_DOCUMENT_EQ(expectedProjection, inclusion->serializeTransformation(boost::none)); } +TEST_F(InclusionProjectionExecutionTestWithFallBackToDefault, + ExtractComputedProjectionInProjectShouldNotIncludeId) { + auto inclusion = makeInclusionProjectionWithDefaultPolicies( + BSON("a" << BSON("$sum" << BSON_ARRAY("$myMeta" + << "$_id")))); + + auto r = static_cast<InclusionProjectionExecutor*>(inclusion.get())->getRoot(); + const std::set<StringData> reservedNames{}; + auto [addFields, deleteFlag] = + r->extractComputedProjectionsInProject("myMeta", "meta", reservedNames); + + ASSERT_EQ(addFields.nFields(), 0); + ASSERT_EQ(deleteFlag, false); + + auto expectedProjection = Document(fromjson("{_id: true, a: {$sum: ['$myMeta', '$_id']}}")); + ASSERT_DOCUMENT_EQ(expectedProjection, inclusion->serializeTransformation(boost::none)); +} + +TEST_F(InclusionProjectionExecutionTestWithFallBackToDefault, + ExtractComputedProjectionInProjectShouldNotHideDependentSubFields) { + auto inclusion = makeInclusionProjectionWithDefaultPolicies(BSON("a" + << "$myMeta" + << "b" + << "$a.x")); + + auto r = static_cast<InclusionProjectionExecutor*>(inclusion.get())->getRoot(); + const std::set<StringData> reservedNames{}; + auto [addFields, deleteFlag] = + r->extractComputedProjectionsInProject("myMeta", "meta", reservedNames); + + ASSERT_EQ(addFields.nFields(), 0); + ASSERT_EQ(deleteFlag, false); + + auto expectedProjection = Document(fromjson("{_id: true, a: '$myMeta', b: '$a.x'}")); + ASSERT_DOCUMENT_EQ(expectedProjection, inclusion->serializeTransformation(boost::none)); +} + +TEST_F(InclusionProjectionExecutionTestWithFallBackToDefault, + ExtractComputedProjectionInProjectShouldNotHideDependentSubFieldsWithDottedSibling) { + auto inclusion = makeInclusionProjectionWithDefaultPolicies(BSON("a" + << "$myMeta" + << "c.b" + << "$a.x")); + + auto r = static_cast<InclusionProjectionExecutor*>(inclusion.get())->getRoot(); + const std::set<StringData> reservedNames{}; + auto [addFields, deleteFlag] = + r->extractComputedProjectionsInProject("myMeta", "meta", reservedNames); + + ASSERT_EQ(addFields.nFields(), 0); + ASSERT_EQ(deleteFlag, false); + + auto expectedProjection = Document(fromjson("{_id: true, a: '$myMeta', c: {b: '$a.x'}}")); + ASSERT_DOCUMENT_EQ(expectedProjection, inclusion->serializeTransformation(boost::none)); +} + TEST_F(InclusionProjectionExecutionTestWithFallBackToDefault, ApplyProjectionAfterSplit) { auto inclusion = makeInclusionProjectionWithDefaultPolicies( BSON("a" << true << "computedMeta1" diff --git a/src/mongo/db/pipeline/dependencies.cpp b/src/mongo/db/pipeline/dependencies.cpp index d2a5563c7c7..479c1876471 100644 --- a/src/mongo/db/pipeline/dependencies.cpp +++ b/src/mongo/db/pipeline/dependencies.cpp @@ -37,6 +37,34 @@ namespace mongo { +OrderedPathSet DepsTracker::simplifyDependencies(OrderedPathSet dependencies, + TruncateToRootLevel truncateToRootLevel) { + // The key operation here is folding dependencies into ancestor dependencies, wherever possible. + // This is assisted by a special sort in OrderedPathSet that treats '.' + // as the first char and thus places parent paths directly before their children. + OrderedPathSet returnSet; + std::string last; + for (const auto& path : dependencies) { + if (!last.empty() && str::startsWith(path, last)) { + // We are including a parent of this field, so we can skip this field. + continue; + } + + // Check that the field requested is a valid field name in the agg language. This + // constructor will throw if it isn't. + FieldPath fp(path); + + if (truncateToRootLevel == TruncateToRootLevel::yes) { + last = fp.front().toString() + '.'; + returnSet.insert(fp.front().toString()); + } else { + last = path + '.'; + returnSet.insert(path); + } + } + return returnSet; +} + std::list<std::string> DepsTracker::sortedFields() const { // Use a special comparator to put parent fieldpaths before their children. std::list<std::string> sortedFields(fields.begin(), fields.end()); diff --git a/src/mongo/db/pipeline/dependencies.h b/src/mongo/db/pipeline/dependencies.h index 3c892de8181..94f4489b71f 100644 --- a/src/mongo/db/pipeline/dependencies.h +++ b/src/mongo/db/pipeline/dependencies.h @@ -38,6 +38,19 @@ namespace mongo { +/** Custom comparator that orders fieldpath strings by path prefix first, then by field. + * This ensures that a parent field is ordered directly before its children. + */ +struct PathPrefixComparator { + /* Returns true if the lhs value should sort before the rhs, false otherwise. */ + bool operator()(const std::string& lhs, const std::string& rhs) const; +}; +/** + * Set of field paths strings. When iterated over, a parent path is seen directly before its + * children (or descendants, more generally). Eg., "a", "a.a", "a.b", "a-plus", "b". + */ +typedef std::set<std::string, PathPrefixComparator> OrderedPathSet; + /** * This struct allows components in an agg pipeline to report what they need from their input. */ @@ -104,6 +117,16 @@ struct DepsTracker { enum class TruncateToRootLevel : bool { no, yes }; /** + * Return the set of dependencies with descendant paths removed. + * For example ["a.b", "a.b.f", "c"] --> ["a.b", "c"]. + * + * TruncateToRootLevel::yes requires all dependencies to be top-level. + * The example above would return ["a", "c"] + */ + static OrderedPathSet simplifyDependencies(OrderedPathSet dependencies, + TruncateToRootLevel truncation); + + /** * Returns a projection object covering the non-metadata dependencies tracked by this class, or * empty BSONObj if the entire document is required. By default, the resulting project will * include the full, dotted field names of the dependencies. If 'truncationBehavior' is set to @@ -207,12 +230,4 @@ private: QueryMetadataBitSet _metadataDeps; }; - -/** Custom comparator that orders fieldpath strings by path prefix first, then by field. - * This ensures that a parent field is ordered directly before its children. - */ -struct PathPrefixComparator { - /* Returns true if the lhs value should sort before the rhs, false otherwise. */ - bool operator()(const std::string& lhs, const std::string& rhs) const; -}; } // namespace mongo |