From 0ec7e6199432904901a13bf82856074b1e4f4ab4 Mon Sep 17 00:00:00 2001 From: Hana Pearlman Date: Mon, 14 Nov 2022 21:23:22 +0000 Subject: SERVER-66570: Ensure that pushed down projections computed from the metadata do not hide dependent subfields referenced in the same project --- jstests/core/timeseries/timeseries_project.js | 11 +++ .../exec/add_fields_projection_executor_test.cpp | 57 +++++++++++ .../db/exec/inclusion_projection_executor.cpp | 104 +++++++++++++++------ .../db/exec/inclusion_projection_executor_test.cpp | 56 +++++++++++ 4 files changed, 197 insertions(+), 31 deletions(-) diff --git a/jstests/core/timeseries/timeseries_project.js b/jstests/core/timeseries/timeseries_project.js index b5d615ebde5..1352501d221 100644 --- a/jstests/core/timeseries/timeseries_project.js +++ b/jstests/core/timeseries/timeseries_project.js @@ -97,6 +97,7 @@ const doc = { time: new Date("2019-10-11T14:39:18.670Z"), x: 5, a: 3, + obj: {a: 3}, }; assert.commandWorked(tsColl.insert(doc)); assert.commandWorked(regColl.insert(doc)); @@ -107,12 +108,22 @@ let tsDoc = tsColl.aggregate(pipeline).toArray(); let regDoc = regColl.aggregate(pipeline).toArray(); assert.docEq(tsDoc, regDoc); +pipeline = [{$project: {_id: 0, obj: "$x", b: {$add: ["$obj.a", 1]}}}]; +tsDoc = tsColl.aggregate(pipeline).toArray(); +regDoc = regColl.aggregate(pipeline).toArray(); +assert.docEq(tsDoc, regDoc); + // Test $addFields. pipeline = [{$addFields: {a: "$x", b: "$a"}}, {$project: {_id: 0}}]; tsDoc = tsColl.aggregate(pipeline).toArray(); regDoc = regColl.aggregate(pipeline).toArray(); assert.docEq(tsDoc, regDoc); +pipeline = [{$addFields: {obj: "$x", b: {$add: ["$obj.a", 1]}}}, {$project: {_id: 0}}]; +tsDoc = tsColl.aggregate(pipeline).toArray(); +regDoc = regColl.aggregate(pipeline).toArray(); +assert.docEq(tsDoc, regDoc); + pipeline = [{$project: {a: 1, _id: 0}}, {$project: {newMeta: "$x"}}]; tsDoc = tsColl.aggregate(pipeline).toArray(); regDoc = regColl.aggregate(pipeline).toArray(); diff --git a/src/mongo/db/exec/add_fields_projection_executor_test.cpp b/src/mongo/db/exec/add_fields_projection_executor_test.cpp index 0c5fd22c5f6..7230c6495c8 100644 --- a/src/mongo/db/exec/add_fields_projection_executor_test.cpp +++ b/src/mongo/db/exec/add_fields_projection_executor_test.cpp @@ -681,5 +681,62 @@ TEST(AddFieldsProjectionExecutorExecutionTest, DoNotExtractComputedProjectionWit addFields.serializeTransformation(boost::none)); } +TEST(AddFieldsProjectionExecutorExecutionTest, + ExtractComputedProjectionShouldNotHideDependentSubFields) { + boost::intrusive_ptr expCtx(new ExpressionContextForTest()); + AddFieldsProjectionExecutor addFields(expCtx); + addFields.parse(BSON("obj" + << "$myMeta" + << "b" << BSON("$add" << BSON_ARRAY("$obj.a" << 1)))); + + const std::set reservedNames{}; + auto [extractedAddFields, deleteFlag] = + addFields.extractComputedProjections("myMeta", "meta", reservedNames); + + ASSERT_EQ(extractedAddFields.nFields(), 0); + ASSERT_EQ(deleteFlag, false); + + auto expectedProjection = + Document(fromjson("{obj: '$myMeta', b: {$add: ['$obj.a', {$const: 1}]}}")); + ASSERT_DOCUMENT_EQ(expectedProjection, addFields.serializeTransformation(boost::none)); +} + +TEST(AddFieldsProjectionExecutorExecutionTest, + ExtractComputedProjectionShouldNotHideDependentFieldsWithDottedSibling) { + boost::intrusive_ptr expCtx(new ExpressionContextForTest()); + AddFieldsProjectionExecutor addFields(expCtx); + addFields.parse(BSON("a" + << "$myMeta" + << "c.b" + << "$a.x")); + + const std::set reservedNames{}; + auto [extractedAddFields, deleteFlag] = + addFields.extractComputedProjections("myMeta", "meta", reservedNames); + + ASSERT_EQ(extractedAddFields.nFields(), 0); + ASSERT_EQ(deleteFlag, false); + + auto expectedProjection = Document(fromjson("{a: '$myMeta', c: {b: '$a.x'}}")); + ASSERT_DOCUMENT_EQ(expectedProjection, addFields.serializeTransformation(boost::none)); +} + +TEST(AddFieldsProjectionExecutorExecutionTest, ExtractComputedProjectionShouldNotIncludeId) { + boost::intrusive_ptr expCtx(new ExpressionContextForTest()); + AddFieldsProjectionExecutor addFields(expCtx); + addFields.parse(BSON("a" << BSON("$sum" << BSON_ARRAY("$myMeta" + << "$_id")))); + + const std::set reservedNames{}; + auto [extractedAddFields, deleteFlag] = + addFields.extractComputedProjections("myMeta", "meta", reservedNames); + + ASSERT_EQ(extractedAddFields.nFields(), 0); + ASSERT_EQ(deleteFlag, false); + + auto expectedProjection = Document(fromjson("{a: {$sum: ['$myMeta', '$_id']}}")); + ASSERT_DOCUMENT_EQ(expectedProjection, addFields.serializeTransformation(boost::none)); +} + } // namespace } // namespace mongo::projection_executor diff --git a/src/mongo/db/exec/inclusion_projection_executor.cpp b/src/mongo/db/exec/inclusion_projection_executor.cpp index 30e06a76d82..594de7bcb26 100644 --- a/src/mongo/db/exec/inclusion_projection_executor.cpp +++ b/src/mongo/db/exec/inclusion_projection_executor.cpp @@ -100,6 +100,49 @@ boost::intrusive_ptr substituteInExpr(boost::intrusive_ptr getTopLevelDeps( + const std::vector& orderToProcess, + const StringMap>& expressions, + const StringMap>& children) { + std::vector topLevelDeps; + for (const auto& field : orderToProcess) { + DepsTracker deps; + if (auto exprIt = expressions.find(field); exprIt != expressions.end()) { + expression::addDependencies(exprIt->second.get(), &deps); + } else { + // Each expression in orderToProcess should either be in expressions or children. + auto childIt = children.find(field); + tassert(6657000, "Unable to calculate dependencies", childIt != children.end()); + childIt->second->reportDependencies(&deps); + } + + topLevelDeps.push_back( + DepsTracker::simplifyDependencies(deps.fields, DepsTracker::TruncateToRootLevel::yes)); + } + return topLevelDeps; +} + +/** + * Returns whether or not there is an expression in the projection which depends on 'field' other + * than the expression which computes 'field'. For example, given field "a" and projection + * {a: "$b", c: {$sum: ["$a", 5]}}, return true. Given field "a" and projection + * {a: {$sum: ["$a", 5]}, c: "$b"}, return false. 'field' should be a top level path. + */ +bool computedExprDependsOnField(const std::vector& topLevelDeps, + const std::string& field, + const size_t fieldIndex) { + for (size_t i = 0; i < topLevelDeps.size(); i++) { + if (i != fieldIndex && topLevelDeps[i].count(field) > 0) { + return true; + } + } + return false; +} } // namespace std::pair InclusionNode::extractComputedProjectionsInProject( @@ -110,8 +153,8 @@ std::pair InclusionNode::extractComputedProjectionsInProject( return {BSONObj{}, false}; } - DepsTracker allDeps; - reportDependencies(&allDeps); + std::vector topLevelDeps = + getTopLevelDeps(_orderToProcessAdditionsAndChildren, _expressions, _children); // Auxiliary vector with extracted computed projections: . If the replacement strategy flag is true, the expression is replaced with a @@ -119,19 +162,15 @@ std::pair InclusionNode::extractComputedProjectionsInProject( std::vector, bool>> addFieldsExpressions; bool replaceWithProjField = true; - for (auto&& field : _orderToProcessAdditionsAndChildren) { + for (size_t i = 0; i < _orderToProcessAdditionsAndChildren.size(); i++) { + auto&& field = _orderToProcessAdditionsAndChildren[i]; + if (reservedNames.count(field) > 0) { // Do not pushdown computed projection with reserved name. replaceWithProjField = false; continue; } - if (allDeps.fields.count(field) > 0) { - // Do not extract a computed projection if its name is the same as a dependent field. If - // the extracted $addFields were to be placed before this projection, the dependency - // with the common name would be shadowed by the computed projection. - replaceWithProjField = false; - continue; - } + auto expressionIt = _expressions.find(field); if (expressionIt == _expressions.end()) { // After seeing the first dotted path expression we need to replace computed @@ -139,13 +178,17 @@ std::pair InclusionNode::extractComputedProjectionsInProject( replaceWithProjField = false; continue; } - DepsTracker deps; - expression::addDependencies(expressionIt->second.get(), &deps); - auto topLevelFieldNames = - deps.toProjectionWithoutMetadata(DepsTracker::TruncateToRootLevel::yes) - .getFieldNames>(); - topLevelFieldNames.erase("_id"); + // Do not extract a computed projection if it is computing a value that other fields in the + // same projection depend on. If the extracted $addFields were to be placed before this + // projection, the dependency with the common name would be shadowed by the computed + // projection. + if (computedExprDependsOnField(topLevelDeps, field, i)) { + replaceWithProjField = false; + continue; + } + + const auto& topLevelFieldNames = topLevelDeps[i]; if (topLevelFieldNames.size() == 1 && topLevelFieldNames.count(oldName.toString()) == 1) { // Substitute newName for oldName in the expression. StringMap renames; @@ -199,35 +242,34 @@ std::pair InclusionNode::extractComputedProjectionsInAddFields( return {BSONObj{}, false}; } - DepsTracker allDeps; - reportDependencies(&allDeps); + std::vector topLevelDeps = + getTopLevelDeps(_orderToProcessAdditionsAndChildren, _expressions, _children); // Auxiliary vector with extracted computed projections: . // To preserve the original fields order, only projections at the beginning of the // _orderToProcessAdditionsAndChildren list can be extracted for pushdown. std::vector>> addFieldsExpressions; - for (auto&& field : _orderToProcessAdditionsAndChildren) { + for (size_t i = 0; i < _orderToProcessAdditionsAndChildren.size(); i++) { + auto&& field = _orderToProcessAdditionsAndChildren[i]; // Do not extract for pushdown computed projection with reserved name. if (reservedNames.count(field) > 0) { break; } - if (allDeps.fields.count(field) > 0) { - // Do not extract a computed projection if its name is the same as a dependent field. If - // the extracted $addFields were to be placed before this $addFields, the dependency - // with the common name would be shadowed by the computed projection. - break; - } + auto expressionIt = _expressions.find(field); if (expressionIt == _expressions.end()) { break; } - DepsTracker deps; - expression::addDependencies(expressionIt->second.get(), &deps); - auto topLevelFieldNames = - deps.toProjectionWithoutMetadata(DepsTracker::TruncateToRootLevel::yes) - .getFieldNames>(); - topLevelFieldNames.erase("_id"); + // Do not extract a computed projection if it is computing a value that other fields in the + // same projection depend on. If the extracted $addFields were to be placed before this + // projection, the dependency with the common name would be shadowed by the computed + // projection. + if (computedExprDependsOnField(topLevelDeps, field, i)) { + break; + } + + auto& topLevelFieldNames = topLevelDeps[i]; if (topLevelFieldNames.size() == 1 && topLevelFieldNames.count(oldName.toString()) == 1) { // Substitute newName for oldName in the expression. StringMap renames; diff --git a/src/mongo/db/exec/inclusion_projection_executor_test.cpp b/src/mongo/db/exec/inclusion_projection_executor_test.cpp index efd20cda382..356cced5d60 100644 --- a/src/mongo/db/exec/inclusion_projection_executor_test.cpp +++ b/src/mongo/db/exec/inclusion_projection_executor_test.cpp @@ -1092,6 +1092,62 @@ TEST_F(InclusionProjectionExecutionTestWithFallBackToDefault, ASSERT_DOCUMENT_EQ(expectedProjection, inclusion->serializeTransformation(boost::none)); } +TEST_F(InclusionProjectionExecutionTestWithFallBackToDefault, + ExtractComputedProjectionInProjectShouldNotIncludeId) { + auto inclusion = makeInclusionProjectionWithDefaultPolicies( + BSON("a" << BSON("$sum" << BSON_ARRAY("$myMeta" + << "$_id")))); + + auto r = static_cast(inclusion.get())->getRoot(); + const std::set reservedNames{}; + auto [addFields, deleteFlag] = + r->extractComputedProjectionsInProject("myMeta", "meta", reservedNames); + + ASSERT_EQ(addFields.nFields(), 0); + ASSERT_EQ(deleteFlag, false); + + auto expectedProjection = Document(fromjson("{_id: true, a: {$sum: ['$myMeta', '$_id']}}")); + ASSERT_DOCUMENT_EQ(expectedProjection, inclusion->serializeTransformation(boost::none)); +} + +TEST_F(InclusionProjectionExecutionTestWithFallBackToDefault, + ExtractComputedProjectionInProjectShouldNotHideDependentSubFields) { + auto inclusion = makeInclusionProjectionWithDefaultPolicies(BSON("a" + << "$myMeta" + << "b" + << "$a.x")); + + auto r = static_cast(inclusion.get())->getRoot(); + const std::set reservedNames{}; + auto [addFields, deleteFlag] = + r->extractComputedProjectionsInProject("myMeta", "meta", reservedNames); + + ASSERT_EQ(addFields.nFields(), 0); + ASSERT_EQ(deleteFlag, false); + + auto expectedProjection = Document(fromjson("{_id: true, a: '$myMeta', b: '$a.x'}")); + ASSERT_DOCUMENT_EQ(expectedProjection, inclusion->serializeTransformation(boost::none)); +} + +TEST_F(InclusionProjectionExecutionTestWithFallBackToDefault, + ExtractComputedProjectionInProjectShouldNotHideDependentSubFieldsWithDottedSibling) { + auto inclusion = makeInclusionProjectionWithDefaultPolicies(BSON("a" + << "$myMeta" + << "c.b" + << "$a.x")); + + auto r = static_cast(inclusion.get())->getRoot(); + const std::set reservedNames{}; + auto [addFields, deleteFlag] = + r->extractComputedProjectionsInProject("myMeta", "meta", reservedNames); + + ASSERT_EQ(addFields.nFields(), 0); + ASSERT_EQ(deleteFlag, false); + + auto expectedProjection = Document(fromjson("{_id: true, a: '$myMeta', c: {b: '$a.x'}}")); + ASSERT_DOCUMENT_EQ(expectedProjection, inclusion->serializeTransformation(boost::none)); +} + TEST_F(InclusionProjectionExecutionTestWithFallBackToDefault, ApplyProjectionAfterSplit) { auto inclusion = makeInclusionProjectionWithDefaultPolicies( BSON("a" << true << "computedMeta1" -- cgit v1.2.1