diff options
author | Arun Banala <arun.banala@mongodb.com> | 2020-06-30 16:37:27 +0100 |
---|---|---|
committer | Arun Banala <arun.banala@mongodb.com> | 2020-06-30 23:19:35 +0100 |
commit | 6c7c37470bdf4b9dc5ed0215ed161945f9553f0f (patch) | |
tree | 4fc4a3891a4212cf63a952ad2f6a46104e6bb931 | |
parent | ee10647df9a99f3965350de6a0c0a6959b6d838f (diff) | |
download | mongo-6c7c37470bdf4b9dc5ed0215ed161945f9553f0f.tar.gz |
SERVER-48684 Pipeline stage $set fails to set a non-existent dotted field to an object
(cherry picked from commit d21425a2fe6d8179815ad22a8d0047fcaca84ae3)
-rw-r--r-- | jstests/aggregation/sources/addFields/dotted_paths.js | 98 | ||||
-rw-r--r-- | jstests/aggregation/sources/project/project_with_expressions.js | 39 | ||||
-rw-r--r-- | src/mongo/db/exec/add_fields_projection_executor.cpp | 45 | ||||
-rw-r--r-- | src/mongo/db/exec/add_fields_projection_executor.h | 8 | ||||
-rw-r--r-- | src/mongo/db/exec/projection_executor_builder_test.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/exec/projection_node.cpp | 17 | ||||
-rw-r--r-- | src/mongo/db/exec/projection_node.h | 29 |
7 files changed, 202 insertions, 42 deletions
diff --git a/jstests/aggregation/sources/addFields/dotted_paths.js b/jstests/aggregation/sources/addFields/dotted_paths.js new file mode 100644 index 00000000000..e8ecdc39d46 --- /dev/null +++ b/jstests/aggregation/sources/addFields/dotted_paths.js @@ -0,0 +1,98 @@ +/** + * Test the behavior of $addFields in the presence of a dotted field path. + */ +(function() { +const coll = db.add_fields_dotted_paths; +coll.drop(); + +let initObj = { + _id: 1, + arrayField: [1, {subField: [2, {}]}, [1]], + objField: {p: {q: 1}, subArr: [1]}, + otherField: "value" +}; +assert.commandWorked(coll.insert(initObj)); + +function assertAddFieldsResult(projection, expectedResults) { + assert.eq(coll.aggregate([{$addFields: projection}]).toArray(), [expectedResults]); +} + +// Test that the value gets overwritten when a field exists at a given path. +initObj["objField"]["subArr"] = "newValue"; +assertAddFieldsResult({"objField.subArr": "newValue"}, initObj); +assertAddFieldsResult({"objField.subArr": {$literal: "newValue"}}, initObj); + +// Test that a new sub-object is created when a field does not exist at a given path. All the +// existing sibling fields are retained. +initObj["objField"] = { + p: {q: 1}, + subArr: [1], // Existing fields are retained. + newSubPath: {b: "newValue"} +}; +assertAddFieldsResult({"objField.newSubPath.b": {$literal: "newValue"}}, initObj); +assertAddFieldsResult({"objField.newSubPath.b": "newValue"}, initObj); + +// When the value is a nested object. +const valueWithNestedObject = { + newSubObj: [{p: "newValue"}] +}; +initObj["objField"]["newSubPath"] = { + b: valueWithNestedObject +}; +assertAddFieldsResult({"objField.newSubPath.b": valueWithNestedObject}, initObj); +assertAddFieldsResult({"objField.newSubPath.b": {$literal: valueWithNestedObject}}, initObj); +initObj["objField"] = { + p: {q: 1}, + subArr: [1] +}; // Reset input object. + +// When the top level field doesn"t exist, a new nested object is created based on the given path. +initObj["newField"] = { + newSubPath: {b: "newValue"} +}; +assertAddFieldsResult({"newField.newSubPath.b": {$literal: "newValue"}}, initObj); +assertAddFieldsResult({"newField.newSubPath.b": "newValue"}, initObj); + +// When the top level field doesn"t exist, a new nested object is created based on the given path +// and the structure of the object in the value. +initObj["newField"]["newSubPath"] = { + b: valueWithNestedObject +}; +assertAddFieldsResult({"newField.newSubPath.b": valueWithNestedObject}, initObj); +assertAddFieldsResult({"newField.newSubPath.b": {$literal: valueWithNestedObject}}, initObj); +delete initObj["newField"]; // Reset. + +// Test when the path encounters an array and the value is a scalar. +initObj["arrayField"] = { + newSubPath: {b: "newValue"} +}; +let expectedSubObj = {newSubPath: {b: "newValue"}}; +initObj["arrayField"] = + [expectedSubObj, Object.assign({subField: [2, {}]}, expectedSubObj), [expectedSubObj]]; +assertAddFieldsResult({"arrayField.newSubPath.b": {$literal: "newValue"}}, initObj); +assertAddFieldsResult({"arrayField.newSubPath.b": "newValue"}, initObj); + +// Test when the path encounters an array and the value is a nested object. +expectedSubObj = { + newSubPath: {b: valueWithNestedObject} +}; +initObj["arrayField"] = + [expectedSubObj, Object.assign({subField: [2, {}]}, expectedSubObj), [expectedSubObj]]; +assertAddFieldsResult({"arrayField.newSubPath.b": valueWithNestedObject}, initObj); +assertAddFieldsResult({"arrayField.newSubPath.b": {$literal: valueWithNestedObject}}, initObj); + +// Test when the path encounters multiple arrays and the value is a nested object. +expectedSubObj = { + subField: {b: valueWithNestedObject} +}; +initObj["arrayField"] = [ + expectedSubObj, + { + subField: + [{b: valueWithNestedObject}, {b: valueWithNestedObject}] // Sub-array is also exploded. + }, + [expectedSubObj] +]; +assertAddFieldsResult({"arrayField.subField.b": valueWithNestedObject}, initObj); +assertAddFieldsResult({"arrayField.subField.b": {$literal: valueWithNestedObject}}, initObj); +})(); diff --git a/jstests/aggregation/sources/project/project_with_expressions.js b/jstests/aggregation/sources/project/project_with_expressions.js new file mode 100644 index 00000000000..0bff4ad1b22 --- /dev/null +++ b/jstests/aggregation/sources/project/project_with_expressions.js @@ -0,0 +1,39 @@ +/** + * Test that a $project with a combination of expressions and field projections gets evaluted + * correctly, and overwrites the data present in the input document when necessary. + */ +(function() { +const coll = db.project_with_expressions; +coll.drop(); + +assert.commandWorked(coll.insert({_id: 0, a: {subObj1: {p: 1}, subObj2: {p: 1}, subObj3: {p: 1}}})); + +function assertProjectionResultForFindAndAgg(projection, expectedResults) { + const aggResults = coll.aggregate([{$project: projection}]).toArray(); + const aggNoPushdownResults = + coll.aggregate([{$_internalInhibitOptimization: {}}, {$project: projection}]).toArray(); + const findResults = coll.find({}, projection).toArray(); + + assert.eq(aggResults, expectedResults); + assert.eq(aggNoPushdownResults, expectedResults); + assert.eq(findResults, expectedResults); +} + +// Case where a project with a valid sub-object, a project with missing sub-object and a project +// with sub-expression share a common parent, and the projection is represented using a dotted path. +assertProjectionResultForFindAndAgg( + {_id: 0, "a.subObj1": {$literal: 1}, "a.subObj2": 1, "a.subObj3.q": 1}, + [{a: {subObj2: {p: 1}, subObj3: {}, subObj1: 1}}]); +assertProjectionResultForFindAndAgg( + {_id: 0, "a.subObj2": 1, "a.subObj1": {$literal: 1}, "a.subObj3.q": {r: 1}}, + [{a: {subObj2: {p: 1}, subObj3: {}, subObj1: 1}}]); + +// Case where a project with a valid sub-object, a project with missing sub-object and a project +// with sub-expression share a common parent, and the projection is represented using sub-objects. +assertProjectionResultForFindAndAgg( + {_id: 0, a: {subObj1: {$literal: 1}, subObj2: 1, subObj3: {q: {r: 1}}}}, + [{a: {subObj2: {p: 1}, subObj3: {}, subObj1: 1}}]); +assertProjectionResultForFindAndAgg( + {_id: 0, a: {subObj2: 1, subObj1: {$literal: 1}, subObj3: {q: 1}}}, + [{a: {subObj2: {p: 1}, subObj3: {}, subObj1: 1}}]); +})(); diff --git a/src/mongo/db/exec/add_fields_projection_executor.cpp b/src/mongo/db/exec/add_fields_projection_executor.cpp index ec3acc136bc..33970dcbb51 100644 --- a/src/mongo/db/exec/add_fields_projection_executor.cpp +++ b/src/mongo/db/exec/add_fields_projection_executor.cpp @@ -223,31 +223,20 @@ std::unique_ptr<AddFieldsProjectionExecutor> AddFieldsProjectionExecutor::create void AddFieldsProjectionExecutor::parse(const BSONObj& spec) { for (auto elem : spec) { - auto fieldName = elem.fieldNameStringData(); + // The field name might be a dotted path. + auto fieldPath = FieldPath(elem.fieldNameStringData()); if (elem.type() == BSONType::Object) { // This is either an expression, or a nested specification. - if (parseObjectAsExpression(fieldName, elem.Obj(), _expCtx->variablesParseState)) { + if (parseObjectAsExpression(fieldPath, elem.Obj(), _expCtx->variablesParseState)) { // It was an expression. } else { - // The field name might be a dotted path. If so, we need to keep adding children - // to our tree until we create a child that represents that path. - auto remainingPath = FieldPath(elem.fieldName()); - auto* child = _root.get(); - while (remainingPath.getPathLength() > 1) { - child = child->addOrGetChild(remainingPath.getFieldName(0).toString()); - remainingPath = remainingPath.tail(); - } - // It is illegal to construct an empty FieldPath, so the above loop ends one - // iteration too soon. Add the last path here. - child = child->addOrGetChild(remainingPath.fullPath()); - parseSubObject(elem.Obj(), _expCtx->variablesParseState, child); + parseSubObject(elem.Obj(), _expCtx->variablesParseState, fieldPath); } } else { // This is a literal or regular value. _root->addExpressionForPath( - FieldPath(elem.fieldName()), - Expression::parseOperand(_expCtx, elem, _expCtx->variablesParseState)); + fieldPath, Expression::parseOperand(_expCtx, elem, _expCtx->variablesParseState)); } } } @@ -263,7 +252,7 @@ Document AddFieldsProjectionExecutor::applyProjection(const Document& inputDoc) } bool AddFieldsProjectionExecutor::parseObjectAsExpression( - StringData pathToObject, + const FieldPath& pathToObject, const BSONObj& objSpec, const VariablesParseState& variablesParseState) { if (objSpec.firstElementFieldName()[0] == '$') { @@ -278,29 +267,25 @@ bool AddFieldsProjectionExecutor::parseObjectAsExpression( void AddFieldsProjectionExecutor::parseSubObject(const BSONObj& subObj, const VariablesParseState& variablesParseState, - InclusionNode* node) { + const FieldPath& pathToObj) { for (auto&& elem : subObj) { - invariant(elem.fieldName()[0] != '$'); + auto fieldName = elem.fieldNameStringData(); + invariant(fieldName[0] != '$'); // Dotted paths in a sub-object have already been detected and disallowed by the function // ProjectionSpecValidator::validate(). - invariant(elem.fieldNameStringData().find('.') == std::string::npos); + invariant(fieldName.find('.') == std::string::npos); + auto currentPath = pathToObj.concat(fieldName); if (elem.type() == BSONType::Object) { // This is either an expression, or a nested specification. - auto fieldName = elem.fieldNameStringData().toString(); - if (!parseObjectAsExpression( - FieldPath::getFullyQualifiedPath(node->getPath(), fieldName), - elem.Obj(), - variablesParseState)) { + if (!parseObjectAsExpression(currentPath, elem.Obj(), variablesParseState)) { // It was a nested subobject - auto* child = node->addOrGetChild(fieldName); - parseSubObject(elem.Obj(), variablesParseState, child); + parseSubObject(elem.Obj(), variablesParseState, currentPath); } } else { // This is a literal or regular value. - node->addExpressionForPath( - FieldPath(elem.fieldName()), - Expression::parseOperand(_expCtx, elem, variablesParseState)); + _root->addExpressionForPath( + currentPath, Expression::parseOperand(_expCtx, elem, variablesParseState)); } } } diff --git a/src/mongo/db/exec/add_fields_projection_executor.h b/src/mongo/db/exec/add_fields_projection_executor.h index 00ba580c2c7..fd3ac8decab 100644 --- a/src/mongo/db/exec/add_fields_projection_executor.h +++ b/src/mongo/db/exec/add_fields_projection_executor.h @@ -130,17 +130,17 @@ private: * Throws an error if it was determined to be an expression specification, but failed to parse * as a valid expression. */ - bool parseObjectAsExpression(StringData pathToObject, + bool parseObjectAsExpression(const FieldPath& pathToObject, const BSONObj& objSpec, const VariablesParseState& variablesParseState); /** - * Traverses 'subObj' and parses each field. Adds any computed fields at this level - * to 'node'. + * Traverses 'subObj' and parses each field. Creates nodes along the 'pathToObj' when an + * expression/terminal path is found. */ void parseSubObject(const BSONObj& subObj, const VariablesParseState& variablesParseState, - InclusionNode* node); + const FieldPath& pathToObj); // The InclusionNode tree does most of the execution work once constructed. std::unique_ptr<InclusionNode> _root; diff --git a/src/mongo/db/exec/projection_executor_builder_test.cpp b/src/mongo/db/exec/projection_executor_builder_test.cpp index 695411a4939..5fa2668a6f5 100644 --- a/src/mongo/db/exec/projection_executor_builder_test.cpp +++ b/src/mongo/db/exec/projection_executor_builder_test.cpp @@ -170,6 +170,14 @@ TEST_F(ProjectionExecutorTestWithFallBackToDefault, CanProjectExpression) { executor->applyTransformation(Document{fromjson("{a: 1, b: 2}")})); } +TEST_F(ProjectionExecutorTestWithFallBackToDefault, CanProjectExpressionWithCommonParent) { + auto proj = parseWithDefaultPolicies( + fromjson("{'a.b.c': 1, 'b.c.d': 1, 'a.p.c' : {$add: ['$a.b.e', '$a.p']}, 'a.b.e': 1}")); + auto executor = createProjectionExecutor(proj); + ASSERT_DOCUMENT_EQ(Document{fromjson("{a: {b: {e: 4}, p: {c: 6}}}")}, + executor->applyTransformation(Document{fromjson("{a: {b: {e: 4}, p: 2}}")})); +} + TEST_F(ProjectionExecutorTestWithFallBackToDefault, CanProjectExclusionWithIdPath) { auto projWithoutId = parseWithDefaultPolicies(fromjson("{a: 0, _id: 0}")); auto executor = createProjectionExecutor(projWithoutId); diff --git a/src/mongo/db/exec/projection_node.cpp b/src/mongo/db/exec/projection_node.cpp index 9ea6e9afbf9..0cee88a78f6 100644 --- a/src/mongo/db/exec/projection_node.cpp +++ b/src/mongo/db/exec/projection_node.cpp @@ -40,17 +40,30 @@ ProjectionNode::ProjectionNode(ProjectionPolicies policies, std::string pathToNo : _policies(policies), _pathToNode(std::move(pathToNode)) {} void ProjectionNode::addProjectionForPath(const FieldPath& path) { + // Enforce that this method can only be called on the root node. + invariant(_pathToNode.empty()); + _addProjectionForPath(path); +} + +void ProjectionNode::_addProjectionForPath(const FieldPath& path) { makeOptimizationsStale(); if (path.getPathLength() == 1) { _projectedFields.insert(path.fullPath()); return; } // FieldPath can't be empty, so it is safe to obtain the first path component here. - addOrGetChild(path.getFieldName(0).toString())->addProjectionForPath(path.tail()); + addOrGetChild(path.getFieldName(0).toString())->_addProjectionForPath(path.tail()); } void ProjectionNode::addExpressionForPath(const FieldPath& path, boost::intrusive_ptr<Expression> expr) { + // Enforce that this method can only be called on the root node. + invariant(_pathToNode.empty()); + _addExpressionForPath(path, std::move(expr)); +} + +void ProjectionNode::_addExpressionForPath(const FieldPath& path, + boost::intrusive_ptr<Expression> expr) { makeOptimizationsStale(); // If the computed fields policy is 'kBanComputedFields', we should never reach here. invariant(_policies.computedFieldsPolicy == ComputedFieldsPolicy::kAllowComputedFields); @@ -66,7 +79,7 @@ void ProjectionNode::addExpressionForPath(const FieldPath& path, return; } // FieldPath can't be empty, so it is safe to obtain the first path component here. - addOrGetChild(path.getFieldName(0).toString())->addExpressionForPath(path.tail(), expr); + addOrGetChild(path.getFieldName(0).toString())->_addExpressionForPath(path.tail(), expr); } boost::intrusive_ptr<Expression> ProjectionNode::getExpressionForPath(const FieldPath& path) const { diff --git a/src/mongo/db/exec/projection_node.h b/src/mongo/db/exec/projection_node.h index 09c191226a1..45657307c62 100644 --- a/src/mongo/db/exec/projection_node.h +++ b/src/mongo/db/exec/projection_node.h @@ -58,6 +58,9 @@ public: * 'path' is allowed to be dotted, and is assumed not to conflict with another path already in * the tree. For example, it is an error to add the path "a.b" from a tree which has already * added a computed field "a". + * + * Note: This function can only be called from the root of the tree. This will ensure that all + * the node properties are set correctly along the path from root to expression leaf. */ void addProjectionForPath(const FieldPath& path); @@ -74,16 +77,14 @@ public: * 'path' is allowed to be dotted, and is assumed not to conflict with another path already in * the tree. For example, it is an error to add the path "a.b" as a computed field to a tree * which has already projected the field "a". + * + * Note: This function can only be called from the root of the tree. Every node needs to know + * whether it has an expression as a descendent, so by adding nodes only from the root, this + * flag can be set correctly along the path from root to expression leaf. */ void addExpressionForPath(const FieldPath& path, boost::intrusive_ptr<Expression> expr); /** - * Creates the child if it doesn't already exist. 'field' is not allowed to be dotted. Returns - * the child node if it already exists, or the newly-created child otherwise. - */ - ProjectionNode* addOrGetChild(const std::string& field); - - /** * Applies all projections and expressions, if applicable, and returns the resulting document. */ virtual Document applyToDocument(const Document& inputDoc) const; @@ -133,6 +134,12 @@ public: MutableDocument* output) const; protected: + /** + * Creates the child if it doesn't already exist. 'field' is not allowed to be dotted. Returns + * the child node if it already exists, or the newly-created child otherwise. + */ + ProjectionNode* addOrGetChild(const std::string& field); + // Returns a unique_ptr to a new instance of the implementing class for the given 'fieldName'. virtual std::unique_ptr<ProjectionNode> makeChild(const std::string& fieldName) const = 0; @@ -195,6 +202,16 @@ private: _maxFieldsToProject = boost::none; } + /** + * Internal helper function for addExpressionForPath(). + */ + void _addExpressionForPath(const FieldPath& path, boost::intrusive_ptr<Expression> expr); + + /** + * Internal helper function for addProjectionForPath(). + */ + void _addProjectionForPath(const FieldPath& path); + // Our projection semantics are such that all field additions need to be processed in the order // specified. '_orderToProcessAdditionsAndChildren' tracks that order. // |