diff options
author | Arun Banala <arun.banala@mongodb.com> | 2020-09-07 16:02:00 +0100 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-09-17 18:19:50 +0000 |
commit | a7714cfefeb609b30bbf2d9340418ce809d32146 (patch) | |
tree | 68d0a1ddf5e7d8395601bf273668439e5b891922 /src/mongo/db/update | |
parent | b9841ae3e9c2772e1c5d2705448f6e5528599596 (diff) | |
download | mongo-a7714cfefeb609b30bbf2d9340418ce809d32146.tar.gz |
SERVER-50650 Merge V2LogBuilder and document diff calculation code
Diffstat (limited to 'src/mongo/db/update')
-rw-r--r-- | src/mongo/db/update/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/update/document_diff_applier_test.cpp | 145 | ||||
-rw-r--r-- | src/mongo/db/update/document_diff_calculator.cpp | 81 | ||||
-rw-r--r-- | src/mongo/db/update/document_diff_serialization.cpp | 448 | ||||
-rw-r--r-- | src/mongo/db/update/document_diff_serialization.h | 490 | ||||
-rw-r--r-- | src/mongo/db/update/document_diff_serialization_test.cpp | 239 | ||||
-rw-r--r-- | src/mongo/db/update/v2_log_builder.cpp | 342 | ||||
-rw-r--r-- | src/mongo/db/update/v2_log_builder.h | 127 |
8 files changed, 848 insertions, 1025 deletions
diff --git a/src/mongo/db/update/SConscript b/src/mongo/db/update/SConscript index 18782d34ebc..500fdde1adc 100644 --- a/src/mongo/db/update/SConscript +++ b/src/mongo/db/update/SConscript @@ -103,6 +103,7 @@ env.Library( ], LIBDEPS=[ '$BUILD_DIR/mongo/base', + '$BUILD_DIR/mongo/bson/mutable/mutable_bson', '$BUILD_DIR/mongo/db/common', '$BUILD_DIR/mongo/db/update_index_data', ], diff --git a/src/mongo/db/update/document_diff_applier_test.cpp b/src/mongo/db/update/document_diff_applier_test.cpp index 888803c35eb..212f9d106e2 100644 --- a/src/mongo/db/update/document_diff_applier_test.cpp +++ b/src/mongo/db/update/document_diff_applier_test.cpp @@ -58,12 +58,12 @@ void checkDiff(const BSONObj& preImage, const BSONObj& expectedPost, const Diff& TEST(DiffApplierTest, DeleteSimple) { const BSONObj preImage(BSON("f1" << 1 << "foo" << 2 << "f2" << 3)); - DocumentDiffBuilder builder; - builder.addDelete("f1"); - builder.addDelete("f2"); - builder.addDelete("f3"); + diff_tree::DocumentSubDiffNode diffNode; + diffNode.addDelete("f1"); + diffNode.addDelete("f2"); + diffNode.addDelete("f3"); - auto diff = builder.serialize(); + auto diff = diffNode.serialize(); checkDiff(preImage, BSON("foo" << 2), diff); } @@ -74,11 +74,11 @@ TEST(DiffApplierTest, InsertSimple) { const BSONObj storage(BSON("a" << 1 << "b" << 2)); StringData newField = "newField"; - DocumentDiffBuilder builder; - builder.addInsert("f1", storage["a"]); - builder.addInsert(newField, storage["b"]); + diff_tree::DocumentSubDiffNode diffNode; + diffNode.addInsert("f1", storage["a"]); + diffNode.addInsert(newField, storage["b"]); - auto diff = builder.serialize(); + auto diff = diffNode.serialize(); checkDiff(preImage, BSON("foo" << 2 << "f2" << 3 << "f1" << 1 << "newField" << 2), diff); } @@ -88,11 +88,11 @@ TEST(DiffApplierTest, UpdateSimple) { const BSONObj storage(BSON("a" << 1 << "b" << 2)); StringData newField = "newField"; - DocumentDiffBuilder builder; - builder.addUpdate("f1", storage["a"]); - builder.addUpdate(newField, storage["b"]); + diff_tree::DocumentSubDiffNode diffNode; + diffNode.addUpdate("f1", storage["a"]); + diffNode.addUpdate(newField, storage["b"]); - auto diff = builder.serialize(); + auto diff = diffNode.serialize(); checkDiff(preImage, BSON("f1" << 1 << "foo" << 2 << "f2" << 3 << "newField" << 2), diff); } @@ -101,15 +101,16 @@ TEST(DiffApplierTest, SubObjDiffSimple) { BSON("obj" << BSON("dField" << 0 << "iField" << 0 << "uField" << 0) << "otherField" << 0)); const BSONObj storage(BSON("a" << 1 << "b" << 2)); - DocumentDiffBuilder builder; + diff_tree::DocumentSubDiffNode diffNode; { - auto subBuilderGuard = builder.startSubObjDiff("obj"); - subBuilderGuard.builder()->addDelete("dField"); - subBuilderGuard.builder()->addInsert("iField", storage["a"]); - subBuilderGuard.builder()->addUpdate("uField", storage["b"]); + auto subDiffNode = std::make_unique<diff_tree::DocumentSubDiffNode>(); + subDiffNode->addDelete("dField"); + subDiffNode->addInsert("iField", storage["a"]); + subDiffNode->addUpdate("uField", storage["b"]); + diffNode.addChild("obj", std::move(subDiffNode)); } - auto diff = builder.serialize(); + auto diff = diffNode.serialize(); checkDiff( preImage, BSON("obj" << BSON("uField" << 2 << "iField" << 1) << "otherField" << 0), diff); } @@ -120,14 +121,15 @@ TEST(DiffApplierTest, SubArrayDiffSimpleWithAppend) { const BSONObj storage(BSON("a" << 1 << "b" << 2)); StringData arr = "arr"; - DocumentDiffBuilder builder; + diff_tree::DocumentSubDiffNode diffNode; { - auto subBuilderGuard = builder.startSubArrDiff(arr); - subBuilderGuard.builder()->addUpdate(1, storage["a"]); - subBuilderGuard.builder()->addUpdate(4, storage["b"]); + auto subDiffNode = std::make_unique<diff_tree::ArrayNode>(); + subDiffNode->addUpdate(1, storage["a"]); + subDiffNode->addUpdate(4, storage["b"]); + diffNode.addChild(arr, std::move(subDiffNode)); } - auto diff = builder.serialize(); + auto diff = diffNode.serialize(); checkDiff(preImage, BSON("arr" << BSON_ARRAY(999 << 1 << 999 << 999 << 2)), diff); } @@ -138,14 +140,15 @@ TEST(DiffApplierTest, SubArrayDiffSimpleWithTruncate) { const BSONObj storage(BSON("a" << 1 << "b" << 2)); StringData arr = "arr"; - DocumentDiffBuilder builder; + diff_tree::DocumentSubDiffNode diffNode; { - auto subBuilderGuard = builder.startSubArrDiff(arr); - subBuilderGuard.builder()->addUpdate(1, storage["a"]); - subBuilderGuard.builder()->setResize(3); + auto subDiffNode = std::make_unique<diff_tree::ArrayNode>(); + subDiffNode->addUpdate(1, storage["a"]); + subDiffNode->setResize(3); + diffNode.addChild(arr, std::move(subDiffNode)); } - auto diff = builder.serialize(); + auto diff = diffNode.serialize(); checkDiff(preImage, BSON("arr" << BSON_ARRAY(999 << 1 << 999)), diff); } @@ -155,13 +158,14 @@ TEST(DiffApplierTest, SubArrayDiffSimpleWithNullPadding) { BSONObj storage(BSON("a" << 1)); StringData arr = "arr"; - DocumentDiffBuilder builder; + diff_tree::DocumentSubDiffNode diffNode; { - auto subBuilderGuard = builder.startSubArrDiff(arr); - subBuilderGuard.builder()->addUpdate(3, storage["a"]); + auto subDiffNode = std::make_unique<diff_tree::ArrayNode>(); + subDiffNode->addUpdate(3, storage["a"]); + diffNode.addChild(arr, std::move(subDiffNode)); } - auto diff = builder.serialize(); + auto diff = diffNode.serialize(); checkDiff(preImage, BSON("arr" << BSON_ARRAY(0 << NullLabeler{} << NullLabeler{} << 1)), diff); } @@ -169,16 +173,18 @@ TEST(DiffApplierTest, SubArrayDiffSimpleWithNullPadding) { TEST(DiffApplierTest, NestedSubObjUpdateScalar) { BSONObj storage(BSON("a" << 1)); StringData subObj = "subObj"; - DocumentDiffBuilder builder; + diff_tree::DocumentSubDiffNode diffNode; { - auto subBuilderGuard = builder.startSubObjDiff(subObj); + auto subDiffNode = std::make_unique<diff_tree::DocumentSubDiffNode>(); { - auto subSubBuilderGuard = subBuilderGuard.builder()->startSubObjDiff(subObj); - subSubBuilderGuard.builder()->addUpdate("a", storage["a"]); + auto subSubDiffNode = std::make_unique<diff_tree::DocumentSubDiffNode>(); + + subSubDiffNode->addUpdate("a", storage["a"]); + subDiffNode->addChild(subObj, std::move(subSubDiffNode)); } + diffNode.addChild(subObj, std::move(subDiffNode)); } - - auto diff = builder.serialize(); + auto diff = diffNode.serialize(); // Check the case where the object matches the structure we expect. BSONObj preImage(fromjson("{subObj: {subObj: {a: 0}}}")); @@ -217,33 +223,31 @@ TEST(DiffApplierTest, UpdateArrayOfObjectsSubDiff) { StringData dFieldB = "dFieldB"; StringData uField = "uField"; - DocumentDiffBuilder builder; - { - builder.addDelete(dFieldA); - builder.addDelete(dFieldB); - builder.addUpdate(uField, storage["uFieldNew"]); + diff_tree::DocumentSubDiffNode diffNode; + diffNode.addDelete(dFieldA); + diffNode.addDelete(dFieldB); + diffNode.addUpdate(uField, storage["uFieldNew"]); - auto subBuilderGuard = builder.startSubArrDiff(arr); - { - { - auto subSubBuilderGuard = subBuilderGuard.builder()->startSubObjDiff(1); - subSubBuilderGuard.builder()->addUpdate("a", storage["a"]); - } - - { - auto subSubBuilderGuard = subBuilderGuard.builder()->startSubObjDiff(2); - subSubBuilderGuard.builder()->addUpdate("b", storage["b"]); - } - - { - auto subSubBuilderGuard = subBuilderGuard.builder()->startSubObjDiff(3); - subSubBuilderGuard.builder()->addUpdate("c", storage["c"]); - } - subBuilderGuard.builder()->addUpdate(4, storage["newObj"]); - } + auto subDiffNode = std::make_unique<diff_tree::ArrayNode>(); + { + auto subSubDiffNode = std::make_unique<diff_tree::DocumentSubDiffNode>(); + subSubDiffNode->addUpdate("a", storage["a"]); + subDiffNode->addChild(1, std::move(subSubDiffNode)); } + { + auto subSubDiffNode = std::make_unique<diff_tree::DocumentSubDiffNode>(); + subSubDiffNode->addUpdate("b", storage["b"]); + subDiffNode->addChild(2, std::move(subSubDiffNode)); + } + { + auto subSubDiffNode = std::make_unique<diff_tree::DocumentSubDiffNode>(); + subSubDiffNode->addUpdate("c", storage["c"]); + subDiffNode->addChild(3, std::move(subSubDiffNode)); + } + subDiffNode->addUpdate(4, storage["newObj"]); + diffNode.addChild(arr, std::move(subDiffNode)); - auto diff = builder.serialize(); + auto diff = diffNode.serialize(); // Case where the object matches the structure we expect. BSONObj preImage( @@ -292,17 +296,16 @@ TEST(DiffApplierTest, UpdateArrayOfObjectsSubDiff) { TEST(DiffApplierTest, UpdateArrayOfObjectsWithUpdateOperationNonContiguous) { BSONObj storage(BSON("dummyA" << 997 << "dummyB" << BSON("newVal" << 998) << "dummyC" << 999)); StringData arr = "arr"; - DocumentDiffBuilder builder; + diff_tree::DocumentSubDiffNode diffNode; { - auto subBuilderGuard = builder.startSubArrDiff(arr); - { - subBuilderGuard.builder()->addUpdate(1, storage["dummyA"]); - subBuilderGuard.builder()->addUpdate(2, storage["dummyB"]); - subBuilderGuard.builder()->addUpdate(5, storage["dummyC"]); - } + auto subDiffNode = std::make_unique<diff_tree::ArrayNode>(); + subDiffNode->addUpdate(1, storage["dummyA"]); + subDiffNode->addUpdate(2, storage["dummyB"]); + subDiffNode->addUpdate(5, storage["dummyC"]); + diffNode.addChild(arr, std::move(subDiffNode)); } - auto diff = builder.serialize(); + auto diff = diffNode.serialize(); // Case where the object matches the structure we expect. BSONObj preImage(fromjson("{arr: [null, {}, {}, {}, {}, {}]}")); diff --git a/src/mongo/db/update/document_diff_calculator.cpp b/src/mongo/db/update/document_diff_calculator.cpp index 35935372241..24a4a989d0e 100644 --- a/src/mongo/db/update/document_diff_calculator.cpp +++ b/src/mongo/db/update/document_diff_calculator.cpp @@ -35,32 +35,31 @@ namespace mongo::doc_diff { namespace { // Note: This function is mutually with computeArrayDiff() and computeDocDiff(). -template <class DiffBuilder, class T> +template <class Node, class T> void calculateSubDiffHelper(const BSONElement& preVal, const BSONElement& postVal, T fieldIdentifier, - DiffBuilder* builder); + Node* diffNode); -bool computeArrayDiff(const BSONObj& pre, - const BSONObj& post, - doc_diff::ArrayDiffBuilder* diffBuilder) { +std::unique_ptr<diff_tree::ArrayNode> computeArrayDiff(const BSONObj& pre, const BSONObj& post) { + auto diffNode = std::make_unique<diff_tree::ArrayNode>(); auto preItr = BSONObjIterator(pre); auto postItr = BSONObjIterator(post); const size_t postObjSize = static_cast<size_t>(post.objsize()); size_t nFieldsInPostArray = 0; for (; preItr.more() && postItr.more(); ++preItr, ++postItr, ++nFieldsInPostArray) { // Bailout if the generated diff so far is larger than the 'post' object. - if (postObjSize < diffBuilder->getObjSize()) { - return false; + if (postObjSize < diffNode->getObjSize()) { + return nullptr; } if (!(*preItr).binaryEqual(*postItr)) { // If both are arrays or objects, then recursively compute the diff of the respective // array or object. if ((*preItr).type() == (*postItr).type() && ((*preItr).type() == BSONType::Object || (*preItr).type() == BSONType::Array)) { - calculateSubDiffHelper(*preItr, *postItr, nFieldsInPostArray, diffBuilder); + calculateSubDiffHelper(*preItr, *postItr, nFieldsInPostArray, diffNode.get()); } else { - diffBuilder->addUpdate(nFieldsInPostArray, *postItr); + diffNode->addUpdate(nFieldsInPostArray, *postItr); } } } @@ -68,20 +67,21 @@ bool computeArrayDiff(const BSONObj& pre, // When we reach here, only one of postItr or preItr can have more fields. If postItr has more // fields, we need to add all the remaining fields. for (; postItr.more(); ++postItr, ++nFieldsInPostArray) { - diffBuilder->addUpdate(nFieldsInPostArray, *postItr); + diffNode->addUpdate(nFieldsInPostArray, *postItr); } // If preItr has more fields, we can ignore the remaining fields, since we only need to do a // resize operation. if (preItr.more()) { - diffBuilder->setResize(nFieldsInPostArray); + diffNode->setResize(nFieldsInPostArray); } - return postObjSize > diffBuilder->getObjSize(); + return (postObjSize > diffNode->getObjSize()) ? std::move(diffNode) : nullptr; } -bool computeDocDiff(const BSONObj& pre, - const BSONObj& post, - doc_diff::DocumentDiffBuilder* diffBuilder) { +std::unique_ptr<diff_tree::DocumentSubDiffNode> computeDocDiff(const BSONObj& pre, + const BSONObj& post, + size_t padding = 0) { + auto diffNode = std::make_unique<diff_tree::DocumentSubDiffNode>(padding); BSONObjIterator preItr(pre); BSONObjIterator postItr(post); const size_t postObjSize = static_cast<size_t>(post.objsize()); @@ -91,8 +91,8 @@ bool computeDocDiff(const BSONObj& pre, auto postVal = *postItr; // Bailout if the generated diff so far is larger than the 'post' object. - if (postObjSize < diffBuilder->getObjSize()) { - return false; + if (postObjSize < diffNode->getObjSize()) { + return nullptr; } if (preVal.fieldNameStringData() == postVal.fieldNameStringData()) { if (preVal.binaryEqual(postVal)) { @@ -101,10 +101,11 @@ bool computeDocDiff(const BSONObj& pre, (preVal.type() == BSONType::Object || preVal.type() == BSONType::Array)) { // Both are either arrays or objects, recursively compute the diff of the respective // array or object. - calculateSubDiffHelper(preVal, postVal, preVal.fieldNameStringData(), diffBuilder); + calculateSubDiffHelper( + preVal, postVal, preVal.fieldNameStringData(), diffNode.get()); } else { // Any other case, just replace with the 'postVal'. - diffBuilder->addUpdate((*preItr).fieldNameStringData(), postVal); + diffNode->addUpdate((*preItr).fieldNameStringData(), postVal); } preItr.next(); postItr.next(); @@ -122,42 +123,37 @@ bool computeDocDiff(const BSONObj& pre, // Note that we don't need to record these into the 'deletes' set because there are no more // fields in the post image. invariant(!postItr.more()); - diffBuilder->addDelete((*preItr).fieldNameStringData()); + diffNode->addDelete((*preItr).fieldNameStringData()); } // Record remaining fields in postItr as creates. for (; postItr.more(); postItr.next()) { auto fieldName = (*postItr).fieldNameStringData(); - diffBuilder->addInsert(fieldName, *postItr); + diffNode->addInsert(fieldName, *postItr); deletes.erase(fieldName); } for (auto&& deleteField : deletes) { - diffBuilder->addDelete(deleteField); + diffNode->addDelete(deleteField); } - return postObjSize > diffBuilder->getObjSize(); + return (postObjSize > diffNode->getObjSize()) ? std::move(diffNode) : nullptr; } -template <class DiffBuilder, class T> +template <class Node, class T> void calculateSubDiffHelper(const BSONElement& preVal, const BSONElement& postVal, T fieldIdentifier, - DiffBuilder* builder) { - if (preVal.type() == BSONType::Object) { - auto subDiffBuilderGuard = builder->startSubObjDiff(fieldIdentifier); - const auto hasSubDiff = computeDocDiff( - preVal.embeddedObject(), postVal.embeddedObject(), subDiffBuilderGuard.builder()); - if (!hasSubDiff) { - subDiffBuilderGuard.abandon(); - builder->addUpdate(fieldIdentifier, postVal); - } + Node* diffNode) { + auto subDiff = (preVal.type() == BSONType::Object) + ? std::unique_ptr<diff_tree::InternalNode>( + computeDocDiff(preVal.embeddedObject(), postVal.embeddedObject())) + : std::unique_ptr<diff_tree::InternalNode>( + computeArrayDiff(preVal.embeddedObject(), postVal.embeddedObject())); + if (!subDiff) { + // We could not compute sub-diff because the potential sub-diff is bigger than the 'postVal' + // itself. So we just log the modification as an update. + diffNode->addUpdate(fieldIdentifier, postVal); } else { - auto subDiffBuilderGuard = builder->startSubArrDiff(fieldIdentifier); - const auto hasSubDiff = computeArrayDiff( - preVal.embeddedObject(), postVal.embeddedObject(), subDiffBuilderGuard.builder()); - if (!hasSubDiff) { - subDiffBuilderGuard.abandon(); - builder->addUpdate(fieldIdentifier, postVal); - } + diffNode->addChild(fieldIdentifier, std::move(subDiff)); } } } // namespace @@ -165,9 +161,8 @@ void calculateSubDiffHelper(const BSONElement& preVal, boost::optional<doc_diff::Diff> computeDiff(const BSONObj& pre, const BSONObj& post, size_t padding) { - doc_diff::DocumentDiffBuilder diffBuilder(padding); - if (computeDocDiff(pre, post, &diffBuilder)) { - auto diff = diffBuilder.serialize(); + if (auto diffNode = computeDocDiff(pre, post, padding)) { + auto diff = diffNode->serialize(); if (diff.objsize() < post.objsize()) { return diff; } diff --git a/src/mongo/db/update/document_diff_serialization.cpp b/src/mongo/db/update/document_diff_serialization.cpp index a775d59bb5f..ba7ac3964d6 100644 --- a/src/mongo/db/update/document_diff_serialization.cpp +++ b/src/mongo/db/update/document_diff_serialization.cpp @@ -31,128 +31,396 @@ #include "mongo/db/update/document_diff_serialization.h" +#include <stack> + #include "mongo/util/str.h" #include "mongo/util/string_map.h" -namespace mongo::doc_diff { -namespace { +namespace mongo { +namespace diff_tree { -void assertDiffNonEmpty(const BSONObjIterator& it) { - uassert(4770500, "Expected diff to be non-empty", it.more()); -} - -// Helper for taking a BSONObj and determining whether it's an array diff or an object diff. -doc_diff::DiffType identifyType(const BSONObj& diff) { - BSONObjIterator it(diff); - assertDiffNonEmpty(it); +void InternalNode::ApproxBSONSizeTracker::addEntry(size_t fieldSize, const Node* node) { + _size += fieldSize + 2; /* Type byte + null terminator for field name */ - if ((*it).fieldNameStringData() == kArrayHeader) { - return DiffType::kArray; + switch (node->type()) { + case (NodeType::kArray): + case (NodeType::kDocumentSubDiff): + case (NodeType::kDocumentInsert): { + _size += checked_cast<const InternalNode*>(node)->getObjSize(); + break; + } + case (NodeType::kUpdate): { + if (const auto* elem = + stdx::get_if<BSONElement>(&checked_cast<const UpdateNode*>(node)->elt)) { + _size += elem->valuesize(); + } + break; + } + case (NodeType::kInsert): { + if (const auto* elem = + stdx::get_if<BSONElement>(&checked_cast<const InsertNode*>(node)->elt)) { + _size += elem->valuesize(); + } + break; + } + case (NodeType::kDelete): { + _size += 1 /* boolean value */; + break; + } } - return DiffType::kDocument; } -stdx::variant<DocumentDiffReader, ArrayDiffReader> getReader(const Diff& diff) { - const auto type = identifyType(diff); - if (type == DiffType::kArray) { - return ArrayDiffReader(diff); +Node* DocumentSubDiffNode::addChild(StringData fieldName, std::unique_ptr<Node> node) { + auto* nodePtr = node.get(); + + // Add size of field name and the child element. + sizeTracker.addEntry(fieldName.size(), nodePtr); + + auto result = children.insert({fieldName.toString(), std::move(node)}); + invariant(result.second); + StringData storedFieldName = result.first->first; + switch (nodePtr->type()) { + case (NodeType::kArray): + case (NodeType::kDocumentSubDiff): { + sizeTracker.increment(1 /* kSubDiffSectionFieldPrefix */); + auto internalNode = checked_cast<InternalNode*>(nodePtr); + subDiffs.push_back({storedFieldName, internalNode}); + return nodePtr; + } + case (NodeType::kDocumentInsert): + case (NodeType::kInsert): { + if (inserts.empty()) { + sizeTracker.addSizeForWrapping(); + } + inserts.push_back({storedFieldName, nodePtr}); + return nodePtr; + } + case (NodeType::kDelete): { + if (deletes.empty()) { + sizeTracker.addSizeForWrapping(); + } + deletes.push_back({storedFieldName, checked_cast<DeleteNode*>(nodePtr)}); + return nodePtr; + } + case (NodeType::kUpdate): { + if (updates.empty()) { + sizeTracker.addSizeForWrapping(); + } + updates.push_back({storedFieldName, checked_cast<UpdateNode*>(nodePtr)}); + return nodePtr; + } } - return DocumentDiffReader(diff); + MONGO_UNREACHABLE; } -} // namespace -SubBuilderGuard<DocumentDiffBuilder> ArrayDiffBuilder::startSubObjDiff(size_t idx) { - auto subDiffBuilder = std::make_unique<DocumentDiffBuilder>(0); - DocumentDiffBuilder* subBuilderPtr = subDiffBuilder.get(); - _modifications.push_back({std::to_string(idx), std::move(subDiffBuilder)}); - return SubBuilderGuard<DocumentDiffBuilder>(this, subBuilderPtr); -} -SubBuilderGuard<ArrayDiffBuilder> ArrayDiffBuilder::startSubArrDiff(size_t idx) { - auto subDiffBuilder = std::unique_ptr<ArrayDiffBuilder>(new ArrayDiffBuilder()); - ArrayDiffBuilder* subBuilderPtr = subDiffBuilder.get(); - _modifications.push_back({std::to_string(idx), std::move(subDiffBuilder)}); - return SubBuilderGuard<ArrayDiffBuilder>(this, subBuilderPtr); +namespace { +void appendElementToBuilder(stdx::variant<mutablebson::Element, BSONElement> elem, + StringData fieldName, + BSONObjBuilder* builder) { + stdx::visit( + visit_helper::Overloaded{ + [&](const mutablebson::Element& element) { + if (element.hasValue()) { + builder->appendAs(element.getValue(), fieldName); + } else if (element.getType() == BSONType::Object) { + BSONObjBuilder subBuilder(builder->subobjStart(fieldName)); + element.writeChildrenTo(&subBuilder); + } else { + invariant(element.getType() == BSONType::Array); + BSONArrayBuilder subBuilder(builder->subarrayStart(fieldName)); + element.writeArrayTo(&subBuilder); + } + }, + [&](BSONElement element) { builder->appendAs(element, fieldName); }}, + elem); } -void ArrayDiffBuilder::addUpdate(size_t idx, BSONElement elem) { - auto fieldName = std::to_string(idx); - sizeTracker.addEntry(fieldName.size() + 1 /* kUpdateSectionFieldName */, elem.valuesize()); - _modifications.push_back({std::move(fieldName), elem}); -} +// Construction of the $v:2 diff needs to handle the same number of levels of recursion as the +// maximum permitted BSON depth. In order to avoid the possibility of stack overflow, which has +// been observed in configurations that use small stack sizes(such as 'dbg=on'), we use an explicit +// stack data structure stored on the heap instead. +// +// The Frame class represents one "frame" of this explicit stack. +class Frame { +public: + virtual ~Frame() {} + + // When execute() is called, the Frame may either return a new Frame to be placed on top of the + // stack, or return nullptr, indicating that the frame has finished and can be destroyed. + // + // If a Frame returns a new stack frame, it must be able to pick up where it left off when + // execute() is called on it again. + virtual std::unique_ptr<Frame> execute() = 0; +}; +using UniqueFrame = std::unique_ptr<Frame>; + +// Helper used for creating a new frame from a sub-diff node. Definition depends on some of the +// *Frame constructors. +UniqueFrame makeSubNodeFrameHelper(InternalNode* node, BSONObjBuilder builder); +// Given a 'node' stored in the 'inserts' section of an InternalNode, will either append that +// node's value to the given builder, or return a new stack frame which will build the object to be +// inserted. 'node' must be an InsertionNode or a DocumentInsertNode. +UniqueFrame handleInsertHelper(StringData fieldName, Node* node, BSONObjBuilder* bob); + +// Stack frame used to maintain state while serializing DocumentInsertionNodes. +class DocumentInsertFrame final : public Frame { +public: + DocumentInsertFrame(const DocumentInsertionNode& node, BSONObjBuilder bob) + : _node(node), _bob(std::move(bob)) {} + + UniqueFrame execute() override { + const auto& inserts = _node.getInserts(); + for (; _insertIdx < inserts.size(); ++_insertIdx) { + auto&& [fieldName, child] = inserts[_insertIdx]; + + if (auto newFrame = handleInsertHelper(fieldName, child, &_bob)) { + ++_insertIdx; + return newFrame; + } + } + return nullptr; + } -void ArrayDiffBuilder::serializeTo(BSONObjBuilder* output) const { - output->append(kArrayHeader, true); - if (_newSize) { - output->append(kResizeSectionFieldName, *_newSize); +private: + size_t _insertIdx = 0; + const DocumentInsertionNode& _node; + BSONObjBuilder _bob; +}; + +// Stack frame used to maintain state while serializing DocumentSubDiffNodes. +class DocumentSubDiffFrame final : public Frame { +public: + DocumentSubDiffFrame(const DocumentSubDiffNode& node, BSONObjBuilder bob) + : _node(node), _bob(std::move(bob)) {} + + UniqueFrame execute() override { + if (!_wroteDeletesAndUpdates) { + if (!_node.getDeletes().empty()) { + BSONObjBuilder subBob(_bob.subobjStart(doc_diff::kDeleteSectionFieldName)); + for (auto&& [fieldName, node] : _node.getDeletes()) { + // The deletes are logged in the form {fieldName: false} in $v:2 format. + subBob.append(fieldName, false); + } + } + if (!_node.getUpdates().empty()) { + BSONObjBuilder subBob(_bob.subobjStart(doc_diff::kUpdateSectionFieldName)); + for (auto&& [fieldName, node] : _node.getUpdates()) { + appendElementToBuilder(node->elt, fieldName, &subBob); + } + } + _wroteDeletesAndUpdates = true; + } + + const auto& inserts = _node.getInserts(); + for (; _insertIdx < inserts.size(); ++_insertIdx) { + if (!_insertBob) { + _insertBob.emplace(_bob.subobjStart(doc_diff::kInsertSectionFieldName)); + } + + auto&& [fieldName, child] = inserts[_insertIdx]; + + if (auto newFrame = handleInsertHelper(fieldName, child, _insertBob.get_ptr())) { + ++_insertIdx; + return newFrame; + } + } + + if (_insertBob) { + // All inserts have been written so we destroy the insert builder now. + _insertBob = boost::none; + } + + if (_subDiffIdx != _node.getSubDiffs().size()) { + auto&& [fieldName, child] = _node.getSubDiffs()[_subDiffIdx]; + + BSONObjBuilder childBuilder = + _bob.subobjStart(std::string(1, doc_diff::kSubDiffSectionFieldPrefix) + fieldName); + ++_subDiffIdx; + return makeSubNodeFrameHelper(child, std::move(childBuilder)); + } + + return nullptr; } - for (auto&& modificationEntry : _modifications) { - auto&& idx = modificationEntry.first; - auto&& modification = modificationEntry.second; - stdx::visit( - visit_helper::Overloaded{ - [&idx, output](const std::unique_ptr<DiffBuilderBase>& subDiff) { - BSONObjBuilder subObjBuilder = - output->subobjStart(kSubDiffSectionFieldPrefix + idx); - subDiff->serializeTo(&subObjBuilder); - }, - [&idx, output](BSONElement elt) { - output->appendAs(elt, kUpdateSectionFieldName + idx); - }}, - modification); + BSONObjBuilder& bob() { + return _bob; } -} -void DocumentDiffBuilder::serializeTo(BSONObjBuilder* output) const { - if (!_deletes.empty()) { - BSONObjBuilder subBob(output->subobjStart(kDeleteSectionFieldName)); - for (auto&& del : _deletes) { - subBob.append(del, false); +private: + const DocumentSubDiffNode& _node; + BSONObjBuilder _bob; + + // Indicates whether or not we've written deletes and updates yet. Since deletes and updates + // are always leaf nodes, they are always written in the first call to execute(). + bool _wroteDeletesAndUpdates = false; + + // Keeps track of which insertion or subDiff is being serialized. + size_t _insertIdx = 0; + size_t _subDiffIdx = 0; + + boost::optional<BSONObjBuilder> _insertBob; +}; + +// Stack frame used to maintain state while serializing ArrayNodes. +class ArrayFrame final : public Frame { +public: + ArrayFrame(const ArrayNode& node, BSONObjBuilder bob) + : _node(node), _bob(std::move(bob)), _childIt(node.getChildren().begin()) {} + + UniqueFrame execute() override { + invariant(!_node.getChildren().empty()); + if (_childIt == _node.getChildren().begin()) { + // If this is the first execution of this frame, append array header and resize field if + // present. + _bob.append(doc_diff::kArrayHeader, true); + if (auto size = _node.getResize()) { + _bob.append(doc_diff::kResizeSectionFieldName, static_cast<int>(*size)); + } } - } - if (!_updates.empty()) { - BSONObjBuilder subBob(output->subobjStart(kUpdateSectionFieldName)); - for (auto&& update : _updates) { - subBob.appendAs(update.second, update.first); + for (; _childIt != _node.getChildren().end(); ++_childIt) { + + auto&& [idx, child] = *_childIt; + auto idxAsStr = std::to_string(idx); + + switch (child->type()) { + case (NodeType::kUpdate): { + const auto& valueNode = checked_cast<const UpdateNode&>(*child); + appendElementToBuilder( + valueNode.elt, doc_diff::kUpdateSectionFieldName + idxAsStr, &_bob); + break; + } + case (NodeType::kInsert): { + const auto& valueNode = checked_cast<const InsertNode&>(*child); + appendElementToBuilder( + valueNode.elt, doc_diff::kUpdateSectionFieldName + idxAsStr, &_bob); + break; + } + case (NodeType::kDocumentInsert): { + // This represents an array element that is being created with a sub object. + // + // For example {$set: {"a.0.c": 1}} when the input document is {a: []}. Here we + // need to create the array element at '0', then sub document 'c'. + + ++_childIt; + return std::make_unique<DocumentInsertFrame>( + *checked_cast<DocumentInsertionNode*>(child.get()), + BSONObjBuilder( + _bob.subobjStart(doc_diff::kUpdateSectionFieldName + idxAsStr))); + } + case (NodeType::kDocumentSubDiff): + case (NodeType::kArray): { + InternalNode* subNode = checked_cast<InternalNode*>(child.get()); + BSONObjBuilder childBuilder = _bob.subobjStart( + std::string(1, doc_diff::kSubDiffSectionFieldPrefix) + idxAsStr); + + ++_childIt; + return makeSubNodeFrameHelper(subNode, std::move(childBuilder)); + } + case (NodeType::kDelete): { + MONGO_UNREACHABLE; + } + } } + + return nullptr; } - if (!_inserts.empty()) { - BSONObjBuilder subBob(output->subobjStart(kInsertSectionFieldName)); - for (auto&& insert : _inserts) { - subBob.appendAs(insert.second, insert.first); +private: + const ArrayNode& _node; + BSONObjBuilder _bob; + std::map<size_t, std::unique_ptr<Node>>::const_iterator _childIt; +}; + +BSONObj writeDiff(const DocumentSubDiffNode& root) { + std::stack<UniqueFrame> stack; + stack.push(std::make_unique<DocumentSubDiffFrame>(root, BSONObjBuilder{})); + + // Iterate until the stack size is one and there is no more work to be done. + while (true) { + auto nextFrame = stack.top()->execute(); + if (nextFrame) { + stack.push(std::move(nextFrame)); + } else if (stack.size() == 1) { + break; + } else { + stack.pop(); } } - if (!_subDiffs.empty()) { - for (auto&& subDiff : _subDiffs) { - BSONObjBuilder subDiffBuilder( - output->subobjStart(std::string(1, kSubDiffSectionFieldPrefix) + subDiff.first)); - subDiff.second->serializeTo(&subDiffBuilder); - } + invariant(stack.size() == 1); + + auto& topFrame = checked_cast<DocumentSubDiffFrame&>(*stack.top()); + return topFrame.bob().obj(); +} + +UniqueFrame makeSubNodeFrameHelper(InternalNode* node, BSONObjBuilder builder) { + if (node->type() == NodeType::kArray) { + return std::make_unique<ArrayFrame>(*checked_cast<ArrayNode*>(node), std::move(builder)); + } else { + // We never expect to see a DocumentInsertionNode under the 'subDiffs' section of an + // internal node. + invariant(node->type() == NodeType::kDocumentSubDiff); + return std::make_unique<DocumentSubDiffFrame>(*checked_cast<DocumentSubDiffNode*>(node), + std::move(builder)); } } -SubBuilderGuard<DocumentDiffBuilder> DocumentDiffBuilder::startSubObjDiff(StringData field) { - auto subDiffBuilder = std::make_unique<DocumentDiffBuilder>(0); - DocumentDiffBuilder* subBuilderPtr = subDiffBuilder.get(); - _subDiffs.push_back({field, std::move(subDiffBuilder)}); - return SubBuilderGuard<DocumentDiffBuilder>(this, subBuilderPtr); +UniqueFrame handleInsertHelper(StringData fieldName, Node* node, BSONObjBuilder* bob) { + if (node->type() == NodeType::kInsert) { + appendElementToBuilder(checked_cast<InsertNode*>(node)->elt, fieldName, bob); + return nullptr; + } + invariant(node->type() == NodeType::kDocumentInsert); + return std::make_unique<DocumentInsertFrame>(*checked_cast<DocumentInsertionNode*>(node), + BSONObjBuilder(bob->subobjStart(fieldName))); } -SubBuilderGuard<ArrayDiffBuilder> DocumentDiffBuilder::startSubArrDiff(StringData field) { - auto subDiffBuilder = std::unique_ptr<ArrayDiffBuilder>(new ArrayDiffBuilder()); - ArrayDiffBuilder* subBuilderPtr = subDiffBuilder.get(); - _subDiffs.push_back({field, std::move(subDiffBuilder)}); - return SubBuilderGuard<ArrayDiffBuilder>(this, subBuilderPtr); + +} // namespace + +BSONObj DocumentSubDiffNode::serialize() const { + return writeDiff(*this); } +} // namespace diff_tree +namespace doc_diff { namespace { + +void assertDiffNonEmpty(const BSONObjIterator& it) { + uassert(4770500, "Expected diff to be non-empty", it.more()); +} + +// Helper for taking a BSONObj and determining whether it's an array diff or an object diff. +doc_diff::DiffType identifyType(const BSONObj& diff) { + BSONObjIterator it(diff); + assertDiffNonEmpty(it); + + if ((*it).fieldNameStringData() == kArrayHeader) { + return DiffType::kArray; + } + return DiffType::kDocument; +} + +stdx::variant<DocumentDiffReader, ArrayDiffReader> getReader(const Diff& diff) { + const auto type = identifyType(diff); + if (type == DiffType::kArray) { + return ArrayDiffReader(diff); + } + return DocumentDiffReader(diff); +} + void checkSection(BSONElement element, char sectionName, BSONType expectedType) { uassert(4770507, str::stream() << "Expected " << sectionName << " section to be type " << expectedType, element.type() == expectedType); } + +// Converts a (decimal) string to number. Will throw if the string is not a valid unsigned int. +size_t extractArrayIndex(StringData fieldName) { + auto idx = str::parseUnsignedBase10Integer(fieldName); + uassert(4770512, str::stream() << "Expected integer but got " << fieldName, idx); + return *idx; +} } // namespace ArrayDiffReader::ArrayDiffReader(const Diff& diff) : _diff(diff), _it(_diff) { @@ -177,15 +445,6 @@ ArrayDiffReader::ArrayDiffReader(const Diff& diff) : _diff(diff), _it(_diff) { } } -namespace { -// Converts a (decimal) string to number. Will throw if the string is not a valid unsigned int. -size_t extractArrayIndex(StringData fieldName) { - auto idx = str::parseUnsignedBase10Integer(fieldName); - uassert(4770512, str::stream() << "Expected integer but got " << fieldName, idx); - return *idx; -} -} // namespace - boost::optional<std::pair<size_t, ArrayDiffReader::ArrayModification>> ArrayDiffReader::next() { if (!_it.more()) { return {}; @@ -316,4 +575,5 @@ DocumentDiffReader::nextSubDiff() { return {{fieldName.substr(1, fieldName.size()), getReader(next.embeddedObject())}}; } -} // namespace mongo::doc_diff +} // namespace doc_diff +} // namespace mongo diff --git a/src/mongo/db/update/document_diff_serialization.h b/src/mongo/db/update/document_diff_serialization.h index 19f1a3f2e9c..9d54825298b 100644 --- a/src/mongo/db/update/document_diff_serialization.h +++ b/src/mongo/db/update/document_diff_serialization.h @@ -29,8 +29,12 @@ #pragma once +#include "mongo/base/checked_cast.h" +#include "mongo/bson/bsonobj.h" #include "mongo/bson/bsonobjbuilder.h" +#include "mongo/bson/mutable/document.h" #include "mongo/stdx/variant.h" +#include "mongo/util/string_map.h" #include "mongo/util/visit_helper.h" // This file contains classes for serializing document diffs to a format that can be stored in the @@ -60,308 +64,328 @@ constexpr size_t kSizeOfEmptyDocumentDiffBuilder = 5; // Size of empty object(5) + kArrayHeader(1) + null terminator + type byte + bool size. constexpr size_t kSizeOfEmptyArrayDiffBuilder = 9; -/** - * Internal helper class for BSON size tracking to be used by the DiffBuilder classes. +/* + * The below classes are for reading diffs. When given an invalid/malformed diff they will uassert. */ -struct ApproxBSONSizeTracker { - ApproxBSONSizeTracker(size_t initialSize) : _size(initialSize) {} +class DocumentDiffReader; - void addEntry(size_t fieldSize, size_t valueSize, bool needToCreateWrappingObject = false) { - if (needToCreateWrappingObject) { - // Type byte(1) + FieldName(1) + Null terminator(1) + empty BSON object size (5) - _size += 8; - } - _size += fieldSize + valueSize + 2 /* Type byte + null terminator for field name */; - } +class ArrayDiffReader { +public: + using ArrayModification = stdx::variant<BSONElement, DocumentDiffReader, ArrayDiffReader>; + + explicit ArrayDiffReader(const Diff& diff); - size_t getSize() const { - return _size; + /** + * Methods which return the next modification (if any) and advance the iterator. Returns + * boost::none if there are no remaining modifications. Otherwise, returns a pair. The first + * member of the pair is the index of the modification. The second part is the modification + * itself. + * + * If the second part of the pair contains a BSONElement, it means there is an update at that + * index. + * + * If the second part contains a DocumentDiffReader or ArrayDiffReader, it means there is a + * subdiff at that index. + */ + boost::optional<std::pair<size_t, ArrayModification>> next(); + + /** + * Returns the size of the post image array. + */ + boost::optional<size_t> newSize() { + return _newSize; } private: - size_t _size; -}; + Diff _diff; + BSONObjIterator _it; -template <class DiffBuilder> -class SubBuilderGuard; -class DocumentDiffBuilder; -class ArrayDiffBuilder; + boost::optional<size_t> _newSize; +}; -// Not meant to be used elsewhere. -class DiffBuilderBase { +class DocumentDiffReader { public: - DiffBuilderBase(size_t size) : sizeTracker(size){}; - virtual ~DiffBuilderBase(){}; - - // Serializes the diff to 'out'. - virtual void serializeTo(BSONObjBuilder* out) const = 0; - size_t getObjSize() const { - return sizeTracker.getSize(); - } + DocumentDiffReader(const Diff& diff); -protected: - ApproxBSONSizeTracker sizeTracker; + /** + * The below methods get the next type of modification (if any) and advance the iterator. + */ + boost::optional<StringData> nextDelete(); + boost::optional<BSONElement> nextUpdate(); + boost::optional<BSONElement> nextInsert(); + boost::optional<std::pair<StringData, stdx::variant<DocumentDiffReader, ArrayDiffReader>>> + nextSubDiff(); private: - // When the current child builder is abandoned, we needs to call this to indicate that the - // DiffBuilder should release any state it was holding for the current child. After calling - // this the builder will *not* add anything to the child builder. - virtual void abandonChild() = 0; - - // When the current child builder is finished, we needs to call this to indicate that the - // DiffBuilder should stop accepting further modifications to the current child. After calling - // this the builder will *not* add anything to the child builder. - virtual void finishChild() = 0; - - friend class SubBuilderGuard<ArrayDiffBuilder>; - friend class SubBuilderGuard<DocumentDiffBuilder>; + Diff _diff; + + boost::optional<BSONObjIterator> _deletes; + boost::optional<BSONObjIterator> _inserts; + boost::optional<BSONObjIterator> _updates; + boost::optional<BSONObjIterator> _subDiffs; }; +} // namespace doc_diff + +namespace diff_tree { +/** + * These are structs for a "diff tree" that is constructed while the update is applied. There are + * two types of internal nodes: Document nodes and Array nodes. All other node types are always + * leaves. + * + * When the update is complete, the diff tree is converted into a $v: 2 oplog entry. + */ +enum class NodeType { kDocumentSubDiff, kDocumentInsert, kArray, kDelete, kUpdate, kInsert }; /** - * An RAII type class which provides access to a sub-diff builder. This can be used to let the - * parent builder know what to do with the current builder resource, i.e, either commit or destroy. - * Note that this class is not directly responsible for destroying the diff builders. + * Base class to represents a node in the diff tree. */ -template <class DiffBuilder> -class SubBuilderGuard final { -public: - SubBuilderGuard(DiffBuilderBase* parent, DiffBuilder* builder) - : _parent(parent), _builder(builder) {} - ~SubBuilderGuard() { - if (_parent) { - _parent->finishChild(); - } - } +struct Node { + virtual ~Node(){}; + virtual NodeType type() const = 0; +}; - void abandon() { - _parent->abandonChild(); - _parent = nullptr; - _builder = nullptr; - } +/** + * This class represents insertion of a BSONElement or mutablebson Element. Note that + * 'DocumentInsertionNode' also repesent an insert for the cases where an object is created + * implicity. + */ +struct InsertNode : public Node { + InsertNode(mutablebson::Element el) : elt(el) {} + InsertNode(BSONElement el) : elt(el) {} - DiffBuilder* builder() { - return _builder; + NodeType type() const override { + return NodeType::kInsert; } - -private: - // Both pointers are unowned. - DiffBuilderBase* _parent; - DiffBuilder* _builder; + stdx::variant<mutablebson::Element, BSONElement> elt; }; /** - * Class for building array diffs. The diff builder does not take ownership of the BSONElement - * passed. It is the responsibility of the caller to ensure that data stays in scope until - * serializeTo() is called. + * Structure to represent a field update node. */ -class ArrayDiffBuilder final : public DiffBuilderBase { -public: - using ArrayModification = stdx::variant<BSONElement, std::unique_ptr<DiffBuilderBase>>; - ~ArrayDiffBuilder() {} +struct UpdateNode : public Node { + UpdateNode(mutablebson::Element el) : elt(el) {} + UpdateNode(BSONElement el) : elt(el) {} - /** - * Sets the new size of the array. Should only be used for array truncations. - */ - void setResize(uint32_t index) { - invariant(!_newSize); - // BSON only has signed 4 byte integers. The new size must fit into that type. - invariant(index <= static_cast<uint32_t>(std::numeric_limits<int32_t>::max())); - _newSize = static_cast<int32_t>(index); - sizeTracker.addEntry(1 /* kResizeSectionFieldName */, sizeof(uint32_t) /* size of value */); + NodeType type() const override { + return NodeType::kUpdate; } + stdx::variant<mutablebson::Element, BSONElement> elt; +}; - /* - * Below are functions for adding pieces of the diff. These functions must be called no more - * than once for a given array index. They make no attempt to check for duplicates. - */ - - /** - * Adds an array index to be updated with a new value. Must be called with ascending values for - * 'idx'. For example, you cannot call this with idx = 5 and then call it again with idx = 4. - */ - void addUpdate(size_t idx, BSONElement newValue); +/** + * Structure to represent a field delete node. + */ +struct DeleteNode : public Node { + NodeType type() const override { + return NodeType::kDelete; + } +}; +/** + * Struct representing non-leaf node. + */ +struct InternalNode : public Node { +public: /** - * Starts a sub-diff object and returns an unowned pointer to the sub-diff builder. The client - * should not try to destroy the object. + * Internal helper class for BSON size tracking of the diff to be generated. */ - SubBuilderGuard<DocumentDiffBuilder> startSubObjDiff(size_t idx); - SubBuilderGuard<ArrayDiffBuilder> startSubArrDiff(size_t idx); + struct ApproxBSONSizeTracker { + ApproxBSONSizeTracker(size_t initialSize) : _size(initialSize) {} - // Dumps the diff into the given builder. - void serializeTo(BSONObjBuilder* out) const; + void addEntry(size_t fieldSize, const Node* node); -private: - // The top-level of a diff is never an array diff. One can only construct an ArrayDiffBuilder - // from a parent DocumentDiffBuilder. - explicit ArrayDiffBuilder() : DiffBuilderBase(kSizeOfEmptyArrayDiffBuilder) {} + void addSizeForWrapping() { + // Type byte(1) + FieldName(1) + Null terminator(1) + empty BSON object size (5) + _size += 8; + } - void abandonChild() override { - invariant(!_modifications.empty()); - _modifications.pop_back(); - } + void increment(size_t size) { + _size += size; + } - void finishChild() override { - invariant(!_modifications.empty()); - sizeTracker.addEntry( - _modifications.back().first.size() + 1 /* kSubDiffSectionFieldPrefix */, - stdx::get<std::unique_ptr<DiffBuilderBase>>(_modifications.back().second) - ->getObjSize()); - } + size_t getSize() const { + return _size; + } - // Each element is either an update (BSONElement) or a sub diff. - std::vector<std::pair<std::string, ArrayModification>> _modifications; + private: + size_t _size; + }; + InternalNode(size_t size = 0) : sizeTracker(size){}; - boost::optional<int32_t> _newSize; + // Returns an unowned pointer to the newly added child. + virtual Node* addChild(StringData fieldName, std::unique_ptr<Node> node) = 0; + virtual Node* getChild(StringData fieldName) const = 0; + size_t getObjSize() const { + return sizeTracker.getSize(); + } - friend class DocumentDiffBuilder; -}; // namespace doc_diff +protected: + ApproxBSONSizeTracker sizeTracker; +}; /** - * Class for building document diffs. The diff builder does not take ownership of either the field - * name or the BSONElement passed. It is the responsibility of the caller to ensure that data stays - * in scope until serialize() is called. + * Indicates a Document internal node which is already in the pre-image document. */ -class DocumentDiffBuilder final : public DiffBuilderBase { +class DocumentSubDiffNode : public InternalNode { public: - /** - * The 'padding' is the additional size that needs to be added to the diff builder while - * tracking the overall size of the diff. - */ - DocumentDiffBuilder(size_t padding = 0) - : DiffBuilderBase(kSizeOfEmptyDocumentDiffBuilder + padding) {} - - /** - * Produces an owned BSONObj representing the diff. - */ - Diff serialize() const { - BSONObjBuilder bob; - serializeTo(&bob); - return bob.obj(); - } + template <typename E> + using ModificationEntries = std::vector<std::pair<StringData, E>>; - /** - * Similar to serialize() but using an existing BSONObjBuilder. - */ - void serializeTo(BSONObjBuilder* out) const; + DocumentSubDiffNode(size_t size = 0) + : InternalNode(size + doc_diff::kSizeOfEmptyDocumentDiffBuilder){}; - /** - * Functions for adding pieces of the diff. These functions must be called no more than once - * for a given field. They make no attempt to check for duplicates. - */ - void addDelete(StringData field) { - // Add the size of 'field' + 'value'. - sizeTracker.addEntry(field.size(), sizeof(char), _deletes.empty()); + Node* addChild(StringData fieldName, std::unique_ptr<Node> node) override; - _deletes.push_back(field); + Node* getChild(StringData fieldName) const override { + auto it = children.find(fieldName); + return (it != children.end()) ? it->second.get() : nullptr; } - void addUpdate(StringData field, BSONElement value) { - // Add the size of 'field' + 'value'. - sizeTracker.addEntry(field.size(), value.valuesize(), _updates.empty()); - _updates.push_back({field, value}); + void addUpdate(StringData fieldName, BSONElement value) { + addChild(fieldName, std::make_unique<UpdateNode>(value)); } - void addInsert(StringData field, BSONElement value) { - // Add the size of 'field' + 'value'. - sizeTracker.addEntry(field.size(), value.valuesize(), _inserts.empty()); - - _inserts.push_back({field, value}); + void addInsert(StringData fieldName, BSONElement value) { + addChild(fieldName, std::make_unique<InsertNode>(value)); + } + void addDelete(StringData fieldName) { + addChild(fieldName, std::make_unique<DeleteNode>()); + } + NodeType type() const override { + return NodeType::kDocumentSubDiff; } - /** - * Methods for starting sub diffs. Must not be called more than once for a given field. The - * contents of the StringData passed in must live for the entire duration of the sub-builder's - * life. Returns an unowned pointer to the sub-diff builder. The client should not try to - * destroy the object. - */ - SubBuilderGuard<DocumentDiffBuilder> startSubObjDiff(StringData field); - SubBuilderGuard<ArrayDiffBuilder> startSubArrDiff(StringData field); + BSONObj serialize() const; -private: - void abandonChild() override { - invariant(!_subDiffs.empty()); - _subDiffs.pop_back(); + const ModificationEntries<UpdateNode*>& getUpdates() const { + return updates; + } + const ModificationEntries<DeleteNode*>& getDeletes() const { + return deletes; + } + const ModificationEntries<Node*>& getInserts() const { + return inserts; } + const ModificationEntries<InternalNode*>& getSubDiffs() const { + return subDiffs; + } + +private: + // We store the raw pointer to each of the child node so that we don't have to look up in + // 'children' map every time. Note that the field names of these modifications will reference + // the field name stored in 'children'. The node objects also point to the value of 'children' + // map, where they are owned. + ModificationEntries<UpdateNode*> updates; + ModificationEntries<DeleteNode*> deletes; + ModificationEntries<Node*> inserts; + ModificationEntries<InternalNode*> subDiffs; + + // We use absl::node_hash_map here for pointer stability on keys (field names) when a rehash + // happens. + absl::node_hash_map<std::string, std::unique_ptr<Node>, StringMapHasher, StringMapEq> children; +}; + +/** + * Indicates that the document this node represents was created as part of the update. + * + * E.g. applying the update {$set: {"a.b.c": "foo"}} on document {} will create sub-documents + * at paths "a" and "a.b". + */ +class DocumentInsertionNode : public InternalNode { +public: + DocumentInsertionNode() : InternalNode(0){}; - void finishChild() override { - invariant(!_subDiffs.empty()); + Node* addChild(StringData fieldName, std::unique_ptr<Node> node) override { + invariant(node->type() == NodeType::kInsert || node->type() == NodeType::kDocumentInsert); - // Add the size of 'field' + 'value'. - sizeTracker.addEntry(1 /*kSubDiffSectionFieldPrefix */ + _subDiffs.back().first.size(), - _subDiffs.back().second->getObjSize(), - false); + auto* nodePtr = node.get(); + auto result = children.insert({fieldName.toString(), std::move(node)}); + invariant(result.second); + inserts.push_back({result.first->first, nodePtr}); + return nodePtr; } - std::vector<std::pair<StringData, BSONElement>> _updates; - std::vector<std::pair<StringData, BSONElement>> _inserts; - std::vector<StringData> _deletes; - std::vector<std::pair<StringData, std::unique_ptr<DiffBuilderBase>>> _subDiffs; + Node* getChild(StringData fieldName) const override { + auto it = children.find(fieldName); + return (it != children.end()) ? it->second.get() : nullptr; + } + + NodeType type() const override { + return NodeType::kDocumentInsert; + } - // If there is an outstanding child diff builder, its field is stored here. - boost::optional<std::string> _childSubDiffField; + const std::vector<std::pair<StringData, Node*>>& getInserts() const { + return inserts; + } - friend class ArrayDiffBuilder; +private: + // We store the raw pointer to each of the child node so that we don't have to look up in + // 'children' map every time. Note that the field names of these inserts will reference + // the field name stored in 'children'. The node objects also point to the value of 'children' + // map, where they are owned. + std::vector<std::pair<StringData, Node*>> inserts; + + // We use absl::node_hash_map here for pointer stability on keys (field names) when a rehash + // happens. + absl::node_hash_map<std::string, std::unique_ptr<Node>, StringMapHasher, StringMapEq> children; }; -/* - * The below classes are for reading diffs. When given an invalid/malformed diff they will uassert. +/** + * Class representing an array node. */ -class DocumentDiffReader; - -class ArrayDiffReader { +class ArrayNode : public InternalNode { public: - using ArrayModification = stdx::variant<BSONElement, DocumentDiffReader, ArrayDiffReader>; + ArrayNode() : InternalNode(doc_diff::kSizeOfEmptyArrayDiffBuilder){}; - explicit ArrayDiffReader(const Diff& diff); + Node* addChild(size_t idx, std::unique_ptr<Node> node) { + sizeTracker.addEntry( + 1 /* modification type */ + 1 + + (idx ? static_cast<int>(std::log10(idx)) : 0) /* Count the number of digits */, + node.get()); + auto itr = children.insert({idx, std::move(node)}); + invariant(itr.second); + return itr.first->second.get(); + } - /** - * Methods which return the next modification (if any) and advance the iterator. Returns - * boost::none if there are no remaining modifications. Otherwise, returns a pair. The first - * member of the pair is the index of the modification. The second part is the modification - * itself. - * - * If the second part of the pair contains a BSONElement, it means there is an update at that - * index. - * - * If the second part contains a DocumentDiffReader or ArrayDiffReader, it means there is a - * subdiff at that index. - */ - boost::optional<std::pair<size_t, ArrayModification>> next(); + Node* addChild(StringData fieldName, std::unique_ptr<Node> node) override { + auto idx = str::parseUnsignedBase10Integer(fieldName); + invariant(idx); + return addChild(*idx, std::move(node)); + } - /** - * Returns the size of the post image array. - */ - boost::optional<size_t> newSize() { - return _newSize; + void addUpdate(size_t idx, BSONElement value) { + addChild(idx, std::make_unique<UpdateNode>(value)); } -private: - Diff _diff; - BSONObjIterator _it; + virtual Node* getChild(StringData fieldName) const override { + auto idx = str::parseUnsignedBase10Integer(fieldName); + invariant(idx); + auto it = children.find(*idx); + return (it != children.end()) ? it->second.get() : nullptr; + } - boost::optional<size_t> _newSize; -}; + void setResize(size_t size) { + resize = size; + sizeTracker.increment(1 /* kResizeSectionFieldName */ + + sizeof(uint32_t) /* size of value */ + 2); + } -class DocumentDiffReader { -public: - DocumentDiffReader(const Diff& diff); + NodeType type() const override { + return NodeType::kArray; + } - /** - * The below methods get the next type of modification (if any) and advance the iterator. - */ - boost::optional<StringData> nextDelete(); - boost::optional<BSONElement> nextUpdate(); - boost::optional<BSONElement> nextInsert(); - boost::optional<std::pair<StringData, stdx::variant<DocumentDiffReader, ArrayDiffReader>>> - nextSubDiff(); + const std::map<size_t, std::unique_ptr<Node>>& getChildren() const { + return children; + } + const boost::optional<size_t>& getResize() const { + return resize; + } private: - Diff _diff; - - boost::optional<BSONObjIterator> _deletes; - boost::optional<BSONObjIterator> _inserts; - boost::optional<BSONObjIterator> _updates; - boost::optional<BSONObjIterator> _subDiffs; + // The ordering of this map is significant. We are expected to serialize array indexes in + // numeric ascending order (as opposed to "stringified" order where "11" < "8"). + std::map<size_t, std::unique_ptr<Node>> children; + boost::optional<size_t> resize; }; -} // namespace doc_diff + +} // namespace diff_tree } // namespace mongo diff --git a/src/mongo/db/update/document_diff_serialization_test.cpp b/src/mongo/db/update/document_diff_serialization_test.cpp index c1107a1de32..bcfde9d4ff6 100644 --- a/src/mongo/db/update/document_diff_serialization_test.cpp +++ b/src/mongo/db/update/document_diff_serialization_test.cpp @@ -51,15 +51,15 @@ BSONElement withFieldName(BSONElement elem, StringData fieldName) { } TEST(DiffSerializationTest, DeleteSimple) { - DocumentDiffBuilder builder; + diff_tree::DocumentSubDiffNode diffNode; StringData fieldName1 = "f1"; StringData fieldName2 = "f2"; StringData fieldName3 = "f3"; - builder.addDelete(fieldName1); - builder.addDelete(fieldName2); - builder.addDelete(fieldName3); + diffNode.addDelete(fieldName1); + diffNode.addDelete(fieldName2); + diffNode.addDelete(fieldName3); - auto out = builder.serialize(); + auto out = diffNode.serialize(); ASSERT_BSONOBJ_BINARY_EQ(fromjson("{d: {f1: false, f2: false, f3: false}}"), out); DocumentDiffReader reader(out); @@ -77,11 +77,11 @@ TEST(DiffSerializationTest, InsertSimple) { const BSONObj kDummyObj(BSON("a" << 1 << "b" << "foo")); - DocumentDiffBuilder builder; - builder.addInsert("f1", kDummyObj["a"]); - builder.addInsert("f2", kDummyObj["b"]); + diff_tree::DocumentSubDiffNode diffNode; + diffNode.addInsert("f1", kDummyObj["a"]); + diffNode.addInsert("f2", kDummyObj["b"]); - auto out = builder.serialize(); + auto out = diffNode.serialize(); ASSERT_BSONOBJ_BINARY_EQ(out, fromjson("{'i': {f1: 1, f2: 'foo' }}")); DocumentDiffReader reader(out); @@ -98,11 +98,11 @@ TEST(DiffSerializationTest, UpdateSimple) { const BSONObj kDummyObj(BSON("a" << 1 << "b" << "foo")); - DocumentDiffBuilder builder; - builder.addUpdate("f1", kDummyObj["a"]); - builder.addUpdate("f2", kDummyObj["b"]); + diff_tree::DocumentSubDiffNode diffNode; + diffNode.addUpdate("f1", kDummyObj["a"]); + diffNode.addUpdate("f2", kDummyObj["b"]); - auto out = builder.serialize(); + auto out = diffNode.serialize(); ASSERT_BSONOBJ_BINARY_EQ(out, fromjson("{'u': { f1: 1, f2: 'foo'}}")); DocumentDiffReader reader(out); @@ -119,16 +119,14 @@ TEST(DiffSerializationTest, SubDiff) { const BSONObj kDummyObj(BSON("a" << 1 << "b" << "foo")); - DocumentDiffBuilder builder; - { - auto subBuilderGuard = builder.startSubObjDiff("obj"); - - subBuilderGuard.builder()->addInsert("iField", kDummyObj["a"]); - subBuilderGuard.builder()->addUpdate("uField", kDummyObj["b"]); - subBuilderGuard.builder()->addDelete("dField"); - } + diff_tree::DocumentSubDiffNode diffNode; + auto subDiffNode = std::make_unique<diff_tree::DocumentSubDiffNode>(); - auto out = builder.serialize(); + subDiffNode->addInsert("iField", kDummyObj["a"]); + subDiffNode->addUpdate("uField", kDummyObj["b"]); + subDiffNode->addDelete("dField"); + diffNode.addChild("obj", std::move(subDiffNode)); + auto out = diffNode.serialize(); ASSERT_BSONOBJ_BINARY_EQ( out, fromjson("{sobj: {d : { dField: false }, u : { uField: 'foo' }, i : { iField: 1}}}")); @@ -159,20 +157,22 @@ TEST(DiffSerializationTest, SubArrayWithSubDiff) { const BSONObj kDummyObj(BSON("a" << 1 << "b" << BSON("foo" << "bar"))); - DocumentDiffBuilder builder; + diff_tree::DocumentSubDiffNode diffNode; { - auto subBuilderGuard = builder.startSubArrDiff("arr"); - subBuilderGuard.builder()->addUpdate(0, kDummyObj["b"]); + auto subDiffNode = std::make_unique<diff_tree::ArrayNode>(); + subDiffNode->addUpdate(0, kDummyObj["b"]); { - auto subSubBuilderGuard = subBuilderGuard.builder()->startSubObjDiff(2); - subSubBuilderGuard.builder()->addDelete("dField"); + auto subSubDiffNode = std::make_unique<diff_tree::DocumentSubDiffNode>(); + subSubDiffNode->addDelete("dField"); + subDiffNode->addChild(2, std::move(subSubDiffNode)); } - subBuilderGuard.builder()->addUpdate(5, kDummyObj["a"]); - subBuilderGuard.builder()->setResize(6); + subDiffNode->addUpdate(5, kDummyObj["a"]); + subDiffNode->setResize(6); + diffNode.addChild("arr", std::move(subDiffNode)); } - auto out = builder.serialize(); + auto out = diffNode.serialize(); ASSERT_BSONOBJ_BINARY_EQ(out, fromjson("{sarr: {a: true, l: 6," "'u0': {foo: 'bar'}, " @@ -224,31 +224,32 @@ TEST(DiffSerializationTest, SubArrayWithSubDiff) { TEST(DiffSerializationTest, SubArrayNestedObject) { BSONObj storage(BSON("a" << 1 << "b" << 2 << "c" << 3)); - DocumentDiffBuilder builder; + diff_tree::DocumentSubDiffNode diffNode; { - auto subArrBuilderGuard = builder.startSubArrDiff("subArray"); + auto subArrDiffNode = std::make_unique<diff_tree::ArrayNode>(); { { - auto subBuilderGuard = subArrBuilderGuard.builder()->startSubObjDiff(1); - subBuilderGuard.builder()->addUpdate(storage["a"].fieldNameStringData(), - storage["a"]); + auto subDiffNode = std::make_unique<diff_tree::DocumentSubDiffNode>(); + subDiffNode->addUpdate(storage["a"].fieldNameStringData(), storage["a"]); + subArrDiffNode->addChild(1, std::move(subDiffNode)); } { - auto subBuilderGuard = subArrBuilderGuard.builder()->startSubObjDiff(2); - subBuilderGuard.builder()->addUpdate(storage["b"].fieldNameStringData(), - storage["b"]); + auto subDiffNode = std::make_unique<diff_tree::DocumentSubDiffNode>(); + subDiffNode->addUpdate(storage["b"].fieldNameStringData(), storage["b"]); + subArrDiffNode->addChild(2, std::move(subDiffNode)); } { - auto subBuilderGuard = subArrBuilderGuard.builder()->startSubObjDiff(3); - subBuilderGuard.builder()->addUpdate(storage["c"].fieldNameStringData(), - storage["c"]); + auto subDiffNode = std::make_unique<diff_tree::DocumentSubDiffNode>(); + subDiffNode->addUpdate(storage["c"].fieldNameStringData(), storage["c"]); + subArrDiffNode->addChild(3, std::move(subDiffNode)); } } + diffNode.addChild("subArray", std::move(subArrDiffNode)); } - const auto out = builder.serialize(); + const auto out = diffNode.serialize(); ASSERT_BSONOBJ_BINARY_EQ( out, fromjson( @@ -258,14 +259,15 @@ TEST(DiffSerializationTest, SubArrayNestedObject) { TEST(DiffSerializationTest, SubArrayHighIndex) { const BSONObj kDummyObj(BSON("a" << 1 << "b" << "foo")); - DocumentDiffBuilder builder; + diff_tree::DocumentSubDiffNode diffNode; { - auto subBuilderGuard = builder.startSubArrDiff("subArray"); - subBuilderGuard.builder()->addUpdate(254, kDummyObj["b"]); + auto subDiffNode = std::make_unique<diff_tree::ArrayNode>(); + subDiffNode->addUpdate(254, kDummyObj["b"]); + diffNode.addChild("subArray", std::move(subDiffNode)); } - auto out = builder.serialize(); + auto out = diffNode.serialize(); ASSERT_BSONOBJ_BINARY_EQ(out, fromjson("{ssubArray: {a: true, 'u254': 'foo'}}")); DocumentDiffReader reader(out); @@ -290,63 +292,8 @@ TEST(DiffSerializationTest, SubArrayHighIndex) { } } -TEST(DiffSerializationTest, SubDiffAbandon) { - const BSONObj kDummyObj(BSON("a" << 1)); - - DocumentDiffBuilder builder; - builder.addDelete("dField1"); - builder.addUpdate("uField", kDummyObj["a"]); - - { - - auto subBuilderGuard = builder.startSubObjDiff("obj"); - subBuilderGuard.builder()->addDelete("dField3"); - subBuilderGuard.abandon(); - } - - // Make sure that after we abandon something we can still use the parent builder. - builder.addDelete("dField2"); - - { - auto subBuilderGuard = builder.startSubObjDiff("obj2"); - subBuilderGuard.builder()->addDelete("dField2"); - } - - auto out = builder.serialize(); - ASSERT_BSONOBJ_BINARY_EQ( - out, - fromjson("{d: {dField1: false, dField2: false}, u: {uField: 1}, sobj2: {d: " - "{dField2: false}}}")); -} - -TEST(DiffSerializationTest, SubArrayDiffAbandon) { - const BSONObj kDummyObj(BSON("a" << 1)); - - DocumentDiffBuilder builder; - builder.addDelete("dField1"); - builder.addUpdate("uField", kDummyObj["a"]); - - { - auto subBuilderGuard = builder.startSubArrDiff("subArray"); - subBuilderGuard.builder()->addUpdate(4, kDummyObj.firstElement()); - subBuilderGuard.abandon(); - } - - // Make sure that after we abandon something we can still use the parent builder. - builder.addDelete("dField2"); - - { - auto subBuilderGuard = builder.startSubArrDiff("arr2"); - subBuilderGuard.builder()->setResize(5); - } - auto out = builder.serialize(); - ASSERT_BSONOBJ_BINARY_EQ( - out, - fromjson("{d : {dField1: false, dField2: false}, u: {uField: 1}, sarr2: {a: true, l: 5}}")); -} - TEST(DiffSerializationTest, ValidateComputeApproxSize) { - const size_t padding = 20; + const size_t padding = 25; const auto storage = BSON("num" << 4 << "str" << "val" << "emptyStr" @@ -358,67 +305,63 @@ TEST(DiffSerializationTest, ValidateComputeApproxSize) { << BSON("" << "update")); - DocumentDiffBuilder builder(padding); - builder.addDelete("deleteField"); - builder.addInsert("insert", storage["num"]); - builder.addUpdate("update1", storage["subObj"]); - builder.addDelete(""); + diff_tree::DocumentSubDiffNode diffNode(padding); + diffNode.addDelete("deleteField"); + diffNode.addInsert("insert", storage["num"]); + diffNode.addUpdate("update1", storage["subObj"]); + diffNode.addDelete(""); + + // Ensure size of the sub-array diff is included. + auto subDiffNode = std::make_unique<diff_tree::ArrayNode>(); + subDiffNode->setResize(5); + subDiffNode->addUpdate(2, storage["num"]); + subDiffNode->addUpdate(3, storage["str"]); { - // Ensure size of the sub-array diff is included. - auto subBuilderGuard = builder.startSubArrDiff("subArray"); - subBuilderGuard.builder()->setResize(5); - subBuilderGuard.builder()->addUpdate(2, storage["num"]); - subBuilderGuard.builder()->addUpdate(2, storage["str"]); - { - auto subSubBuilderGuard = subBuilderGuard.builder()->startSubObjDiff(2); - subSubBuilderGuard.builder()->addInsert("insert2", storage["emptyStr"]); - subSubBuilderGuard.builder()->addUpdate("update3", storage["null"]); - } - { - auto subSubBuilderGuard = subBuilderGuard.builder()->startSubObjDiff(234); - subSubBuilderGuard.builder()->addInsert("subObj", storage["subObj"]); - subSubBuilderGuard.builder()->addUpdate("update3", storage["null"]); - } - { - auto subSubArrayBuilderGuard = subBuilderGuard.builder()->startSubArrDiff(2456); - subSubArrayBuilderGuard.builder()->addUpdate(0, storage["num"]); - subSubArrayBuilderGuard.abandon(); - } - { - auto subSubArrayBuilderGuard = subBuilderGuard.builder()->startSubArrDiff(2456); - subSubArrayBuilderGuard.builder()->setResize(10000); - subSubArrayBuilderGuard.builder()->addUpdate(0, storage["array"]); - } + auto subSubDiffNode = std::make_unique<diff_tree::DocumentSubDiffNode>(); + subSubDiffNode->addInsert("insert2", storage["emptyStr"]); + subSubDiffNode->addUpdate("update3", storage["null"]); + subDiffNode->addChild(5, std::move(subSubDiffNode)); } { - auto subArrayBuilderGuard = builder.startSubArrDiff("subArray2"); - subArrayBuilderGuard.builder()->addUpdate(2, storage["str"]); + auto subSubDiffNode = std::make_unique<diff_tree::DocumentSubDiffNode>(); + subSubDiffNode->addInsert("subObj", storage["subObj"]); + subSubDiffNode->addUpdate("update3", storage["null"]); + subDiffNode->addChild(234, std::move(subSubDiffNode)); } { - // Ensure size of the sub-object diff is included. - auto subBuilderGuard = builder.startSubObjDiff("subObj"); - subBuilderGuard.builder()->addUpdate("setArray", storage["array"]); + auto subSubArrayBuilder = std::make_unique<diff_tree::ArrayNode>(); + subSubArrayBuilder->setResize(10000); + subSubArrayBuilder->addUpdate(0, storage["array"]); + subDiffNode->addChild(2456, std::move(subSubArrayBuilder)); } { - // Ensure size of the abandoned sub-object diff is non included. - auto subBuilderGuard = builder.startSubObjDiff("subObj1"); - subBuilderGuard.builder()->addUpdate("setArray2", storage["array"]); - subBuilderGuard.abandon(); + auto subArrayBuilder = std::make_unique<diff_tree::ArrayNode>(); + + subArrayBuilder->addUpdate(2, storage["str"]); + diffNode.addChild("subArray2", std::move(subArrayBuilder)); + } + { + // Ensure size of the sub-object diff is included. + auto subsubDiffNode = std::make_unique<diff_tree::DocumentSubDiffNode>(); + subsubDiffNode->addUpdate("setArray", storage["array"]); + diffNode.addChild("subObj", std::move(subsubDiffNode)); } + diffNode.addChild("subArray", std::move(subDiffNode)); // Update with a sub-object. - builder.addUpdate("update2", storage["subObj"]); + diffNode.addUpdate("update2", storage["subObj"]); - auto computedSize = builder.getObjSize(); - auto out = builder.serialize(); + auto computedSize = diffNode.getObjSize(); + auto out = diffNode.serialize(); ASSERT_EQ(computedSize, out.objsize() + padding); } TEST(DiffSerializationTest, ExecptionsWhileDiffBuildingDoesNotLeakMemory) { try { - DocumentDiffBuilder builder; - auto subBuilderGuard = builder.startSubArrDiff("subArray"); - subBuilderGuard.builder()->setResize(4); - auto subSubBuilderGuard = subBuilderGuard.builder()->startSubObjDiff(5); + diff_tree::DocumentSubDiffNode diffNode; + auto subDiffNode = std::make_unique<diff_tree::ArrayNode>(); + subDiffNode->setResize(4); + auto subSubDiffNode = std::make_unique<diff_tree::DocumentSubDiffNode>(); + diffNode.addChild("asdf", std::move(subSubDiffNode)); throw std::exception(); } catch (const std::exception&) { } diff --git a/src/mongo/db/update/v2_log_builder.cpp b/src/mongo/db/update/v2_log_builder.cpp index aec1148f22f..08ba0adb3a2 100644 --- a/src/mongo/db/update/v2_log_builder.cpp +++ b/src/mongo/db/update/v2_log_builder.cpp @@ -27,60 +27,23 @@ * it in the license file. */ +#include "mongo/platform/basic.h" + #include "mongo/db/update/v2_log_builder.h" #include <stack> #include "mongo/base/checked_cast.h" -#include "mongo/db/field_ref.h" #include "mongo/db/update/update_oplog_entry_serialization.h" -#include "mongo/util/str.h" namespace mongo::v2_log_builder { -Node* ArrayNode::addChild(StringData fieldName, std::unique_ptr<Node> node) { - auto idx = str::parseUnsignedBase10Integer(fieldName); - invariant(idx); - - auto itr = children.insert({*idx, std::move(node)}); - invariant(itr.second); - return itr.first->second.get(); -} - -Node* DocumentNode::addChild(StringData fieldName, std::unique_ptr<Node> node) { - auto* nodePtr = node.get(); - auto result = children.insert({fieldName.toString(), std::move(node)}); - invariant(result.second); - StringData storedFieldName = result.first->first; - switch (nodePtr->type()) { - case (NodeType::kArray): - case (NodeType::kDocumentSubDiff): { - subDiffs.push_back({storedFieldName, checked_cast<InternalNode*>(nodePtr)}); - return nodePtr; - } - case (NodeType::kDocumentInsert): - case (NodeType::kInsert): { - inserts.push_back({storedFieldName, nodePtr}); - return nodePtr; - } - case (NodeType::kDelete): { - deletes.push_back({storedFieldName, checked_cast<DeleteNode*>(nodePtr)}); - return nodePtr; - } - case (NodeType::kUpdate): { - updates.push_back({storedFieldName, checked_cast<UpdateNode*>(nodePtr)}); - return nodePtr; - } - } - MONGO_UNREACHABLE; -} - Status V2LogBuilder::logUpdatedField(const RuntimeUpdatePath& path, mutablebson::Element elt) { - auto newNode = std::make_unique<UpdateNode>(elt); + auto newNode = std::make_unique<diff_tree::UpdateNode>(elt); addNodeAtPath(path, &_root, std::move(newNode), - boost::none // Index of first created component is none since this was an update, - // not a create. + boost::none // Index of first created component is none since this was an + // update, not a create. ); return Status::OK(); } @@ -88,7 +51,7 @@ Status V2LogBuilder::logUpdatedField(const RuntimeUpdatePath& path, mutablebson: Status V2LogBuilder::logCreatedField(const RuntimeUpdatePath& path, int idxOfFirstNewComponent, mutablebson::Element elt) { - auto newNode = std::make_unique<InsertNode>(elt); + auto newNode = std::make_unique<diff_tree::InsertNode>(elt); addNodeAtPath(path, &_root, std::move(newNode), idxOfFirstNewComponent); return Status::OK(); } @@ -96,55 +59,56 @@ Status V2LogBuilder::logCreatedField(const RuntimeUpdatePath& path, Status V2LogBuilder::logCreatedField(const RuntimeUpdatePath& path, int idxOfFirstNewComponent, BSONElement elt) { - auto newNode = std::make_unique<InsertNode>(elt); + auto newNode = std::make_unique<diff_tree::InsertNode>(elt); addNodeAtPath(path, &_root, std::move(newNode), idxOfFirstNewComponent); return Status::OK(); } Status V2LogBuilder::logDeletedField(const RuntimeUpdatePath& path) { - addNodeAtPath(path, &_root, std::make_unique<DeleteNode>(), boost::none); + addNodeAtPath(path, &_root, std::make_unique<diff_tree::DeleteNode>(), boost::none); return Status::OK(); } -Node* V2LogBuilder::createInternalNode(InternalNode* parent, - const RuntimeUpdatePath& fullPath, - size_t pathIdx, - bool newPath) { +diff_tree::Node* V2LogBuilder::createInternalNode(diff_tree::InternalNode* parent, + const RuntimeUpdatePath& fullPath, + size_t pathIdx, + bool newPath) { auto fieldName = fullPath.fieldRef().getPart(pathIdx); // If the child is an array index, then this node is an ArrayNode. if (fullPath.size() > pathIdx + 1 && fullPath.types()[pathIdx + 1] == RuntimeUpdatePath::ComponentType::kArrayIndex) { invariant(!newPath); - return parent->addChild(fieldName, std::make_unique<ArrayNode>()); + return parent->addChild(fieldName, std::make_unique<diff_tree::ArrayNode>()); } else if (newPath) { - return parent->addChild(fieldName, std::make_unique<DocumentInsertionNode>()); + return parent->addChild(fieldName, std::make_unique<diff_tree::DocumentInsertionNode>()); } else { - return parent->addChild(fieldName, std::make_unique<DocumentSubDiffNode>()); + return parent->addChild(fieldName, std::make_unique<diff_tree::DocumentSubDiffNode>()); } MONGO_UNREACHABLE; } void V2LogBuilder::addNodeAtPath(const RuntimeUpdatePath& path, - Node* root, - std::unique_ptr<Node> nodeToAdd, + diff_tree::Node* root, + std::unique_ptr<diff_tree::Node> nodeToAdd, boost::optional<size_t> idxOfFirstNewComponent) { addNodeAtPathHelper(path, 0, root, std::move(nodeToAdd), idxOfFirstNewComponent); } void V2LogBuilder::addNodeAtPathHelper(const RuntimeUpdatePath& path, size_t pathIdx, - Node* root, - std::unique_ptr<Node> nodeToAdd, + diff_tree::Node* root, + std::unique_ptr<diff_tree::Node> nodeToAdd, boost::optional<size_t> idxOfFirstNewComponent) { - invariant(root->type() == NodeType::kArray || root->type() == NodeType::kDocumentSubDiff || - root->type() == NodeType::kDocumentInsert); + invariant(root->type() == diff_tree::NodeType::kArray || + root->type() == diff_tree::NodeType::kDocumentSubDiff || + root->type() == diff_tree::NodeType::kDocumentInsert); // If our path is a.b.c.d and the first new component is "b" then we are dealing with a // newly created path for components b, c and d. const bool isNewPath = idxOfFirstNewComponent && (pathIdx >= *idxOfFirstNewComponent); - auto* node = checked_cast<InternalNode*>(root); + auto* node = checked_cast<diff_tree::InternalNode*>(root); const auto part = path.fieldRef().getPart(pathIdx); if (pathIdx == static_cast<size_t>(path.fieldRef().numParts() - 1)) { node->addChild(part, std::move(nodeToAdd)); @@ -160,266 +124,8 @@ void V2LogBuilder::addNodeAtPathHelper(const RuntimeUpdatePath& path, } } -namespace { -void appendElementToBuilder(stdx::variant<mutablebson::Element, BSONElement> elem, - StringData fieldName, - BSONObjBuilder* builder) { - stdx::visit( - visit_helper::Overloaded{ - [&](const mutablebson::Element& element) { - if (element.hasValue()) { - builder->appendAs(element.getValue(), fieldName); - } else if (element.getType() == BSONType::Object) { - BSONObjBuilder subBuilder(builder->subobjStart(fieldName)); - element.writeChildrenTo(&subBuilder); - } else { - invariant(element.getType() == BSONType::Array); - BSONArrayBuilder subBuilder(builder->subarrayStart(fieldName)); - element.writeArrayTo(&subBuilder); - } - }, - [&](BSONElement element) { builder->appendAs(element, fieldName); }}, - elem); -} - -// Construction of the $v:2 diff needs to handle the same number of levels of recursion as the -// maximum permitted BSON depth. In order to avoid the possibility of stack overflow, which has -// been observed in configurations that use small stack sizes(such as 'dbg=on'), we use an explicit -// stack data structure stored on the heap instead. -// -// The Frame class represents one "frame" of this explicit stack. -class Frame { -public: - virtual ~Frame() {} - - // When execute() is called, the Frame may either return a new Frame to be placed on top of the - // stack, or return nullptr, indicating that the frame has finished and can be destroyed. - // - // If a Frame returns a new stack frame, it must be able to pick up where it left off when - // execute() is called on it again. - virtual std::unique_ptr<Frame> execute() = 0; -}; -using UniqueFrame = std::unique_ptr<Frame>; - -// Helper used for creating a new frame from a sub-diff node. Definition depends on some of the -// *Frame constructors. -UniqueFrame makeSubNodeFrameHelper(InternalNode* node, BSONObjBuilder builder); -// Given a 'node' stored in the 'inserts' section of an InternalNode, will either append that -// node's value to the given builder, or return a new stack frame which will build the object to be -// inserted. 'node' must be an InsertionNode or a DocumentInsertNode. -UniqueFrame handleInsertHelper(StringData fieldName, Node* node, BSONObjBuilder* bob); - -// Stack frame used to maintain state while serializing DocumentInsertionNodes. -class DocumentInsertFrame final : public Frame { -public: - DocumentInsertFrame(const DocumentInsertionNode& node, BSONObjBuilder bob) - : _node(node), _bob(std::move(bob)) {} - - UniqueFrame execute() override { - for (; _insertIdx < _node.children.size(); ++_insertIdx) { - auto&& [fieldName, child] = _node.inserts[_insertIdx]; - - if (auto newFrame = handleInsertHelper(fieldName, child, &_bob)) { - ++_insertIdx; - return newFrame; - } - } - - return nullptr; - } - -private: - size_t _insertIdx = 0; - const DocumentInsertionNode& _node; - BSONObjBuilder _bob; -}; - -// Stack frame used to maintain state while serializing DocumentSubDiffNodes. -class DocumentSubDiffFrame final : public Frame { -public: - DocumentSubDiffFrame(const DocumentSubDiffNode& node, BSONObjBuilder bob) - : _node(node), _bob(std::move(bob)) {} - - UniqueFrame execute() override { - if (!_wroteDeletesAndUpdates) { - if (!_node.deletes.empty()) { - BSONObjBuilder subBob(_bob.subobjStart(doc_diff::kDeleteSectionFieldName)); - for (auto&& [fieldName, node] : _node.deletes) { - // The deletes are logged in the form {fieldName: false} in $v:2 format. - subBob.append(fieldName, false); - } - } - if (!_node.updates.empty()) { - BSONObjBuilder subBob(_bob.subobjStart(doc_diff::kUpdateSectionFieldName)); - for (auto&& [fieldName, node] : _node.updates) { - appendElementToBuilder(node->elt, fieldName, &subBob); - } - } - _wroteDeletesAndUpdates = true; - } - - for (; _insertIdx < _node.inserts.size(); ++_insertIdx) { - if (!_insertBob) { - _insertBob.emplace(_bob.subobjStart(doc_diff::kInsertSectionFieldName)); - } - - auto&& [fieldName, child] = _node.inserts[_insertIdx]; - - if (auto newFrame = handleInsertHelper(fieldName, child, _insertBob.get_ptr())) { - ++_insertIdx; - return newFrame; - } - } - - if (_insertBob) { - // All inserts have been written so we destroy the insert builder now. - _insertBob = boost::none; - } - - if (_subDiffIdx != _node.subDiffs.size()) { - auto&& [fieldName, child] = _node.subDiffs[_subDiffIdx]; - - BSONObjBuilder childBuilder = - _bob.subobjStart(std::string(1, doc_diff::kSubDiffSectionFieldPrefix) + fieldName); - ++_subDiffIdx; - return makeSubNodeFrameHelper(child, std::move(childBuilder)); - } - - return nullptr; - } - - BSONObjBuilder& bob() { - return _bob; - } - -private: - const DocumentSubDiffNode& _node; - BSONObjBuilder _bob; - - // Indicates whether or not we've written deletes and updates yet. Since deletes and updates - // are always leaf nodes, they are always written in the first call to execute(). - bool _wroteDeletesAndUpdates = false; - - // Keeps track of which insertion or subDiff is being serialized. - size_t _insertIdx = 0; - size_t _subDiffIdx = 0; - - boost::optional<BSONObjBuilder> _insertBob; -}; - -// Stack frame used to maintain state while serializing ArrayNodes. -class ArrayFrame final : public Frame { -public: - ArrayFrame(const ArrayNode& node, BSONObjBuilder bob) - : _node(node), _bob(std::move(bob)), _childIt(node.children.begin()) {} - - UniqueFrame execute() override { - invariant(!_node.children.empty()); - if (_childIt == _node.children.begin()) { - _bob.append(doc_diff::kArrayHeader, true); - } - - for (; _childIt != _node.children.end(); ++_childIt) { - - auto&& [idx, child] = *_childIt; - auto idxAsStr = std::to_string(idx); - - switch (child->type()) { - case (NodeType::kUpdate): { - const auto& valueNode = checked_cast<const UpdateNode&>(*child); - appendElementToBuilder( - valueNode.elt, doc_diff::kUpdateSectionFieldName + idxAsStr, &_bob); - break; - } - case (NodeType::kInsert): { - const auto& valueNode = checked_cast<const InsertNode&>(*child); - appendElementToBuilder( - valueNode.elt, doc_diff::kUpdateSectionFieldName + idxAsStr, &_bob); - break; - } - case (NodeType::kDocumentInsert): { - // This represents an array element that is being created with a sub object. - // - // For example {$set: {"a.0.c": 1}} when the input document is {a: []}. Here we - // need to create the array element at '0', then sub document 'c'. - - ++_childIt; - return std::make_unique<DocumentInsertFrame>( - *checked_cast<DocumentInsertionNode*>(child.get()), - BSONObjBuilder( - _bob.subobjStart(doc_diff::kUpdateSectionFieldName + idxAsStr))); - } - case (NodeType::kDocumentSubDiff): - case (NodeType::kArray): { - InternalNode* subNode = checked_cast<InternalNode*>(child.get()); - BSONObjBuilder childBuilder = _bob.subobjStart( - std::string(1, doc_diff::kSubDiffSectionFieldPrefix) + idxAsStr); - - ++_childIt; - return makeSubNodeFrameHelper(subNode, std::move(childBuilder)); - } - case (NodeType::kDelete): { - MONGO_UNREACHABLE; - } - } - } - - return nullptr; - } - -private: - const ArrayNode& _node; - BSONObjBuilder _bob; - decltype(ArrayNode::children)::const_iterator _childIt; -}; - -BSONObj writeDiff(const DocumentSubDiffNode& root) { - std::stack<UniqueFrame> stack; - stack.push(std::make_unique<DocumentSubDiffFrame>(root, BSONObjBuilder{})); - - // Iterate until the stack size is one and there is no more work to be done. - while (true) { - auto nextFrame = stack.top()->execute(); - if (nextFrame) { - stack.push(std::move(nextFrame)); - } else if (stack.size() == 1) { - break; - } else { - stack.pop(); - } - } - - invariant(stack.size() == 1); - - auto& topFrame = checked_cast<DocumentSubDiffFrame&>(*stack.top()); - return topFrame.bob().obj(); -} - -UniqueFrame makeSubNodeFrameHelper(InternalNode* node, BSONObjBuilder builder) { - if (node->type() == NodeType::kArray) { - return std::make_unique<ArrayFrame>(*checked_cast<ArrayNode*>(node), std::move(builder)); - } else { - // We never expect to see a DocumentInsertionNode under the 'subDiffs' section of an - // internal node. - invariant(node->type() == NodeType::kDocumentSubDiff); - return std::make_unique<DocumentSubDiffFrame>(*checked_cast<DocumentSubDiffNode*>(node), - std::move(builder)); - } -} - -UniqueFrame handleInsertHelper(StringData fieldName, Node* node, BSONObjBuilder* bob) { - if (node->type() == NodeType::kInsert) { - appendElementToBuilder(checked_cast<InsertNode*>(node)->elt, fieldName, bob); - return nullptr; - } - invariant(node->type() == NodeType::kDocumentInsert); - return std::make_unique<DocumentInsertFrame>(*checked_cast<DocumentInsertionNode*>(node), - BSONObjBuilder(bob->subobjStart(fieldName))); -} -} // namespace - BSONObj V2LogBuilder::serialize() const { - auto diff = writeDiff(_root); + auto diff = _root.serialize(); return update_oplog_entry::makeDeltaOplogEntry(diff); } } // namespace mongo::v2_log_builder diff --git a/src/mongo/db/update/v2_log_builder.h b/src/mongo/db/update/v2_log_builder.h index a26d7b596eb..743a3b6b224 100644 --- a/src/mongo/db/update/v2_log_builder.h +++ b/src/mongo/db/update/v2_log_builder.h @@ -35,119 +35,10 @@ #include "mongo/db/update/document_diff_serialization.h" #include "mongo/db/update/log_builder_interface.h" #include "mongo/db/update/runtime_update_path.h" -#include "mongo/util/string_map.h" namespace mongo { namespace v2_log_builder { /** - * These are structs for a "diff tree" that is constructed while the update is applied. There are - * two types of internal nodes: Document nodes and Array nodes. All other node types are always - * leaves. - * - * When the update is complete, the diff tree is converted into a $v: 2 oplog entry. - */ -enum class NodeType { kDocumentSubDiff, kDocumentInsert, kArray, kDelete, kUpdate, kInsert }; - -struct Node { - virtual ~Node(){}; - virtual NodeType type() const = 0; -}; - - -/** - * This class represents insertion of a BSONElement or mutablebson Element. Note that - * 'DocumentInsertionNode' also repesent an insert for the cases where an object is created - * implicity. - */ -struct InsertNode : public Node { - InsertNode(mutablebson::Element el) : elt(el) {} - InsertNode(BSONElement el) : elt(el) {} - - NodeType type() const override { - return NodeType::kInsert; - } - stdx::variant<mutablebson::Element, BSONElement> elt; -}; - -struct UpdateNode : public Node { - UpdateNode(mutablebson::Element el) : elt(el) {} - - NodeType type() const override { - return NodeType::kUpdate; - } - mutablebson::Element elt; -}; - -struct DeleteNode : public Node { - NodeType type() const override { - return NodeType::kDelete; - } -}; - -// Struct representing non-leaf node. -struct InternalNode : public Node { - virtual Node* addChild(StringData fieldName, std::unique_ptr<Node> node) = 0; - virtual Node* getChild(StringData fieldName) const = 0; -}; - -struct DocumentNode : public InternalNode { - Node* addChild(StringData fieldName, std::unique_ptr<Node> node) override; - - Node* getChild(StringData fieldName) const override { - auto it = children.find(fieldName.toString()); - return (it != children.end()) ? it->second.get() : nullptr; - } - - // We store the raw pointer to each of the child node so that we don't have to look up in - // 'children' map every time. Note that the field names of these modifications will reference - // the field name stored in 'children'. - std::vector<std::pair<StringData, UpdateNode*>> updates; - std::vector<std::pair<StringData, DeleteNode*>> deletes; - std::vector<std::pair<StringData, Node*>> inserts; - std::vector<std::pair<StringData, InternalNode*>> subDiffs; - - // We use std::unordered_map here for pointer stability on keys (field names) when a rehash - // happens. - stdx::unordered_map<std::string, std::unique_ptr<Node>, StringMapHasher, StringMapEq> children; -}; - -// Indicates that the document this node represents was created as part of the update. -// -// E.g. applying the update {$set: {"a.b.c": "foo"}} on document {} will create sub-documents -// at paths "a" and "a.b". -struct DocumentInsertionNode : public DocumentNode { - NodeType type() const override { - return NodeType::kDocumentInsert; - } -}; - -// Indicates a Document internal node which is already in the pre-image document. -struct DocumentSubDiffNode : public DocumentNode { - NodeType type() const override { - return NodeType::kDocumentSubDiff; - } -}; - -struct ArrayNode : public InternalNode { - Node* addChild(StringData fieldName, std::unique_ptr<Node> node) override; - - virtual Node* getChild(StringData fieldName) const override { - auto idx = str::parseUnsignedBase10Integer(fieldName); - invariant(idx); - auto it = children.find(*idx); - return (it != children.end()) ? it->second.get() : nullptr; - } - - NodeType type() const override { - return NodeType::kArray; - } - - // The ordering of this map is significant. We are expected to serialize array indexes in - // numeric ascending order (as opposed to "stringified" order where "11" < "8"). - std::map<size_t, std::unique_ptr<Node>> children; -}; - -/** * A log builder which can produce $v:2 oplog entries. * * This log builder accumulates updates, creates and deletes, and stores them in a tree. When the @@ -175,26 +66,26 @@ public: private: // Helpers for maintaining/updating the tree. - Node* createInternalNode(InternalNode* parent, - const RuntimeUpdatePath& fullPath, - size_t pathIdx, - bool newPath); + diff_tree::Node* createInternalNode(diff_tree::InternalNode* parent, + const RuntimeUpdatePath& fullPath, + size_t pathIdx, + bool newPath); // Helpers for adding nodes at a certain path. Returns false if the path was invalid/did // not exist. void addNodeAtPathHelper(const RuntimeUpdatePath& path, size_t pathIdx, - Node* root, - std::unique_ptr<Node> nodeToAdd, + diff_tree::Node* root, + std::unique_ptr<diff_tree::Node> nodeToAdd, boost::optional<size_t> idxOfFirstNewComponent); void addNodeAtPath(const RuntimeUpdatePath& path, - Node* root, - std::unique_ptr<Node> nodeToAdd, + diff_tree::Node* root, + std::unique_ptr<diff_tree::Node> nodeToAdd, boost::optional<size_t> idxOfFirstNewComponent); // Root of the tree. - DocumentSubDiffNode _root; + diff_tree::DocumentSubDiffNode _root; }; } // namespace v2_log_builder } // namespace mongo |