summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArun Banala <arun.banala@mongodb.com>2020-08-19 13:32:53 +0100
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-09-04 18:55:35 +0000
commit52a6af98520e33142ffb0d7ca6fbc404b7356111 (patch)
tree121f8c3b8c080768e4b5b5c960ed84bb44435ace
parent0c2f4ad0b12d4890ff4c7766641ddb2f9ec7cd63 (diff)
downloadmongo-52a6af98520e33142ffb0d7ca6fbc404b7356111.tar.gz
SERVER-50275 Implement V2 LogBuilder
-rw-r--r--src/mongo/bson/mutable/document.cpp8
-rw-r--r--src/mongo/bson/mutable/element.h16
-rw-r--r--src/mongo/db/update/SConscript2
-rw-r--r--src/mongo/db/update/document_diff_serialization.cpp18
-rw-r--r--src/mongo/db/update/document_diff_serialization.h6
-rw-r--r--src/mongo/db/update/v2_log_builder.cpp304
-rw-r--r--src/mongo/db/update/v2_log_builder.h200
-rw-r--r--src/mongo/db/update/v2_log_builder_test.cpp329
8 files changed, 870 insertions, 13 deletions
diff --git a/src/mongo/bson/mutable/document.cpp b/src/mongo/bson/mutable/document.cpp
index d042e2a458f..3e7853146a1 100644
--- a/src/mongo/bson/mutable/document.cpp
+++ b/src/mongo/bson/mutable/document.cpp
@@ -1697,6 +1697,14 @@ void Element::writeTo(BSONObjBuilder* const builder) const {
}
}
+void Element::writeChildrenTo(BSONObjBuilder* const builder) const {
+ invariant(ok());
+ const Document::Impl& impl = getDocument().getImpl();
+ const ElementRep& thisRep = impl.getElementRep(_repIdx);
+ invariant(impl.getType(thisRep) == mongo::Object);
+ impl.writeChildren(_repIdx, builder);
+}
+
void Element::writeArrayTo(BSONArrayBuilder* const builder) const {
invariant(ok());
const Document::Impl& impl = getDocument().getImpl();
diff --git a/src/mongo/bson/mutable/element.h b/src/mongo/bson/mutable/element.h
index 7949344f824..9ae13dda2af 100644
--- a/src/mongo/bson/mutable/element.h
+++ b/src/mongo/bson/mutable/element.h
@@ -385,11 +385,21 @@ public:
// Serialization API.
//
- /** Write this Element to the provided object builder. */
+ /**
+ * Writes this Element to the provided object builder. Note that if this element is not a root,
+ * then it gets wrapped inside an object.
+ */
void writeTo(BSONObjBuilder* builder) const;
- /** Write this Element to the provided array builder. This Element must be of type
- * mongo::Array.
+ /**
+ * Writes all the children of the current Element to the provided object builder. This Element
+ * must be of type mongo::Object.
+ */
+ void writeChildrenTo(BSONObjBuilder* builder) const;
+
+ /**
+ * Write this Element to the provided array builder. This Element must be of type
+ * mongo::Array.
*/
void writeArrayTo(BSONArrayBuilder* builder) const;
diff --git a/src/mongo/db/update/SConscript b/src/mongo/db/update/SConscript
index e17a2076b2e..7a9d1167d21 100644
--- a/src/mongo/db/update/SConscript
+++ b/src/mongo/db/update/SConscript
@@ -11,6 +11,7 @@ env.Library(
'path_support.cpp',
'storage_validation.cpp',
'v1_log_builder.cpp',
+ 'v2_log_builder.cpp',
],
LIBDEPS=[
'$BUILD_DIR/mongo/base',
@@ -136,6 +137,7 @@ env.CppUnitTest(
'update_object_node_test.cpp',
'update_serialization_test.cpp',
'v1_log_builder_test.cpp',
+ 'v2_log_builder_test.cpp',
],
LIBDEPS=[
'$BUILD_DIR/mongo/bson/mutable/mutable_bson',
diff --git a/src/mongo/db/update/document_diff_serialization.cpp b/src/mongo/db/update/document_diff_serialization.cpp
index f978eb23e9e..a775d59bb5f 100644
--- a/src/mongo/db/update/document_diff_serialization.cpp
+++ b/src/mongo/db/update/document_diff_serialization.cpp
@@ -105,21 +105,21 @@ void ArrayDiffBuilder::serializeTo(BSONObjBuilder* output) const {
void DocumentDiffBuilder::serializeTo(BSONObjBuilder* output) const {
if (!_deletes.empty()) {
- BSONObjBuilder subBob(output->subobjStart(StringData(&kDeleteSectionFieldName, 1)));
+ BSONObjBuilder subBob(output->subobjStart(kDeleteSectionFieldName));
for (auto&& del : _deletes) {
subBob.append(del, false);
}
}
if (!_updates.empty()) {
- BSONObjBuilder subBob(output->subobjStart(StringData(&kUpdateSectionFieldName, 1)));
+ BSONObjBuilder subBob(output->subobjStart(kUpdateSectionFieldName));
for (auto&& update : _updates) {
subBob.appendAs(update.second, update.first);
}
}
if (!_inserts.empty()) {
- BSONObjBuilder subBob(output->subobjStart(StringData(&kInsertSectionFieldName, 1)));
+ BSONObjBuilder subBob(output->subobjStart(kInsertSectionFieldName));
for (auto&& insert : _inserts) {
subBob.appendAs(insert.second, insert.first);
}
@@ -200,7 +200,7 @@ boost::optional<std::pair<size_t, ArrayDiffReader::ArrayModification>> ArrayDiff
fieldName.size() > 1);
const size_t idx = extractArrayIndex(fieldName.substr(1, fieldName.size()));
- if (fieldName[0] == kUpdateSectionFieldName) {
+ if (fieldName[0] == kUpdateSectionFieldName[0]) {
// It's an update.
return {{idx, next}};
} else if (fieldName[0] == kSubDiffSectionFieldPrefix) {
@@ -232,9 +232,13 @@ DocumentDiffReader::DocumentDiffReader(const Diff& diff) : _diff(diff) {
int order;
};
- const std::map<char, Section> sections{{kDeleteSectionFieldName, Section{&_deletes, 1}},
- {kUpdateSectionFieldName, Section{&_updates, 2}},
- {kInsertSectionFieldName, Section{&_inserts, 3}},
+ static_assert(kDeleteSectionFieldName.size() == 1);
+ static_assert(kInsertSectionFieldName.size() == 1);
+ static_assert(kUpdateSectionFieldName.size() == 1);
+
+ const std::map<char, Section> sections{{kDeleteSectionFieldName[0], Section{&_deletes, 1}},
+ {kUpdateSectionFieldName[0], Section{&_updates, 2}},
+ {kInsertSectionFieldName[0], Section{&_inserts, 3}},
{kSubDiffSectionFieldPrefix, Section{&_subDiffs, 4}}};
char prev = 0;
diff --git a/src/mongo/db/update/document_diff_serialization.h b/src/mongo/db/update/document_diff_serialization.h
index 8ade179a875..19f1a3f2e9c 100644
--- a/src/mongo/db/update/document_diff_serialization.h
+++ b/src/mongo/db/update/document_diff_serialization.h
@@ -47,9 +47,9 @@ enum DiffType : uint8_t { kDocument, kArray };
// Below are string constants used in the diff format.
constexpr StringData kArrayHeader = "a"_sd;
-constexpr char kDeleteSectionFieldName = 'd';
-constexpr char kInsertSectionFieldName = 'i';
-constexpr char kUpdateSectionFieldName = 'u';
+constexpr StringData kDeleteSectionFieldName = "d"_sd;
+constexpr StringData kInsertSectionFieldName = "i"_sd;
+constexpr StringData kUpdateSectionFieldName = "u"_sd;
constexpr char kSubDiffSectionFieldPrefix = 's';
// 'l' for length.
constexpr StringData kResizeSectionFieldName = "l"_sd;
diff --git a/src/mongo/db/update/v2_log_builder.cpp b/src/mongo/db/update/v2_log_builder.cpp
new file mode 100644
index 00000000000..45b49e24a13
--- /dev/null
+++ b/src/mongo/db/update/v2_log_builder.cpp
@@ -0,0 +1,304 @@
+/**
+ * Copyright (C) 2020-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/update/v2_log_builder.h"
+
+#include "mongo/base/checked_cast.h"
+#include "mongo/db/field_ref.h"
+#include "mongo/db/update/update_oplog_entry_serialization.h"
+#include "mongo/util/str.h"
+
+namespace mongo::v2_log_builder {
+Node* ArrayNode::addChild(StringData fieldName, std::unique_ptr<Node> node) {
+ auto idx = str::parseUnsignedBase10Integer(fieldName);
+ invariant(idx);
+
+ auto itr = children.insert({*idx, std::move(node)});
+ invariant(itr.second);
+ return itr.first->second.get();
+}
+
+Node* DocumentNode::addChild(StringData fieldName, std::unique_ptr<Node> node) {
+ auto* nodePtr = node.get();
+ auto result = children.insert({fieldName.toString(), std::move(node)});
+ invariant(result.second);
+ StringData storedFieldName = result.first->first;
+ switch (nodePtr->type()) {
+ case (NodeType::kArray):
+ case (NodeType::kDocumentSubDiff): {
+ subDiffs.push_back({storedFieldName, checked_cast<InternalNode*>(nodePtr)});
+ return nodePtr;
+ }
+ case (NodeType::kDocumentInsert):
+ case (NodeType::kInsert): {
+ inserts.push_back({storedFieldName, nodePtr});
+ return nodePtr;
+ }
+ case (NodeType::kDelete): {
+ deletes.push_back({storedFieldName, checked_cast<DeleteNode*>(nodePtr)});
+ return nodePtr;
+ }
+ case (NodeType::kUpdate): {
+ updates.push_back({storedFieldName, checked_cast<UpdateNode*>(nodePtr)});
+ return nodePtr;
+ }
+ }
+ MONGO_UNREACHABLE;
+}
+
+Status V2LogBuilder::logUpdatedField(const RuntimeUpdatePath& path, mutablebson::Element elt) {
+ auto newNode = std::make_unique<UpdateNode>(elt);
+ addNodeAtPath(path,
+ &_root,
+ std::move(newNode),
+ boost::none // Index of first created component is none since this was an update,
+ // not a create.
+ );
+ return Status::OK();
+}
+
+Status V2LogBuilder::logCreatedField(const RuntimeUpdatePath& path,
+ int idxOfFirstNewComponent,
+ mutablebson::Element elt) {
+ auto newNode = std::make_unique<InsertElement>(elt);
+ addNodeAtPath(path, &_root, std::move(newNode), idxOfFirstNewComponent);
+ return Status::OK();
+}
+
+Status V2LogBuilder::logCreatedField(const RuntimeUpdatePath& path,
+ int idxOfFirstNewComponent,
+ BSONElement elt) {
+ auto newNode = std::make_unique<InsertElement>(elt);
+ addNodeAtPath(path, &_root, std::move(newNode), idxOfFirstNewComponent);
+ return Status::OK();
+}
+
+Status V2LogBuilder::logDeletedField(const RuntimeUpdatePath& path) {
+ addNodeAtPath(path, &_root, std::make_unique<DeleteNode>(), boost::none);
+ return Status::OK();
+}
+
+Node* V2LogBuilder::createInternalNode(InternalNode* parent,
+ const RuntimeUpdatePath& fullPath,
+ size_t pathIdx,
+ bool newPath) {
+ auto fieldName = fullPath.fieldRef().getPart(pathIdx);
+
+ // If the child is an array index, then this node is an ArrayNode.
+ if (fullPath.size() > pathIdx + 1 &&
+ fullPath.types()[pathIdx + 1] == RuntimeUpdatePath::ComponentType::kArrayIndex) {
+ invariant(!newPath);
+ return parent->addChild(fieldName, std::make_unique<ArrayNode>());
+ } else if (newPath) {
+ return parent->addChild(fieldName, std::make_unique<DocumentInsertionNode>());
+ } else {
+ return parent->addChild(fieldName, std::make_unique<DocumentSubDiffNode>());
+ }
+ MONGO_UNREACHABLE;
+}
+
+void V2LogBuilder::addNodeAtPath(const RuntimeUpdatePath& path,
+ Node* root,
+ std::unique_ptr<Node> nodeToAdd,
+ boost::optional<size_t> idxOfFirstNewComponent) {
+ addNodeAtPathHelper(path, 0, root, std::move(nodeToAdd), idxOfFirstNewComponent);
+}
+
+void V2LogBuilder::addNodeAtPathHelper(const RuntimeUpdatePath& path,
+ size_t pathIdx,
+ Node* root,
+ std::unique_ptr<Node> nodeToAdd,
+ boost::optional<size_t> idxOfFirstNewComponent) {
+ invariant(root->type() == NodeType::kArray || root->type() == NodeType::kDocumentSubDiff ||
+ root->type() == NodeType::kDocumentInsert);
+
+ // If our path is a.b.c.d and the first new component is "b" then we are dealing with a
+ // newly created path for components b, c and d.
+ const bool isNewPath = idxOfFirstNewComponent && (pathIdx >= *idxOfFirstNewComponent);
+
+ auto* node = checked_cast<InternalNode*>(root);
+ const auto part = path.fieldRef().getPart(pathIdx);
+ if (pathIdx == static_cast<size_t>(path.fieldRef().numParts() - 1)) {
+ node->addChild(part, std::move(nodeToAdd));
+ return;
+ }
+
+ if (auto* child = node->getChild(part)) {
+ addNodeAtPathHelper(path, pathIdx + 1, child, std::move(nodeToAdd), idxOfFirstNewComponent);
+ } else {
+ auto newNode = createInternalNode(node, path, pathIdx, isNewPath);
+ addNodeAtPathHelper(
+ path, pathIdx + 1, newNode, std::move(nodeToAdd), idxOfFirstNewComponent);
+ }
+}
+
+namespace {
+void appendElementToBuilder(stdx::variant<mutablebson::Element, BSONElement> elem,
+ StringData fieldName,
+ BSONObjBuilder* builder) {
+ stdx::visit(
+ visit_helper::Overloaded{
+ [&](const mutablebson::Element& element) {
+ if (element.hasValue()) {
+ builder->appendAs(element.getValue(), fieldName);
+ } else if (element.getType() == BSONType::Object) {
+ BSONObjBuilder subBuilder(builder->subobjStart(fieldName));
+ element.writeChildrenTo(&subBuilder);
+ } else {
+ invariant(element.getType() == BSONType::Array);
+ BSONArrayBuilder subBuilder(builder->subarrayStart(fieldName));
+ element.writeArrayTo(&subBuilder);
+ }
+ },
+ [&](BSONElement element) { builder->appendAs(element, fieldName); }},
+ elem);
+}
+void serializeNewlyCreatedDocument(DocumentNode* const node, BSONObjBuilder* out) {
+ for (auto&& [fieldName, child] : node->inserts) {
+ if (child->type() == NodeType::kInsert) {
+ appendElementToBuilder(checked_cast<InsertElement*>(child)->elt, fieldName, out);
+ continue;
+ }
+
+ BSONObjBuilder childBuilder(out->subobjStart(fieldName));
+ serializeNewlyCreatedDocument(checked_cast<DocumentNode* const>(child), &childBuilder);
+ }
+}
+
+// Mutually recursive with writeArrayDiff().
+void writeSubNodeHelper(InternalNode* node, BSONObjBuilder* builder);
+
+void writeArrayDiff(const ArrayNode& node, BSONObjBuilder* builder) {
+ builder->append(doc_diff::kArrayHeader, true);
+ for (auto&& [idx, child] : node.children) {
+ auto idxAsStr = std::to_string(idx);
+ switch (child->type()) {
+ case (NodeType::kUpdate): {
+ const auto& valueNode = checked_cast<const UpdateNode&>(*child);
+ appendElementToBuilder(
+ valueNode.elt, doc_diff::kUpdateSectionFieldName + idxAsStr, builder);
+ break;
+ }
+ case (NodeType::kInsert): {
+ const auto& valueNode = checked_cast<const InsertElement&>(*child);
+ appendElementToBuilder(
+ valueNode.elt, doc_diff::kUpdateSectionFieldName + idxAsStr, builder);
+ break;
+ }
+ case (NodeType::kDocumentInsert): {
+ // This represents that the array element is being created which has a sub-object.
+ //
+ // For example {$set: {"a.0.c": 1}} when the input document is {a: []}. Here we need
+ // to create the array element at '0', then sub document 'c'.
+ BSONObjBuilder childBuilder =
+ builder->subobjStart(doc_diff::kUpdateSectionFieldName + idxAsStr);
+ serializeNewlyCreatedDocument(checked_cast<DocumentNode*>(child.get()),
+ &childBuilder);
+ break;
+ }
+ case (NodeType::kDocumentSubDiff):
+ case (NodeType::kArray): {
+ InternalNode* subNode = checked_cast<InternalNode*>(child.get());
+ BSONObjBuilder childBuilder = builder->subobjStart(
+ std::string(1, doc_diff::kSubDiffSectionFieldPrefix) + idxAsStr);
+ writeSubNodeHelper(subNode, &childBuilder);
+ break;
+ }
+ case (NodeType::kDelete): {
+ MONGO_UNREACHABLE;
+ }
+ }
+ }
+}
+
+void writeDocumentDiff(const DocumentNode& node, BSONObjBuilder* builder) {
+ if (!node.deletes.empty()) {
+ BSONObjBuilder subBob(builder->subobjStart(doc_diff::kDeleteSectionFieldName));
+ for (auto&& [fieldName, node] : node.deletes) {
+ // The deletes are logged in the form {fieldName: false} in $v:2 format.
+ subBob.append(fieldName, false);
+ }
+ }
+ if (!node.updates.empty()) {
+ BSONObjBuilder subBob(builder->subobjStart(doc_diff::kUpdateSectionFieldName));
+ for (auto&& [fieldName, node] : node.updates) {
+ appendElementToBuilder(node->elt, fieldName, &subBob);
+ }
+ }
+
+ if (!node.inserts.empty()) {
+ BSONObjBuilder insertBob(builder->subobjStart(doc_diff::kInsertSectionFieldName));
+ for (auto&& [fieldName, child] : node.inserts) {
+ if (child->type() == NodeType::kInsert) {
+ appendElementToBuilder(
+ checked_cast<InsertElement*>(child)->elt, fieldName, &insertBob);
+ continue;
+ }
+ // This represents a new document. While the modifier-style update system
+ // was capable of writing paths which would implicitly create new
+ // documents, there is no equivalent in $v: 2 updates.
+ //
+ // For example {$set: {"a.b.c": 1}} would create document 'a' and 'a.b' if
+ // necessary.
+ //
+ // Since $v:2 entries don't have this capability, we instead build the new
+ // object and log it as an insert. For example, applying the above $set on
+ // document {a: {}} will be logged as an insert of the value {b: {c: 1}} on
+ // document 'a'.
+ invariant(child->type() == NodeType::kDocumentInsert);
+ BSONObjBuilder subBob = insertBob.subobjStart(fieldName);
+ serializeNewlyCreatedDocument(checked_cast<DocumentNode*>(child), &subBob);
+ }
+ }
+
+ for (auto&& [fieldName, child] : node.subDiffs) {
+ BSONObjBuilder childBuilder =
+ builder->subobjStart(std::string(1, doc_diff::kSubDiffSectionFieldPrefix) + fieldName);
+ writeSubNodeHelper(child, &childBuilder);
+ }
+}
+
+void writeSubNodeHelper(InternalNode* node, BSONObjBuilder* builder) {
+ if (node->type() == NodeType::kArray) {
+ writeArrayDiff(*checked_cast<ArrayNode*>(node), builder);
+ } else {
+ invariant(node->type() == NodeType::kDocumentSubDiff ||
+ node->type() == NodeType::kDocumentInsert);
+ writeDocumentDiff(*checked_cast<DocumentNode*>(node), builder);
+ }
+}
+} // namespace
+
+
+BSONObj V2LogBuilder::serialize() const {
+ BSONObjBuilder topBuilder;
+ writeDocumentDiff(_root, &topBuilder);
+ return update_oplog_entry::makeDeltaOplogEntry(topBuilder.obj());
+}
+} // namespace mongo::v2_log_builder \ No newline at end of file
diff --git a/src/mongo/db/update/v2_log_builder.h b/src/mongo/db/update/v2_log_builder.h
new file mode 100644
index 00000000000..d0553cb54b3
--- /dev/null
+++ b/src/mongo/db/update/v2_log_builder.h
@@ -0,0 +1,200 @@
+/**
+ * Copyright (C) 2020-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/base/status.h"
+#include "mongo/bson/bsonobj.h"
+#include "mongo/bson/mutable/document.h"
+#include "mongo/db/update/document_diff_serialization.h"
+#include "mongo/db/update/log_builder_interface.h"
+#include "mongo/db/update/runtime_update_path.h"
+#include "mongo/util/string_map.h"
+
+namespace mongo {
+namespace v2_log_builder {
+/**
+ * These are structs for a "diff tree" that is constructed while the update is applied. There are
+ * two types of internal nodes: Document nodes and Array nodes. All other node types are always
+ * leaves.
+ *
+ * When the update is complete, the diff tree is converted into a $v: 2 oplog entry.
+ */
+enum class NodeType { kDocumentSubDiff, kDocumentInsert, kArray, kDelete, kUpdate, kInsert };
+
+struct Node {
+ virtual ~Node(){};
+ virtual NodeType type() const = 0;
+};
+
+
+/**
+ * This class represents insertion of a BSONElement or mutablebson Element. Note that
+ * 'DocumentInsertionNode' also repesent an insert for the cases where an object is created
+ * implicity.
+ */
+struct InsertElement : public Node {
+ InsertElement(mutablebson::Element el) : elt(el) {}
+ InsertElement(BSONElement el) : elt(el) {}
+
+ NodeType type() const override {
+ return NodeType::kInsert;
+ }
+ stdx::variant<mutablebson::Element, BSONElement> elt;
+};
+
+struct UpdateNode : public Node {
+ UpdateNode(mutablebson::Element el) : elt(el) {}
+
+ NodeType type() const override {
+ return NodeType::kUpdate;
+ }
+ mutablebson::Element elt;
+};
+
+struct DeleteNode : public Node {
+ NodeType type() const override {
+ return NodeType::kDelete;
+ }
+};
+
+// Struct representing non-leaf node.
+struct InternalNode : public Node {
+ virtual Node* addChild(StringData fieldName, std::unique_ptr<Node> node) = 0;
+ virtual Node* getChild(StringData fieldName) const = 0;
+};
+
+struct DocumentNode : public InternalNode {
+ Node* addChild(StringData fieldName, std::unique_ptr<Node> node) override;
+
+ Node* getChild(StringData fieldName) const override {
+ auto it = children.find(fieldName.toString());
+ return (it != children.end()) ? it->second.get() : nullptr;
+ }
+
+ // We store the raw pointer to each of the child node so that we don't have to look up in
+ // 'children' map every time. Note that the field names of these modifications will reference
+ // the field name stored in 'children'.
+ std::vector<std::pair<StringData, UpdateNode*>> updates;
+ std::vector<std::pair<StringData, DeleteNode*>> deletes;
+ std::vector<std::pair<StringData, Node*>> inserts;
+ std::vector<std::pair<StringData, InternalNode*>> subDiffs;
+
+ // We use std::unordered_map here for pointer stability on keys (field names) when a rehash
+ // happens.
+ stdx::unordered_map<std::string, std::unique_ptr<Node>, StringMapHasher, StringMapEq> children;
+};
+
+// Indicates that the document this node represents was created as part of the update.
+//
+// E.g. applying the update {$set: {"a.b.c": "foo"}} on document {} will create sub-documents
+// at paths "a" and "a.b".
+struct DocumentInsertionNode : public DocumentNode {
+ NodeType type() const override {
+ return NodeType::kDocumentInsert;
+ }
+};
+
+// Indicates a Document internal node which is already in the pre-image document.
+struct DocumentSubDiffNode : public DocumentNode {
+ NodeType type() const override {
+ return NodeType::kDocumentSubDiff;
+ }
+};
+
+struct ArrayNode : public InternalNode {
+ Node* addChild(StringData fieldName, std::unique_ptr<Node> node) override;
+
+ virtual Node* getChild(StringData fieldName) const override {
+ auto idx = str::parseUnsignedBase10Integer(fieldName);
+ invariant(idx);
+ auto it = children.find(*idx);
+ return (it != children.end()) ? it->second.get() : nullptr;
+ }
+
+ NodeType type() const override {
+ return NodeType::kArray;
+ }
+
+ // The ordering of this map is significant. We are expected to serialize array indexes in
+ // numeric ascending order (as opposed to "stringified" order where "11" < "8").
+ std::map<size_t, std::unique_ptr<Node>> children;
+};
+
+/**
+ * A log builder which can produce $v:2 oplog entries.
+ *
+ * This log builder accumulates updates, creates and deletes, and stores them in a tree. When the
+ * update is done and serialize() is called, the tree is converted into a $v:2 oplog entry. Note
+ * that we don't need a pre-image for building the oplog.
+ */
+class V2LogBuilder : public LogBuilderInterface {
+public:
+ /**
+ * Overload methods from the LogBuilder interface.
+ */
+ Status logUpdatedField(const RuntimeUpdatePath& path, mutablebson::Element elt) override;
+ Status logCreatedField(const RuntimeUpdatePath& path,
+ int idxOfFirstNewComponent,
+ mutablebson::Element elt) override;
+ Status logCreatedField(const RuntimeUpdatePath& path,
+ int idxOfFirstNewComponent,
+ BSONElement elt) override;
+ Status logDeletedField(const RuntimeUpdatePath& path) override;
+
+ /**
+ * Converts the in-memory tree to a $v:2 delta oplog entry.
+ */
+ BSONObj serialize() const override;
+
+private:
+ // Helpers for maintaining/updating the tree.
+ Node* createInternalNode(InternalNode* parent,
+ const RuntimeUpdatePath& fullPath,
+ size_t pathIdx,
+ bool newPath);
+
+ // Helpers for adding nodes at a certain path. Returns false if the path was invalid/did
+ // not exist.
+ void addNodeAtPathHelper(const RuntimeUpdatePath& path,
+ size_t pathIdx,
+ Node* root,
+ std::unique_ptr<Node> nodeToAdd,
+ boost::optional<size_t> idxOfFirstNewComponent);
+
+ void addNodeAtPath(const RuntimeUpdatePath& path,
+ Node* root,
+ std::unique_ptr<Node> nodeToAdd,
+ boost::optional<size_t> idxOfFirstNewComponent);
+
+ // Root of the tree.
+ DocumentSubDiffNode _root;
+};
+} // namespace v2_log_builder
+} // namespace mongo
diff --git a/src/mongo/db/update/v2_log_builder_test.cpp b/src/mongo/db/update/v2_log_builder_test.cpp
new file mode 100644
index 00000000000..37e82f48061
--- /dev/null
+++ b/src/mongo/db/update/v2_log_builder_test.cpp
@@ -0,0 +1,329 @@
+/**
+ * Copyright (C) 2020-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/update/v2_log_builder.h"
+
+#include "mongo/base/status.h"
+#include "mongo/bson/bsonobj.h"
+#include "mongo/bson/mutable/mutable_bson_test_utils.h"
+#include "mongo/db/json.h"
+#include "mongo/unittest/death_test.h"
+#include "mongo/unittest/unittest.h"
+#include "mongo/util/safe_num.h"
+
+namespace mongo::v2_log_builder {
+namespace {
+namespace mmb = mongo::mutablebson;
+
+/**
+ * Given a FieldRef, creates a RuntimeUpdatePath based on it, assuming that every numeric component
+ * is an array index.
+ */
+RuntimeUpdatePath makeRuntimeUpdatePathAssumeNumericComponentsAreIndexes(StringData path) {
+ FieldRef fr(path);
+ std::vector<RuntimeUpdatePath::ComponentType> types;
+
+ for (int i = 0; i < fr.numParts(); ++i) {
+ types.push_back(fr.isNumericPathComponentStrict(i)
+ ? RuntimeUpdatePath::ComponentType::kArrayIndex
+ : RuntimeUpdatePath::ComponentType::kFieldName);
+ }
+ return RuntimeUpdatePath(fr, std::move(types));
+}
+
+/**
+ * Given a FieldRef, creates a RuntimeUpdatePath based on it, assuming that every component is a
+ * field name.
+ */
+RuntimeUpdatePath makeRuntimeUpdatePathAssumeAllComponentsFieldNames(std::string path) {
+ FieldRef fieldRef(path);
+ return RuntimeUpdatePath(
+ std::move(fieldRef),
+ std::vector<RuntimeUpdatePath::ComponentType>(
+ fieldRef.numParts(), RuntimeUpdatePath::ComponentType::kFieldName));
+}
+
+TEST(V2LogBuilder, UpdateFieldWithTopLevelMutableBsonElement) {
+ mmb::Document doc;
+ // Element is an array. Modifying a sub-element of mmb::Element does not store the data
+ // serialized. So we need to call Element::writeToArray() in those cases to serialize the array.
+ // We explicity modify the sub-element so that this logic can be tested.
+ const mmb::Element eltArr = doc.makeElementArray("",
+ BSON_ARRAY(1 << BSON("sub"
+ << "obj")
+ << 3));
+ ASSERT_OK(eltArr.leftChild().setValueString("val"));
+
+ // Element is an obj. Modifying a sub-element of mmb::Element does not store the data
+ // serialized. So we need to call Element::writeChildren() in those cases to serialize the
+ // object. We explicity modify the sub-element so that this logic can be tested.
+ mmb::Element eltObj = doc.makeElementObject("", fromjson("{arr: [{a: [0]}, 2]}"));
+ ASSERT_OK(eltObj.leftChild().rightChild().setValueString("val"));
+
+ // Element is a scalar.
+ const mmb::Element eltScalar = doc.makeElementString("c.d", "val");
+ ASSERT_TRUE(eltScalar.ok());
+
+ V2LogBuilder lb;
+ ASSERT_OK(
+ lb.logUpdatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("a.b"), eltArr));
+ ASSERT_OK(
+ lb.logUpdatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("c.d"), eltObj));
+ ASSERT_OK(
+ lb.logUpdatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("c.e"), eltScalar));
+ ASSERT_BSONOBJ_BINARY_EQ(
+ mongo::fromjson("{ $v: 2, diff: { sa: {u: {b: ['val', {sub: 'obj'}, 3]}}, sc: {u: {d: "
+ "{arr: [{a: [0]}, 'val']}, e: 'val'}} }}"),
+ lb.serialize());
+}
+
+TEST(V2LogBuilder, UpdateFieldMutableBson) {
+ auto storage = fromjson("{subArray: [{a: [0]}], array: [{}], scalar: 'val'}");
+ mmb::Document doc(
+ fromjson("{topLevel: {obj: {subArray: 1}, array: [[], [0], {}], scalar: 'val'}}"));
+
+ // Element is an obj. Modifying a sub-element of mmb::Element does not store the data
+ // serialized. So we need to call Element::writeChildren() in those cases to serialize the
+ // object. We explicity modify the sub-element so that this logic can be tested.
+ mmb::Element eltObj = doc.root().leftChild().leftChild();
+ ASSERT_OK(eltObj.leftChild().setValueBSONElement(storage["subArray"]));
+
+ // Element is an array.
+ const mmb::Element eltArr = eltObj.rightSibling();
+ ASSERT_OK(eltArr.leftChild().setValueBSONElement(storage["array"]));
+
+ // Element is a scalar.
+ const mmb::Element eltScalar = eltArr.rightSibling();
+ ASSERT_TRUE(eltScalar.ok());
+
+ V2LogBuilder lb;
+ ASSERT_OK(
+ lb.logUpdatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("a.b"), eltArr));
+ ASSERT_OK(
+ lb.logUpdatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("c.d"), eltObj));
+ ASSERT_OK(
+ lb.logUpdatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("c.e"), eltScalar));
+ ASSERT_BSONOBJ_BINARY_EQ(
+ mongo::fromjson("{ $v: 2, diff: { sa: {u: {b: [[{}], [0], {}]}}, sc: {u: {d: "
+ "{subArray: [{a: [0]}]}, e: 'val'}} }}"),
+ lb.serialize());
+}
+
+TEST(V2LogBuilder, CreateFieldBSONElt) {
+ BSONObj storage = fromjson("{arr: ['v','a','l'], obj: {}, scalar: 2}");
+ V2LogBuilder lb;
+ ASSERT_OK(lb.logCreatedField(
+ makeRuntimeUpdatePathAssumeAllComponentsFieldNames("a.b.d"), 0, storage["arr"]));
+ ASSERT_OK(lb.logCreatedField(
+ makeRuntimeUpdatePathAssumeAllComponentsFieldNames("a.c.e"), 0, storage["obj"]));
+ ASSERT_OK(lb.logCreatedField(
+ makeRuntimeUpdatePathAssumeAllComponentsFieldNames("a.b.c"), 0, storage["scalar"]));
+ ASSERT_BSONOBJ_BINARY_EQ(
+ mongo::fromjson("{ $v: 2, diff: {i : {a: {b: {d: ['v','a','l'], c: 2}, c: {e: {}}} }}}"),
+ lb.serialize());
+}
+
+TEST(V2LogBuilder, CreateFieldMutableBson) {
+ mmb::Document doc;
+ V2LogBuilder lb;
+
+ const mmb::Element elt_ab = doc.makeElementInt("a.b", 1);
+ ASSERT_TRUE(elt_ab.ok());
+ ASSERT_OK(lb.logCreatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("a.b"),
+ 0, // idxOfFirstNewComponent (unused)
+ elt_ab));
+ ASSERT_BSONOBJ_BINARY_EQ(mongo::fromjson("{ $v: 2, diff: { i: {a: {b: 1}}}}"), lb.serialize());
+}
+
+TEST(V2LogBuilder, CreateFieldMergeInOrder) {
+ mmb::Document doc;
+ const mmb::Element eltScalar = doc.makeElementInt("scalar", 1);
+ const mmb::Element eltObj = doc.makeElementObject("obj", fromjson("{arr: [{a: [0]}]}"));
+ ASSERT_TRUE(eltObj.ok());
+ ASSERT_TRUE(eltScalar.ok());
+
+ // Test where only a 'root' object exists and logCreateField() needs to create sub-objects
+ // recursively.
+ const int idxOfFirstNewComponent = 1;
+ V2LogBuilder lb;
+ ASSERT_OK(lb.logCreatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("root.d"),
+ idxOfFirstNewComponent,
+ eltScalar));
+ ASSERT_OK(lb.logCreatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("root.b.a"),
+ idxOfFirstNewComponent,
+ eltScalar));
+ ASSERT_OK(lb.logCreatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("root.c.b"),
+ idxOfFirstNewComponent,
+ eltObj));
+ ASSERT_OK(lb.logCreatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("root.c.e"),
+ idxOfFirstNewComponent,
+ eltScalar));
+ ASSERT_OK(lb.logCreatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("root.a"),
+ idxOfFirstNewComponent,
+ eltObj));
+ ASSERT_BSONOBJ_BINARY_EQ(mongo::fromjson("{ $v: 2, diff: { sroot: {i: {d: 1, b: {a: 1}, c: {b: "
+ "{arr: [{a: [0]}]}, e: 1}, a: {arr: [{a: [0]}]}} }}}"),
+ lb.serialize());
+}
+
+TEST(V2LogBuilder, BasicDelete) {
+ mmb::Document doc;
+ V2LogBuilder lb;
+ ASSERT_OK(lb.logDeletedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("x")));
+ ASSERT_BSONOBJ_BINARY_EQ(mongo::fromjson("{ $v: 2, diff: { d: {x: false}}}"), lb.serialize());
+}
+
+TEST(V2LogBuilder, VerifyDeletesOrder) {
+ mmb::Document doc;
+ V2LogBuilder lb;
+
+ ASSERT_OK(lb.logDeletedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("a.c")));
+ ASSERT_OK(lb.logDeletedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("a.b")));
+ ASSERT_OK(lb.logDeletedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("x.z")));
+ ASSERT_OK(lb.logDeletedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("x.y")));
+
+ ASSERT_BSONOBJ_BINARY_EQ(
+ mongo::fromjson(
+ "{ $v: 2, diff: { sa: {d: {c: false, b: false}}, sx: {d: {z: false, y: false}} }}"),
+ lb.serialize());
+}
+
+TEST(V2LogBuilder, WithAllModificationTypes) {
+ mmb::Document doc;
+ V2LogBuilder lb;
+
+ const mmb::Element elt_ab = doc.makeElementInt("", 1);
+ ASSERT_TRUE(elt_ab.ok());
+ ASSERT_OK(
+ lb.logUpdatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("a.b.c"), elt_ab));
+
+ const mmb::Element elt_cd = doc.makeElementInt("", 2);
+ ASSERT_TRUE(elt_cd.ok());
+
+ ASSERT_OK(lb.logCreatedField(
+ makeRuntimeUpdatePathAssumeAllComponentsFieldNames("a.b.d.e"), 3, elt_cd));
+
+ ASSERT_OK(lb.logDeletedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("a.b.e")));
+ ASSERT_BSONOBJ_BINARY_EQ(
+ mongo::fromjson("{ $v: 2, diff: {sa: {sb: {d: {e: false}, u: {c: 1}, sd: {i: {e: 2}} }}}}"),
+ lb.serialize());
+}
+
+TEST(V2LogBuilder, VerifySubDiffsAreOrderedCorrectly) {
+ mmb::Document doc;
+ V2LogBuilder lb;
+
+ const mmb::Element elt_ab = doc.makeElementInt("a.b", 1);
+ ASSERT_TRUE(elt_ab.ok());
+ ASSERT_OK(
+ lb.logUpdatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("a.b.c"), elt_ab));
+ ASSERT_OK(
+ lb.logCreatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("a.c.d"), 2, elt_ab));
+
+ const mmb::Element elt_xy = doc.makeElementInt("x.y", 2);
+ ASSERT_TRUE(elt_xy.ok());
+ ASSERT_OK(
+ lb.logUpdatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("x.y.z"), elt_xy));
+ ASSERT_OK(
+ lb.logUpdatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("x.z.a"), elt_xy));
+
+ ASSERT_BSONOBJ_BINARY_EQ(
+ mongo::fromjson("{ $v: 2, diff: { sa: {sb: {u: {c: 1}}, sc: {i: {d: 1}}}, "
+ "sx: {sy: {u: {z: 2}}, sz: {u: {a: 2}}} }}"),
+ lb.serialize());
+}
+
+TEST(V2LogBuilder, ArrayModifications) {
+ mmb::Document doc;
+ V2LogBuilder lb;
+
+ const mmb::Element elt_ab = doc.makeElementInt("", 1);
+ ASSERT_TRUE(elt_ab.ok());
+ ASSERT_OK(lb.logUpdatedField(
+ makeRuntimeUpdatePathAssumeNumericComponentsAreIndexes("a.0.c.update"), elt_ab));
+
+ // Element is an obj.
+ const mmb::Element eltObj = doc.makeElementObject("", fromjson("{arr: [{a: [0]}]}"));
+ ASSERT_TRUE(eltObj.ok());
+
+ const mmb::Element eltScalar = doc.makeElementInt("", 2);
+ ASSERT_TRUE(eltScalar.ok());
+
+ // The order of the update of array elements need not be maintained by the caller.
+ ASSERT_OK(lb.logCreatedField(
+ makeRuntimeUpdatePathAssumeNumericComponentsAreIndexes("a.10.insert.a"), 2, eltScalar));
+ ASSERT_OK(
+ lb.logUpdatedField(makeRuntimeUpdatePathAssumeNumericComponentsAreIndexes("a.3"), elt_ab));
+ ASSERT_OK(lb.logCreatedField(
+ makeRuntimeUpdatePathAssumeNumericComponentsAreIndexes("a.0.c.insert.a"), 3, eltScalar));
+ ASSERT_OK(lb.logCreatedField(
+ makeRuntimeUpdatePathAssumeNumericComponentsAreIndexes("a.0.c.insert.f"), 3, eltScalar));
+ ASSERT_OK(lb.logCreatedField(
+ makeRuntimeUpdatePathAssumeNumericComponentsAreIndexes("a.1"), 1, eltObj));
+ ASSERT_OK(
+ lb.logDeletedField(makeRuntimeUpdatePathAssumeNumericComponentsAreIndexes("a.0.c.del")));
+
+ ASSERT_BSONOBJ_BINARY_EQ(
+ mongo::fromjson("{$v: 2, diff: {sa: {a: true, s0: {sc: {d: {del: false}, u: {update: 1}, "
+ "i: {insert: {a: 2, f: 2}}}}, u1: {arr: [{a: [0]}]}, u3: 1, s10: {i: "
+ "{insert: {a: 2}}} }}}"),
+ lb.serialize());
+}
+
+TEST(V2LogBuilder, ImplicityArrayElementCreationAllowed) {
+ mmb::Document doc;
+ V2LogBuilder lb;
+
+ const mmb::Element elt = doc.makeElementInt("", 1);
+ ASSERT_TRUE(elt.ok());
+ ASSERT_OK(lb.logCreatedField(
+ makeRuntimeUpdatePathAssumeNumericComponentsAreIndexes("a.10.b.a"), 1, elt));
+ ASSERT_OK(lb.logCreatedField(
+ makeRuntimeUpdatePathAssumeNumericComponentsAreIndexes("a.2.b.a"), 1, elt));
+ ASSERT_OK(
+ lb.logUpdatedField(makeRuntimeUpdatePathAssumeNumericComponentsAreIndexes("a.0.b.a"), elt));
+
+ ASSERT_BSONOBJ_BINARY_EQ(
+ mongo::fromjson("{$v: 2, diff: {sa: {a: true, s0: {sb: {u: {a: 1}}}, u2: "
+ "{b: {a: 1}}, u10: {b: {a: 1}} }}}"),
+ lb.serialize());
+}
+
+DEATH_TEST(V2LogBuilder, ImplicityArrayCreationDisallowed, "invariant") {
+ mmb::Document doc;
+ V2LogBuilder lb;
+
+ const mmb::Element elt = doc.makeElementInt("", 1);
+ ASSERT_TRUE(elt.ok());
+ auto status = lb.logCreatedField(
+ makeRuntimeUpdatePathAssumeNumericComponentsAreIndexes("a.10.insert.a"), 0, elt);
+}
+
+} // namespace
+} // namespace mongo::v2_log_builder