From 52a6af98520e33142ffb0d7ca6fbc404b7356111 Mon Sep 17 00:00:00 2001 From: Arun Banala Date: Wed, 19 Aug 2020 13:32:53 +0100 Subject: SERVER-50275 Implement V2 LogBuilder --- src/mongo/bson/mutable/document.cpp | 8 + src/mongo/bson/mutable/element.h | 16 +- src/mongo/db/update/SConscript | 2 + .../db/update/document_diff_serialization.cpp | 18 +- src/mongo/db/update/document_diff_serialization.h | 6 +- src/mongo/db/update/v2_log_builder.cpp | 304 +++++++++++++++++++ src/mongo/db/update/v2_log_builder.h | 200 +++++++++++++ src/mongo/db/update/v2_log_builder_test.cpp | 329 +++++++++++++++++++++ 8 files changed, 870 insertions(+), 13 deletions(-) create mode 100644 src/mongo/db/update/v2_log_builder.cpp create mode 100644 src/mongo/db/update/v2_log_builder.h create mode 100644 src/mongo/db/update/v2_log_builder_test.cpp diff --git a/src/mongo/bson/mutable/document.cpp b/src/mongo/bson/mutable/document.cpp index d042e2a458f..3e7853146a1 100644 --- a/src/mongo/bson/mutable/document.cpp +++ b/src/mongo/bson/mutable/document.cpp @@ -1697,6 +1697,14 @@ void Element::writeTo(BSONObjBuilder* const builder) const { } } +void Element::writeChildrenTo(BSONObjBuilder* const builder) const { + invariant(ok()); + const Document::Impl& impl = getDocument().getImpl(); + const ElementRep& thisRep = impl.getElementRep(_repIdx); + invariant(impl.getType(thisRep) == mongo::Object); + impl.writeChildren(_repIdx, builder); +} + void Element::writeArrayTo(BSONArrayBuilder* const builder) const { invariant(ok()); const Document::Impl& impl = getDocument().getImpl(); diff --git a/src/mongo/bson/mutable/element.h b/src/mongo/bson/mutable/element.h index 7949344f824..9ae13dda2af 100644 --- a/src/mongo/bson/mutable/element.h +++ b/src/mongo/bson/mutable/element.h @@ -385,11 +385,21 @@ public: // Serialization API. // - /** Write this Element to the provided object builder. */ + /** + * Writes this Element to the provided object builder. Note that if this element is not a root, + * then it gets wrapped inside an object. + */ void writeTo(BSONObjBuilder* builder) const; - /** Write this Element to the provided array builder. This Element must be of type - * mongo::Array. + /** + * Writes all the children of the current Element to the provided object builder. This Element + * must be of type mongo::Object. + */ + void writeChildrenTo(BSONObjBuilder* builder) const; + + /** + * Write this Element to the provided array builder. This Element must be of type + * mongo::Array. */ void writeArrayTo(BSONArrayBuilder* builder) const; diff --git a/src/mongo/db/update/SConscript b/src/mongo/db/update/SConscript index e17a2076b2e..7a9d1167d21 100644 --- a/src/mongo/db/update/SConscript +++ b/src/mongo/db/update/SConscript @@ -11,6 +11,7 @@ env.Library( 'path_support.cpp', 'storage_validation.cpp', 'v1_log_builder.cpp', + 'v2_log_builder.cpp', ], LIBDEPS=[ '$BUILD_DIR/mongo/base', @@ -136,6 +137,7 @@ env.CppUnitTest( 'update_object_node_test.cpp', 'update_serialization_test.cpp', 'v1_log_builder_test.cpp', + 'v2_log_builder_test.cpp', ], LIBDEPS=[ '$BUILD_DIR/mongo/bson/mutable/mutable_bson', diff --git a/src/mongo/db/update/document_diff_serialization.cpp b/src/mongo/db/update/document_diff_serialization.cpp index f978eb23e9e..a775d59bb5f 100644 --- a/src/mongo/db/update/document_diff_serialization.cpp +++ b/src/mongo/db/update/document_diff_serialization.cpp @@ -105,21 +105,21 @@ void ArrayDiffBuilder::serializeTo(BSONObjBuilder* output) const { void DocumentDiffBuilder::serializeTo(BSONObjBuilder* output) const { if (!_deletes.empty()) { - BSONObjBuilder subBob(output->subobjStart(StringData(&kDeleteSectionFieldName, 1))); + BSONObjBuilder subBob(output->subobjStart(kDeleteSectionFieldName)); for (auto&& del : _deletes) { subBob.append(del, false); } } if (!_updates.empty()) { - BSONObjBuilder subBob(output->subobjStart(StringData(&kUpdateSectionFieldName, 1))); + BSONObjBuilder subBob(output->subobjStart(kUpdateSectionFieldName)); for (auto&& update : _updates) { subBob.appendAs(update.second, update.first); } } if (!_inserts.empty()) { - BSONObjBuilder subBob(output->subobjStart(StringData(&kInsertSectionFieldName, 1))); + BSONObjBuilder subBob(output->subobjStart(kInsertSectionFieldName)); for (auto&& insert : _inserts) { subBob.appendAs(insert.second, insert.first); } @@ -200,7 +200,7 @@ boost::optional> ArrayDiff fieldName.size() > 1); const size_t idx = extractArrayIndex(fieldName.substr(1, fieldName.size())); - if (fieldName[0] == kUpdateSectionFieldName) { + if (fieldName[0] == kUpdateSectionFieldName[0]) { // It's an update. return {{idx, next}}; } else if (fieldName[0] == kSubDiffSectionFieldPrefix) { @@ -232,9 +232,13 @@ DocumentDiffReader::DocumentDiffReader(const Diff& diff) : _diff(diff) { int order; }; - const std::map sections{{kDeleteSectionFieldName, Section{&_deletes, 1}}, - {kUpdateSectionFieldName, Section{&_updates, 2}}, - {kInsertSectionFieldName, Section{&_inserts, 3}}, + static_assert(kDeleteSectionFieldName.size() == 1); + static_assert(kInsertSectionFieldName.size() == 1); + static_assert(kUpdateSectionFieldName.size() == 1); + + const std::map sections{{kDeleteSectionFieldName[0], Section{&_deletes, 1}}, + {kUpdateSectionFieldName[0], Section{&_updates, 2}}, + {kInsertSectionFieldName[0], Section{&_inserts, 3}}, {kSubDiffSectionFieldPrefix, Section{&_subDiffs, 4}}}; char prev = 0; diff --git a/src/mongo/db/update/document_diff_serialization.h b/src/mongo/db/update/document_diff_serialization.h index 8ade179a875..19f1a3f2e9c 100644 --- a/src/mongo/db/update/document_diff_serialization.h +++ b/src/mongo/db/update/document_diff_serialization.h @@ -47,9 +47,9 @@ enum DiffType : uint8_t { kDocument, kArray }; // Below are string constants used in the diff format. constexpr StringData kArrayHeader = "a"_sd; -constexpr char kDeleteSectionFieldName = 'd'; -constexpr char kInsertSectionFieldName = 'i'; -constexpr char kUpdateSectionFieldName = 'u'; +constexpr StringData kDeleteSectionFieldName = "d"_sd; +constexpr StringData kInsertSectionFieldName = "i"_sd; +constexpr StringData kUpdateSectionFieldName = "u"_sd; constexpr char kSubDiffSectionFieldPrefix = 's'; // 'l' for length. constexpr StringData kResizeSectionFieldName = "l"_sd; diff --git a/src/mongo/db/update/v2_log_builder.cpp b/src/mongo/db/update/v2_log_builder.cpp new file mode 100644 index 00000000000..45b49e24a13 --- /dev/null +++ b/src/mongo/db/update/v2_log_builder.cpp @@ -0,0 +1,304 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/update/v2_log_builder.h" + +#include "mongo/base/checked_cast.h" +#include "mongo/db/field_ref.h" +#include "mongo/db/update/update_oplog_entry_serialization.h" +#include "mongo/util/str.h" + +namespace mongo::v2_log_builder { +Node* ArrayNode::addChild(StringData fieldName, std::unique_ptr node) { + auto idx = str::parseUnsignedBase10Integer(fieldName); + invariant(idx); + + auto itr = children.insert({*idx, std::move(node)}); + invariant(itr.second); + return itr.first->second.get(); +} + +Node* DocumentNode::addChild(StringData fieldName, std::unique_ptr node) { + auto* nodePtr = node.get(); + auto result = children.insert({fieldName.toString(), std::move(node)}); + invariant(result.second); + StringData storedFieldName = result.first->first; + switch (nodePtr->type()) { + case (NodeType::kArray): + case (NodeType::kDocumentSubDiff): { + subDiffs.push_back({storedFieldName, checked_cast(nodePtr)}); + return nodePtr; + } + case (NodeType::kDocumentInsert): + case (NodeType::kInsert): { + inserts.push_back({storedFieldName, nodePtr}); + return nodePtr; + } + case (NodeType::kDelete): { + deletes.push_back({storedFieldName, checked_cast(nodePtr)}); + return nodePtr; + } + case (NodeType::kUpdate): { + updates.push_back({storedFieldName, checked_cast(nodePtr)}); + return nodePtr; + } + } + MONGO_UNREACHABLE; +} + +Status V2LogBuilder::logUpdatedField(const RuntimeUpdatePath& path, mutablebson::Element elt) { + auto newNode = std::make_unique(elt); + addNodeAtPath(path, + &_root, + std::move(newNode), + boost::none // Index of first created component is none since this was an update, + // not a create. + ); + return Status::OK(); +} + +Status V2LogBuilder::logCreatedField(const RuntimeUpdatePath& path, + int idxOfFirstNewComponent, + mutablebson::Element elt) { + auto newNode = std::make_unique(elt); + addNodeAtPath(path, &_root, std::move(newNode), idxOfFirstNewComponent); + return Status::OK(); +} + +Status V2LogBuilder::logCreatedField(const RuntimeUpdatePath& path, + int idxOfFirstNewComponent, + BSONElement elt) { + auto newNode = std::make_unique(elt); + addNodeAtPath(path, &_root, std::move(newNode), idxOfFirstNewComponent); + return Status::OK(); +} + +Status V2LogBuilder::logDeletedField(const RuntimeUpdatePath& path) { + addNodeAtPath(path, &_root, std::make_unique(), boost::none); + return Status::OK(); +} + +Node* V2LogBuilder::createInternalNode(InternalNode* parent, + const RuntimeUpdatePath& fullPath, + size_t pathIdx, + bool newPath) { + auto fieldName = fullPath.fieldRef().getPart(pathIdx); + + // If the child is an array index, then this node is an ArrayNode. + if (fullPath.size() > pathIdx + 1 && + fullPath.types()[pathIdx + 1] == RuntimeUpdatePath::ComponentType::kArrayIndex) { + invariant(!newPath); + return parent->addChild(fieldName, std::make_unique()); + } else if (newPath) { + return parent->addChild(fieldName, std::make_unique()); + } else { + return parent->addChild(fieldName, std::make_unique()); + } + MONGO_UNREACHABLE; +} + +void V2LogBuilder::addNodeAtPath(const RuntimeUpdatePath& path, + Node* root, + std::unique_ptr nodeToAdd, + boost::optional idxOfFirstNewComponent) { + addNodeAtPathHelper(path, 0, root, std::move(nodeToAdd), idxOfFirstNewComponent); +} + +void V2LogBuilder::addNodeAtPathHelper(const RuntimeUpdatePath& path, + size_t pathIdx, + Node* root, + std::unique_ptr nodeToAdd, + boost::optional idxOfFirstNewComponent) { + invariant(root->type() == NodeType::kArray || root->type() == NodeType::kDocumentSubDiff || + root->type() == NodeType::kDocumentInsert); + + // If our path is a.b.c.d and the first new component is "b" then we are dealing with a + // newly created path for components b, c and d. + const bool isNewPath = idxOfFirstNewComponent && (pathIdx >= *idxOfFirstNewComponent); + + auto* node = checked_cast(root); + const auto part = path.fieldRef().getPart(pathIdx); + if (pathIdx == static_cast(path.fieldRef().numParts() - 1)) { + node->addChild(part, std::move(nodeToAdd)); + return; + } + + if (auto* child = node->getChild(part)) { + addNodeAtPathHelper(path, pathIdx + 1, child, std::move(nodeToAdd), idxOfFirstNewComponent); + } else { + auto newNode = createInternalNode(node, path, pathIdx, isNewPath); + addNodeAtPathHelper( + path, pathIdx + 1, newNode, std::move(nodeToAdd), idxOfFirstNewComponent); + } +} + +namespace { +void appendElementToBuilder(stdx::variant elem, + StringData fieldName, + BSONObjBuilder* builder) { + stdx::visit( + visit_helper::Overloaded{ + [&](const mutablebson::Element& element) { + if (element.hasValue()) { + builder->appendAs(element.getValue(), fieldName); + } else if (element.getType() == BSONType::Object) { + BSONObjBuilder subBuilder(builder->subobjStart(fieldName)); + element.writeChildrenTo(&subBuilder); + } else { + invariant(element.getType() == BSONType::Array); + BSONArrayBuilder subBuilder(builder->subarrayStart(fieldName)); + element.writeArrayTo(&subBuilder); + } + }, + [&](BSONElement element) { builder->appendAs(element, fieldName); }}, + elem); +} +void serializeNewlyCreatedDocument(DocumentNode* const node, BSONObjBuilder* out) { + for (auto&& [fieldName, child] : node->inserts) { + if (child->type() == NodeType::kInsert) { + appendElementToBuilder(checked_cast(child)->elt, fieldName, out); + continue; + } + + BSONObjBuilder childBuilder(out->subobjStart(fieldName)); + serializeNewlyCreatedDocument(checked_cast(child), &childBuilder); + } +} + +// Mutually recursive with writeArrayDiff(). +void writeSubNodeHelper(InternalNode* node, BSONObjBuilder* builder); + +void writeArrayDiff(const ArrayNode& node, BSONObjBuilder* builder) { + builder->append(doc_diff::kArrayHeader, true); + for (auto&& [idx, child] : node.children) { + auto idxAsStr = std::to_string(idx); + switch (child->type()) { + case (NodeType::kUpdate): { + const auto& valueNode = checked_cast(*child); + appendElementToBuilder( + valueNode.elt, doc_diff::kUpdateSectionFieldName + idxAsStr, builder); + break; + } + case (NodeType::kInsert): { + const auto& valueNode = checked_cast(*child); + appendElementToBuilder( + valueNode.elt, doc_diff::kUpdateSectionFieldName + idxAsStr, builder); + break; + } + case (NodeType::kDocumentInsert): { + // This represents that the array element is being created which has a sub-object. + // + // For example {$set: {"a.0.c": 1}} when the input document is {a: []}. Here we need + // to create the array element at '0', then sub document 'c'. + BSONObjBuilder childBuilder = + builder->subobjStart(doc_diff::kUpdateSectionFieldName + idxAsStr); + serializeNewlyCreatedDocument(checked_cast(child.get()), + &childBuilder); + break; + } + case (NodeType::kDocumentSubDiff): + case (NodeType::kArray): { + InternalNode* subNode = checked_cast(child.get()); + BSONObjBuilder childBuilder = builder->subobjStart( + std::string(1, doc_diff::kSubDiffSectionFieldPrefix) + idxAsStr); + writeSubNodeHelper(subNode, &childBuilder); + break; + } + case (NodeType::kDelete): { + MONGO_UNREACHABLE; + } + } + } +} + +void writeDocumentDiff(const DocumentNode& node, BSONObjBuilder* builder) { + if (!node.deletes.empty()) { + BSONObjBuilder subBob(builder->subobjStart(doc_diff::kDeleteSectionFieldName)); + for (auto&& [fieldName, node] : node.deletes) { + // The deletes are logged in the form {fieldName: false} in $v:2 format. + subBob.append(fieldName, false); + } + } + if (!node.updates.empty()) { + BSONObjBuilder subBob(builder->subobjStart(doc_diff::kUpdateSectionFieldName)); + for (auto&& [fieldName, node] : node.updates) { + appendElementToBuilder(node->elt, fieldName, &subBob); + } + } + + if (!node.inserts.empty()) { + BSONObjBuilder insertBob(builder->subobjStart(doc_diff::kInsertSectionFieldName)); + for (auto&& [fieldName, child] : node.inserts) { + if (child->type() == NodeType::kInsert) { + appendElementToBuilder( + checked_cast(child)->elt, fieldName, &insertBob); + continue; + } + // This represents a new document. While the modifier-style update system + // was capable of writing paths which would implicitly create new + // documents, there is no equivalent in $v: 2 updates. + // + // For example {$set: {"a.b.c": 1}} would create document 'a' and 'a.b' if + // necessary. + // + // Since $v:2 entries don't have this capability, we instead build the new + // object and log it as an insert. For example, applying the above $set on + // document {a: {}} will be logged as an insert of the value {b: {c: 1}} on + // document 'a'. + invariant(child->type() == NodeType::kDocumentInsert); + BSONObjBuilder subBob = insertBob.subobjStart(fieldName); + serializeNewlyCreatedDocument(checked_cast(child), &subBob); + } + } + + for (auto&& [fieldName, child] : node.subDiffs) { + BSONObjBuilder childBuilder = + builder->subobjStart(std::string(1, doc_diff::kSubDiffSectionFieldPrefix) + fieldName); + writeSubNodeHelper(child, &childBuilder); + } +} + +void writeSubNodeHelper(InternalNode* node, BSONObjBuilder* builder) { + if (node->type() == NodeType::kArray) { + writeArrayDiff(*checked_cast(node), builder); + } else { + invariant(node->type() == NodeType::kDocumentSubDiff || + node->type() == NodeType::kDocumentInsert); + writeDocumentDiff(*checked_cast(node), builder); + } +} +} // namespace + + +BSONObj V2LogBuilder::serialize() const { + BSONObjBuilder topBuilder; + writeDocumentDiff(_root, &topBuilder); + return update_oplog_entry::makeDeltaOplogEntry(topBuilder.obj()); +} +} // namespace mongo::v2_log_builder \ No newline at end of file diff --git a/src/mongo/db/update/v2_log_builder.h b/src/mongo/db/update/v2_log_builder.h new file mode 100644 index 00000000000..d0553cb54b3 --- /dev/null +++ b/src/mongo/db/update/v2_log_builder.h @@ -0,0 +1,200 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/base/status.h" +#include "mongo/bson/bsonobj.h" +#include "mongo/bson/mutable/document.h" +#include "mongo/db/update/document_diff_serialization.h" +#include "mongo/db/update/log_builder_interface.h" +#include "mongo/db/update/runtime_update_path.h" +#include "mongo/util/string_map.h" + +namespace mongo { +namespace v2_log_builder { +/** + * These are structs for a "diff tree" that is constructed while the update is applied. There are + * two types of internal nodes: Document nodes and Array nodes. All other node types are always + * leaves. + * + * When the update is complete, the diff tree is converted into a $v: 2 oplog entry. + */ +enum class NodeType { kDocumentSubDiff, kDocumentInsert, kArray, kDelete, kUpdate, kInsert }; + +struct Node { + virtual ~Node(){}; + virtual NodeType type() const = 0; +}; + + +/** + * This class represents insertion of a BSONElement or mutablebson Element. Note that + * 'DocumentInsertionNode' also repesent an insert for the cases where an object is created + * implicity. + */ +struct InsertElement : public Node { + InsertElement(mutablebson::Element el) : elt(el) {} + InsertElement(BSONElement el) : elt(el) {} + + NodeType type() const override { + return NodeType::kInsert; + } + stdx::variant elt; +}; + +struct UpdateNode : public Node { + UpdateNode(mutablebson::Element el) : elt(el) {} + + NodeType type() const override { + return NodeType::kUpdate; + } + mutablebson::Element elt; +}; + +struct DeleteNode : public Node { + NodeType type() const override { + return NodeType::kDelete; + } +}; + +// Struct representing non-leaf node. +struct InternalNode : public Node { + virtual Node* addChild(StringData fieldName, std::unique_ptr node) = 0; + virtual Node* getChild(StringData fieldName) const = 0; +}; + +struct DocumentNode : public InternalNode { + Node* addChild(StringData fieldName, std::unique_ptr node) override; + + Node* getChild(StringData fieldName) const override { + auto it = children.find(fieldName.toString()); + return (it != children.end()) ? it->second.get() : nullptr; + } + + // We store the raw pointer to each of the child node so that we don't have to look up in + // 'children' map every time. Note that the field names of these modifications will reference + // the field name stored in 'children'. + std::vector> updates; + std::vector> deletes; + std::vector> inserts; + std::vector> subDiffs; + + // We use std::unordered_map here for pointer stability on keys (field names) when a rehash + // happens. + stdx::unordered_map, StringMapHasher, StringMapEq> children; +}; + +// Indicates that the document this node represents was created as part of the update. +// +// E.g. applying the update {$set: {"a.b.c": "foo"}} on document {} will create sub-documents +// at paths "a" and "a.b". +struct DocumentInsertionNode : public DocumentNode { + NodeType type() const override { + return NodeType::kDocumentInsert; + } +}; + +// Indicates a Document internal node which is already in the pre-image document. +struct DocumentSubDiffNode : public DocumentNode { + NodeType type() const override { + return NodeType::kDocumentSubDiff; + } +}; + +struct ArrayNode : public InternalNode { + Node* addChild(StringData fieldName, std::unique_ptr node) override; + + virtual Node* getChild(StringData fieldName) const override { + auto idx = str::parseUnsignedBase10Integer(fieldName); + invariant(idx); + auto it = children.find(*idx); + return (it != children.end()) ? it->second.get() : nullptr; + } + + NodeType type() const override { + return NodeType::kArray; + } + + // The ordering of this map is significant. We are expected to serialize array indexes in + // numeric ascending order (as opposed to "stringified" order where "11" < "8"). + std::map> children; +}; + +/** + * A log builder which can produce $v:2 oplog entries. + * + * This log builder accumulates updates, creates and deletes, and stores them in a tree. When the + * update is done and serialize() is called, the tree is converted into a $v:2 oplog entry. Note + * that we don't need a pre-image for building the oplog. + */ +class V2LogBuilder : public LogBuilderInterface { +public: + /** + * Overload methods from the LogBuilder interface. + */ + Status logUpdatedField(const RuntimeUpdatePath& path, mutablebson::Element elt) override; + Status logCreatedField(const RuntimeUpdatePath& path, + int idxOfFirstNewComponent, + mutablebson::Element elt) override; + Status logCreatedField(const RuntimeUpdatePath& path, + int idxOfFirstNewComponent, + BSONElement elt) override; + Status logDeletedField(const RuntimeUpdatePath& path) override; + + /** + * Converts the in-memory tree to a $v:2 delta oplog entry. + */ + BSONObj serialize() const override; + +private: + // Helpers for maintaining/updating the tree. + Node* createInternalNode(InternalNode* parent, + const RuntimeUpdatePath& fullPath, + size_t pathIdx, + bool newPath); + + // Helpers for adding nodes at a certain path. Returns false if the path was invalid/did + // not exist. + void addNodeAtPathHelper(const RuntimeUpdatePath& path, + size_t pathIdx, + Node* root, + std::unique_ptr nodeToAdd, + boost::optional idxOfFirstNewComponent); + + void addNodeAtPath(const RuntimeUpdatePath& path, + Node* root, + std::unique_ptr nodeToAdd, + boost::optional idxOfFirstNewComponent); + + // Root of the tree. + DocumentSubDiffNode _root; +}; +} // namespace v2_log_builder +} // namespace mongo diff --git a/src/mongo/db/update/v2_log_builder_test.cpp b/src/mongo/db/update/v2_log_builder_test.cpp new file mode 100644 index 00000000000..37e82f48061 --- /dev/null +++ b/src/mongo/db/update/v2_log_builder_test.cpp @@ -0,0 +1,329 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/update/v2_log_builder.h" + +#include "mongo/base/status.h" +#include "mongo/bson/bsonobj.h" +#include "mongo/bson/mutable/mutable_bson_test_utils.h" +#include "mongo/db/json.h" +#include "mongo/unittest/death_test.h" +#include "mongo/unittest/unittest.h" +#include "mongo/util/safe_num.h" + +namespace mongo::v2_log_builder { +namespace { +namespace mmb = mongo::mutablebson; + +/** + * Given a FieldRef, creates a RuntimeUpdatePath based on it, assuming that every numeric component + * is an array index. + */ +RuntimeUpdatePath makeRuntimeUpdatePathAssumeNumericComponentsAreIndexes(StringData path) { + FieldRef fr(path); + std::vector types; + + for (int i = 0; i < fr.numParts(); ++i) { + types.push_back(fr.isNumericPathComponentStrict(i) + ? RuntimeUpdatePath::ComponentType::kArrayIndex + : RuntimeUpdatePath::ComponentType::kFieldName); + } + return RuntimeUpdatePath(fr, std::move(types)); +} + +/** + * Given a FieldRef, creates a RuntimeUpdatePath based on it, assuming that every component is a + * field name. + */ +RuntimeUpdatePath makeRuntimeUpdatePathAssumeAllComponentsFieldNames(std::string path) { + FieldRef fieldRef(path); + return RuntimeUpdatePath( + std::move(fieldRef), + std::vector( + fieldRef.numParts(), RuntimeUpdatePath::ComponentType::kFieldName)); +} + +TEST(V2LogBuilder, UpdateFieldWithTopLevelMutableBsonElement) { + mmb::Document doc; + // Element is an array. Modifying a sub-element of mmb::Element does not store the data + // serialized. So we need to call Element::writeToArray() in those cases to serialize the array. + // We explicity modify the sub-element so that this logic can be tested. + const mmb::Element eltArr = doc.makeElementArray("", + BSON_ARRAY(1 << BSON("sub" + << "obj") + << 3)); + ASSERT_OK(eltArr.leftChild().setValueString("val")); + + // Element is an obj. Modifying a sub-element of mmb::Element does not store the data + // serialized. So we need to call Element::writeChildren() in those cases to serialize the + // object. We explicity modify the sub-element so that this logic can be tested. + mmb::Element eltObj = doc.makeElementObject("", fromjson("{arr: [{a: [0]}, 2]}")); + ASSERT_OK(eltObj.leftChild().rightChild().setValueString("val")); + + // Element is a scalar. + const mmb::Element eltScalar = doc.makeElementString("c.d", "val"); + ASSERT_TRUE(eltScalar.ok()); + + V2LogBuilder lb; + ASSERT_OK( + lb.logUpdatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("a.b"), eltArr)); + ASSERT_OK( + lb.logUpdatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("c.d"), eltObj)); + ASSERT_OK( + lb.logUpdatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("c.e"), eltScalar)); + ASSERT_BSONOBJ_BINARY_EQ( + mongo::fromjson("{ $v: 2, diff: { sa: {u: {b: ['val', {sub: 'obj'}, 3]}}, sc: {u: {d: " + "{arr: [{a: [0]}, 'val']}, e: 'val'}} }}"), + lb.serialize()); +} + +TEST(V2LogBuilder, UpdateFieldMutableBson) { + auto storage = fromjson("{subArray: [{a: [0]}], array: [{}], scalar: 'val'}"); + mmb::Document doc( + fromjson("{topLevel: {obj: {subArray: 1}, array: [[], [0], {}], scalar: 'val'}}")); + + // Element is an obj. Modifying a sub-element of mmb::Element does not store the data + // serialized. So we need to call Element::writeChildren() in those cases to serialize the + // object. We explicity modify the sub-element so that this logic can be tested. + mmb::Element eltObj = doc.root().leftChild().leftChild(); + ASSERT_OK(eltObj.leftChild().setValueBSONElement(storage["subArray"])); + + // Element is an array. + const mmb::Element eltArr = eltObj.rightSibling(); + ASSERT_OK(eltArr.leftChild().setValueBSONElement(storage["array"])); + + // Element is a scalar. + const mmb::Element eltScalar = eltArr.rightSibling(); + ASSERT_TRUE(eltScalar.ok()); + + V2LogBuilder lb; + ASSERT_OK( + lb.logUpdatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("a.b"), eltArr)); + ASSERT_OK( + lb.logUpdatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("c.d"), eltObj)); + ASSERT_OK( + lb.logUpdatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("c.e"), eltScalar)); + ASSERT_BSONOBJ_BINARY_EQ( + mongo::fromjson("{ $v: 2, diff: { sa: {u: {b: [[{}], [0], {}]}}, sc: {u: {d: " + "{subArray: [{a: [0]}]}, e: 'val'}} }}"), + lb.serialize()); +} + +TEST(V2LogBuilder, CreateFieldBSONElt) { + BSONObj storage = fromjson("{arr: ['v','a','l'], obj: {}, scalar: 2}"); + V2LogBuilder lb; + ASSERT_OK(lb.logCreatedField( + makeRuntimeUpdatePathAssumeAllComponentsFieldNames("a.b.d"), 0, storage["arr"])); + ASSERT_OK(lb.logCreatedField( + makeRuntimeUpdatePathAssumeAllComponentsFieldNames("a.c.e"), 0, storage["obj"])); + ASSERT_OK(lb.logCreatedField( + makeRuntimeUpdatePathAssumeAllComponentsFieldNames("a.b.c"), 0, storage["scalar"])); + ASSERT_BSONOBJ_BINARY_EQ( + mongo::fromjson("{ $v: 2, diff: {i : {a: {b: {d: ['v','a','l'], c: 2}, c: {e: {}}} }}}"), + lb.serialize()); +} + +TEST(V2LogBuilder, CreateFieldMutableBson) { + mmb::Document doc; + V2LogBuilder lb; + + const mmb::Element elt_ab = doc.makeElementInt("a.b", 1); + ASSERT_TRUE(elt_ab.ok()); + ASSERT_OK(lb.logCreatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("a.b"), + 0, // idxOfFirstNewComponent (unused) + elt_ab)); + ASSERT_BSONOBJ_BINARY_EQ(mongo::fromjson("{ $v: 2, diff: { i: {a: {b: 1}}}}"), lb.serialize()); +} + +TEST(V2LogBuilder, CreateFieldMergeInOrder) { + mmb::Document doc; + const mmb::Element eltScalar = doc.makeElementInt("scalar", 1); + const mmb::Element eltObj = doc.makeElementObject("obj", fromjson("{arr: [{a: [0]}]}")); + ASSERT_TRUE(eltObj.ok()); + ASSERT_TRUE(eltScalar.ok()); + + // Test where only a 'root' object exists and logCreateField() needs to create sub-objects + // recursively. + const int idxOfFirstNewComponent = 1; + V2LogBuilder lb; + ASSERT_OK(lb.logCreatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("root.d"), + idxOfFirstNewComponent, + eltScalar)); + ASSERT_OK(lb.logCreatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("root.b.a"), + idxOfFirstNewComponent, + eltScalar)); + ASSERT_OK(lb.logCreatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("root.c.b"), + idxOfFirstNewComponent, + eltObj)); + ASSERT_OK(lb.logCreatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("root.c.e"), + idxOfFirstNewComponent, + eltScalar)); + ASSERT_OK(lb.logCreatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("root.a"), + idxOfFirstNewComponent, + eltObj)); + ASSERT_BSONOBJ_BINARY_EQ(mongo::fromjson("{ $v: 2, diff: { sroot: {i: {d: 1, b: {a: 1}, c: {b: " + "{arr: [{a: [0]}]}, e: 1}, a: {arr: [{a: [0]}]}} }}}"), + lb.serialize()); +} + +TEST(V2LogBuilder, BasicDelete) { + mmb::Document doc; + V2LogBuilder lb; + ASSERT_OK(lb.logDeletedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("x"))); + ASSERT_BSONOBJ_BINARY_EQ(mongo::fromjson("{ $v: 2, diff: { d: {x: false}}}"), lb.serialize()); +} + +TEST(V2LogBuilder, VerifyDeletesOrder) { + mmb::Document doc; + V2LogBuilder lb; + + ASSERT_OK(lb.logDeletedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("a.c"))); + ASSERT_OK(lb.logDeletedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("a.b"))); + ASSERT_OK(lb.logDeletedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("x.z"))); + ASSERT_OK(lb.logDeletedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("x.y"))); + + ASSERT_BSONOBJ_BINARY_EQ( + mongo::fromjson( + "{ $v: 2, diff: { sa: {d: {c: false, b: false}}, sx: {d: {z: false, y: false}} }}"), + lb.serialize()); +} + +TEST(V2LogBuilder, WithAllModificationTypes) { + mmb::Document doc; + V2LogBuilder lb; + + const mmb::Element elt_ab = doc.makeElementInt("", 1); + ASSERT_TRUE(elt_ab.ok()); + ASSERT_OK( + lb.logUpdatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("a.b.c"), elt_ab)); + + const mmb::Element elt_cd = doc.makeElementInt("", 2); + ASSERT_TRUE(elt_cd.ok()); + + ASSERT_OK(lb.logCreatedField( + makeRuntimeUpdatePathAssumeAllComponentsFieldNames("a.b.d.e"), 3, elt_cd)); + + ASSERT_OK(lb.logDeletedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("a.b.e"))); + ASSERT_BSONOBJ_BINARY_EQ( + mongo::fromjson("{ $v: 2, diff: {sa: {sb: {d: {e: false}, u: {c: 1}, sd: {i: {e: 2}} }}}}"), + lb.serialize()); +} + +TEST(V2LogBuilder, VerifySubDiffsAreOrderedCorrectly) { + mmb::Document doc; + V2LogBuilder lb; + + const mmb::Element elt_ab = doc.makeElementInt("a.b", 1); + ASSERT_TRUE(elt_ab.ok()); + ASSERT_OK( + lb.logUpdatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("a.b.c"), elt_ab)); + ASSERT_OK( + lb.logCreatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("a.c.d"), 2, elt_ab)); + + const mmb::Element elt_xy = doc.makeElementInt("x.y", 2); + ASSERT_TRUE(elt_xy.ok()); + ASSERT_OK( + lb.logUpdatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("x.y.z"), elt_xy)); + ASSERT_OK( + lb.logUpdatedField(makeRuntimeUpdatePathAssumeAllComponentsFieldNames("x.z.a"), elt_xy)); + + ASSERT_BSONOBJ_BINARY_EQ( + mongo::fromjson("{ $v: 2, diff: { sa: {sb: {u: {c: 1}}, sc: {i: {d: 1}}}, " + "sx: {sy: {u: {z: 2}}, sz: {u: {a: 2}}} }}"), + lb.serialize()); +} + +TEST(V2LogBuilder, ArrayModifications) { + mmb::Document doc; + V2LogBuilder lb; + + const mmb::Element elt_ab = doc.makeElementInt("", 1); + ASSERT_TRUE(elt_ab.ok()); + ASSERT_OK(lb.logUpdatedField( + makeRuntimeUpdatePathAssumeNumericComponentsAreIndexes("a.0.c.update"), elt_ab)); + + // Element is an obj. + const mmb::Element eltObj = doc.makeElementObject("", fromjson("{arr: [{a: [0]}]}")); + ASSERT_TRUE(eltObj.ok()); + + const mmb::Element eltScalar = doc.makeElementInt("", 2); + ASSERT_TRUE(eltScalar.ok()); + + // The order of the update of array elements need not be maintained by the caller. + ASSERT_OK(lb.logCreatedField( + makeRuntimeUpdatePathAssumeNumericComponentsAreIndexes("a.10.insert.a"), 2, eltScalar)); + ASSERT_OK( + lb.logUpdatedField(makeRuntimeUpdatePathAssumeNumericComponentsAreIndexes("a.3"), elt_ab)); + ASSERT_OK(lb.logCreatedField( + makeRuntimeUpdatePathAssumeNumericComponentsAreIndexes("a.0.c.insert.a"), 3, eltScalar)); + ASSERT_OK(lb.logCreatedField( + makeRuntimeUpdatePathAssumeNumericComponentsAreIndexes("a.0.c.insert.f"), 3, eltScalar)); + ASSERT_OK(lb.logCreatedField( + makeRuntimeUpdatePathAssumeNumericComponentsAreIndexes("a.1"), 1, eltObj)); + ASSERT_OK( + lb.logDeletedField(makeRuntimeUpdatePathAssumeNumericComponentsAreIndexes("a.0.c.del"))); + + ASSERT_BSONOBJ_BINARY_EQ( + mongo::fromjson("{$v: 2, diff: {sa: {a: true, s0: {sc: {d: {del: false}, u: {update: 1}, " + "i: {insert: {a: 2, f: 2}}}}, u1: {arr: [{a: [0]}]}, u3: 1, s10: {i: " + "{insert: {a: 2}}} }}}"), + lb.serialize()); +} + +TEST(V2LogBuilder, ImplicityArrayElementCreationAllowed) { + mmb::Document doc; + V2LogBuilder lb; + + const mmb::Element elt = doc.makeElementInt("", 1); + ASSERT_TRUE(elt.ok()); + ASSERT_OK(lb.logCreatedField( + makeRuntimeUpdatePathAssumeNumericComponentsAreIndexes("a.10.b.a"), 1, elt)); + ASSERT_OK(lb.logCreatedField( + makeRuntimeUpdatePathAssumeNumericComponentsAreIndexes("a.2.b.a"), 1, elt)); + ASSERT_OK( + lb.logUpdatedField(makeRuntimeUpdatePathAssumeNumericComponentsAreIndexes("a.0.b.a"), elt)); + + ASSERT_BSONOBJ_BINARY_EQ( + mongo::fromjson("{$v: 2, diff: {sa: {a: true, s0: {sb: {u: {a: 1}}}, u2: " + "{b: {a: 1}}, u10: {b: {a: 1}} }}}"), + lb.serialize()); +} + +DEATH_TEST(V2LogBuilder, ImplicityArrayCreationDisallowed, "invariant") { + mmb::Document doc; + V2LogBuilder lb; + + const mmb::Element elt = doc.makeElementInt("", 1); + ASSERT_TRUE(elt.ok()); + auto status = lb.logCreatedField( + makeRuntimeUpdatePathAssumeNumericComponentsAreIndexes("a.10.insert.a"), 0, elt); +} + +} // namespace +} // namespace mongo::v2_log_builder -- cgit v1.2.1