diff options
author | Arun Banala <arun.banala@mongodb.com> | 2020-06-11 10:47:55 +0100 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-06-17 12:57:47 +0000 |
commit | 069a2a9be9845991c8d6134e1efb56c0b85ab10d (patch) | |
tree | 5994fcbc533199c50b704011fda8610859049c6e | |
parent | 1edd4798f8e72f226ad69269a8b0154b247a8049 (diff) | |
download | mongo-069a2a9be9845991c8d6134e1efb56c0b85ab10d.tar.gz |
SERVER-47725 Implement algorithm for diffing BSON objects
-rw-r--r-- | src/mongo/db/update/SConscript | 4 | ||||
-rw-r--r-- | src/mongo/db/update/document_diff_calculator.cpp | 170 | ||||
-rw-r--r-- | src/mongo/db/update/document_diff_calculator.h | 44 | ||||
-rw-r--r-- | src/mongo/db/update/document_diff_calculator_test.cpp | 228 | ||||
-rw-r--r-- | src/mongo/db/update/document_diff_serialization.cpp | 1 | ||||
-rw-r--r-- | src/mongo/db/update/document_diff_serialization.h | 58 | ||||
-rw-r--r-- | src/mongo/db/update/document_diff_serialization_test.cpp | 43 |
7 files changed, 545 insertions, 3 deletions
diff --git a/src/mongo/db/update/SConscript b/src/mongo/db/update/SConscript index f240694edbf..47073586e53 100644 --- a/src/mongo/db/update/SConscript +++ b/src/mongo/db/update/SConscript @@ -26,10 +26,11 @@ env.Library( 'addtoset_node.cpp', 'arithmetic_node.cpp', 'array_culling_node.cpp', - 'document_diff_serialization.cpp', 'bit_node.cpp', 'compare_node.cpp', 'current_date_node.cpp', + 'document_diff_calculator.cpp', + 'document_diff_serialization.cpp', 'modifier_node.cpp', 'modifier_table.cpp', 'object_replace_executor.cpp', @@ -78,6 +79,7 @@ env.CppUnitTest( 'bit_node_test.cpp', 'compare_node_test.cpp', 'current_date_node_test.cpp', + 'document_diff_calculator_test.cpp', 'document_diff_serialization_test.cpp', 'field_checker_test.cpp', 'log_builder_test.cpp', diff --git a/src/mongo/db/update/document_diff_calculator.cpp b/src/mongo/db/update/document_diff_calculator.cpp new file mode 100644 index 00000000000..7019d4a675c --- /dev/null +++ b/src/mongo/db/update/document_diff_calculator.cpp @@ -0,0 +1,170 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/db/update/document_diff_calculator.h" + +namespace mongo::doc_diff { +namespace { + +// Note: This function is mutually with computeArrayDiff() and computeDocDiff(). +template <class DiffBuilder, class T> +void calculateSubDiffHelper(const BSONElement& preVal, + const BSONElement& postVal, + T fieldIdentifier, + DiffBuilder* builder); + +bool computeArrayDiff(const BSONObj& pre, + const BSONObj& post, + doc_diff::ArrayDiffBuilder* diffBuilder) { + auto preItr = BSONObjIterator(pre); + auto postItr = BSONObjIterator(post); + const size_t postObjSize = static_cast<size_t>(post.objsize()); + size_t nFieldsInPostArray = 0; + for (; preItr.more() && postItr.more(); ++preItr, ++postItr, ++nFieldsInPostArray) { + // Bailout if the generated diff so far is larger than the 'post' object. + if (postObjSize < diffBuilder->computeApproxSize()) { + return false; + } + if (!(*preItr).binaryEqual(*postItr)) { + // If both are arrays or objects, then recursively compute the diff of the respective + // array or object. + if ((*preItr).type() == (*postItr).type() && + ((*preItr).type() == BSONType::Object || (*preItr).type() == BSONType::Array)) { + calculateSubDiffHelper(*preItr, *postItr, nFieldsInPostArray, diffBuilder); + } else { + diffBuilder->addUpdate(nFieldsInPostArray, *postItr); + } + } + } + + // When we reach here, only one of postItr or preItr can have more fields. If postItr has more + // fields, we need to add all the remaining fields. + for (; postItr.more(); ++postItr, ++nFieldsInPostArray) { + diffBuilder->addUpdate(nFieldsInPostArray, *postItr); + } + + // If preItr has more fields, we can ignore the remaining fields, since we only need to do a + // resize operation. + if (preItr.more()) { + diffBuilder->setResize(nFieldsInPostArray); + } + return postObjSize > diffBuilder->computeApproxSize(); +} + +bool computeDocDiff(const BSONObj& pre, + const BSONObj& post, + doc_diff::DocumentDiffBuilder* diffBuilder) { + BSONObjIterator preItr(pre); + BSONObjIterator postItr(post); + const size_t postObjSize = static_cast<size_t>(post.objsize()); + std::set<StringData> deletes; + while (preItr.more() && postItr.more()) { + auto preVal = *preItr; + auto postVal = *postItr; + + // Bailout if the generated diff so far is larger than the 'post' object. + if (postObjSize < diffBuilder->computeApproxSize()) { + return false; + } + if (preVal.fieldNameStringData() == postVal.fieldNameStringData()) { + if (preVal.binaryEqual(postVal)) { + // They're identical. Move on. + } else if (preVal.type() == postVal.type() && + (preVal.type() == BSONType::Object || preVal.type() == BSONType::Array)) { + // Both are either arrays or objects, recursively compute the diff of the respective + // array or object. + calculateSubDiffHelper(preVal, postVal, preVal.fieldNameStringData(), diffBuilder); + } else { + // Any other case, just replace with the 'postVal'. + diffBuilder->addUpdate((*preItr).fieldNameStringData(), postVal); + } + preItr.next(); + postItr.next(); + } else { + // If the 'preVal' field name does not exist in the 'post' object then, just remove it. + // If it present, we do nothing for now, since the field gets inserted later. + deletes.insert(preVal.fieldNameStringData()); + preItr.next(); + } + } + + // When we reach here, only one of postItr or preItr can have more fields. Record remaining + // fields in preItr as removals. + for (; preItr.more(); preItr.next()) { + // Note that we don't need to record these into the 'deletes' set because there are no more + // fields in the post image. + invariant(!postItr.more()); + diffBuilder->addDelete((*preItr).fieldNameStringData()); + } + + // Record remaining fields in postItr as creates. + for (; postItr.more(); postItr.next()) { + auto fieldName = (*postItr).fieldNameStringData(); + diffBuilder->addInsert(fieldName, *postItr); + deletes.erase(fieldName); + } + for (auto&& deleteField : deletes) { + diffBuilder->addDelete(deleteField); + } + return postObjSize > diffBuilder->computeApproxSize(); +} + +template <class DiffBuilder, class T> +void calculateSubDiffHelper(const BSONElement& preVal, + const BSONElement& postVal, + T fieldIdentifier, + DiffBuilder* builder) { + if (preVal.type() == BSONType::Object) { + auto subDiffBuilder = builder->startSubObjDiff(fieldIdentifier); + const auto hasSubDiff = + computeDocDiff(preVal.embeddedObject(), postVal.embeddedObject(), &subDiffBuilder); + if (!hasSubDiff) { + subDiffBuilder.abandon(); + builder->addUpdate(fieldIdentifier, postVal); + } + } else { + auto subDiffBuilder = builder->startSubArrDiff(fieldIdentifier); + const auto hasSubDiff = + computeArrayDiff(preVal.embeddedObject(), postVal.embeddedObject(), &subDiffBuilder); + if (!hasSubDiff) { + subDiffBuilder.abandon(); + builder->addUpdate(fieldIdentifier, postVal); + } + } +} +} // namespace + +boost::optional<doc_diff::Diff> computeDiff(const BSONObj& pre, const BSONObj& post) { + doc_diff::DocumentDiffBuilder diffBuilder; + bool hasDiff = computeDocDiff(pre, post, &diffBuilder); + return hasDiff ? boost::optional<doc_diff::Diff>(diffBuilder.release()) : boost::none; +} +} // namespace mongo::doc_diff diff --git a/src/mongo/db/update/document_diff_calculator.h b/src/mongo/db/update/document_diff_calculator.h new file mode 100644 index 00000000000..62436511252 --- /dev/null +++ b/src/mongo/db/update/document_diff_calculator.h @@ -0,0 +1,44 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/bson/bsonobj.h" +#include "mongo/db/update/document_diff_serialization.h" + + +namespace mongo::doc_diff { +/** + * Returns the delta between 'pre' and 'post' by recursively iterating the object. If the size + * of the computed delta is larger than the 'post' object then the function returns + * 'boost::none'. + */ +boost::optional<doc_diff::Diff> computeDiff(const BSONObj& pre, const BSONObj& post); + +}; // namespace mongo::doc_diff diff --git a/src/mongo/db/update/document_diff_calculator_test.cpp b/src/mongo/db/update/document_diff_calculator_test.cpp new file mode 100644 index 00000000000..15cace74055 --- /dev/null +++ b/src/mongo/db/update/document_diff_calculator_test.cpp @@ -0,0 +1,228 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/bson/json.h" +#include "mongo/db/update/document_diff_calculator.h" +#include "mongo/unittest/unittest.h" + +namespace mongo { +namespace { + +void assertBinaryEq(const BSONObj& objA, const BSONObj& objB) { + // This *MUST* check for binary equality, which is what we enforce between replica set + // members. Logical equality (through woCompare() or assertBinaryEq) is not strong enough. + if (!objA.binaryEqual(objB)) { + FAIL(str::stream() << "Objects not equal. Expected " << objA << " == " << objB); + } +} + +TEST(DocumentDiffTest, SameObjectsNoDiff) { + auto assertDiffEmpty = [](const BSONObj& doc) { + auto diff = doc_diff::computeDiff(doc, doc); + ASSERT(diff); + assertBinaryEq(*diff, BSONObj()); + }; + assertDiffEmpty(fromjson("{field1: 1}")); + assertDiffEmpty(fromjson("{field1: []}")); + assertDiffEmpty(fromjson("{field1: [{}]}")); + assertDiffEmpty(fromjson("{field1: [0, 1, 2, 3, 4]}")); + assertDiffEmpty(fromjson("{field1: null}")); + assertDiffEmpty(fromjson("{'0': [0]}")); + assertDiffEmpty(fromjson("{'0': [[{'0': [[{'0': [[]]} ]]}]]}")); +} + +TEST(DocumentDiffTest, LargeDelta) { + ASSERT_FALSE( + doc_diff::computeDiff(fromjson("{a: 1, b: 2, c: 3}"), fromjson("{A: 1, B: 2, C: 3}"))); + + // Inserting field at the beginning produces a large delta. + ASSERT_FALSE(doc_diff::computeDiff(fromjson("{b: 1, c: 1, d: 1}"), + fromjson("{a: 1, b: 1, c: 1, d: 1}"))); + + // Empty objects. + ASSERT_FALSE(doc_diff::computeDiff(BSONObj(), BSONObj())); + + // Full diff. + ASSERT_FALSE(doc_diff::computeDiff(fromjson("{b: 1}"), BSONObj())); + ASSERT_FALSE(doc_diff::computeDiff(BSONObj(), fromjson("{b: 1}"))); +} + +TEST(DocumentDiffTest, DeleteAndInsertFieldAtTheEnd) { + auto diff = doc_diff::computeDiff(fromjson("{a: 1, b: 2, c: 3, d: 4}"), + fromjson("{b: 2, c: 3, d: 4, a: 1}")); + ASSERT(diff); + assertBinaryEq(*diff, fromjson("{i: {a: 1}}")); +} + +TEST(DocumentDiffTest, DeletesRecordedInAscendingOrderOfFieldNames) { + auto diff = doc_diff::computeDiff(fromjson("{b: 1, a: 2, c: 3, d: 4, e: 'value'}"), + fromjson("{c: 3, d: 4, e: 'value'}")); + ASSERT(diff); + assertBinaryEq(*diff, fromjson("{d: {a: false, b: false}}")); + + // Delete at the end still follow the order. + diff = doc_diff::computeDiff( + fromjson("{b: 1, a: 2, c: 'value', d: 'value', e: 'value', g: 1, f: 1}"), + fromjson("{c: 'value', d: 'value', e: 'value'}")); + ASSERT(diff); + assertBinaryEq(*diff, fromjson("{d: {g: false, f: false, a: false, b: false}}")); +} + +TEST(DocumentDiffTest, DataTypeChangeRecorded) { + const auto objWithDoubleValue = + fromjson("{a: 1, b: 2, c: {subField1: 1, subField2: 3.0}, d: 4}"); + const auto objWithIntValue = fromjson("{a: 1, b: 2, c: {subField1: 1, subField2: 3}, d: 4}"); + const auto objWithLongValue = + fromjson("{a: 1, b: 2, c: {subField1: 1, subField2: NumberLong(3)}, d: 4}"); + auto diff = doc_diff::computeDiff(objWithDoubleValue, objWithIntValue); + ASSERT(diff); + assertBinaryEq(*diff, fromjson("{s: {c: {u: {subField2: 3}} }}")); + + diff = doc_diff::computeDiff(objWithIntValue, objWithLongValue); + ASSERT(diff); + assertBinaryEq(*diff, fromjson("{s: {c: {u: {subField2: NumberLong(3)}} }}")); + + diff = doc_diff::computeDiff(objWithLongValue, objWithDoubleValue); + ASSERT(diff); + assertBinaryEq(*diff, fromjson("{s: {c: {u: {subField2: 3.0}} }}")); +} + +TEST(DocumentDiffTest, NullAndMissing) { + auto diff = doc_diff::computeDiff(fromjson("{a: null}"), fromjson("{}")); + ASSERT_FALSE(diff); + + diff = doc_diff::computeDiff(fromjson("{a: null, b: undefined, c: null, d: 'someVal'}"), + fromjson("{a: null, b: undefined, c: undefined, d: 'someVal'}")); + ASSERT(diff); + assertBinaryEq(*diff, fromjson("{u: {c: undefined}}")); +} + +TEST(DocumentDiffTest, FieldOrder) { + auto diff = doc_diff::computeDiff(fromjson("{a: 1, b: 2, c: {p: 1, q: 1, s: 1, r: 2}, d: 4}"), + fromjson("{a: 1, b: 2, c: {p: 1, q: 1, r: 2, s: 1}, d: 4}")); + ASSERT(diff); + assertBinaryEq(*diff, fromjson("{s: {c: {i: {s: 1}} }}")); +} + +TEST(DocumentDiffTest, SimpleArrayPush) { + auto diff = doc_diff::computeDiff(fromjson("{field1: 'abcd', field2: [1, 2, 3]}"), + fromjson("{field1: 'abcd', field2: [1, 2, 3, 4]}")); + ASSERT(diff); + assertBinaryEq(*diff, fromjson("{s: {field2: {a: true, '3': {u: 4}}}}")); +} + +TEST(DocumentDiffTest, NestedArray) { + auto diff = doc_diff::computeDiff(fromjson("{field1: 'abcd', field2: [1, 2, 3, [[2]]]}"), + fromjson("{field1: 'abcd', field2: [1, 2, 3, [[4]]]}")); + ASSERT(diff); + // When the sub-array delta is larger than the size of the sub-array, we record it as an update + // operation. + assertBinaryEq(*diff, fromjson("{s: {field2: {a: true, '3': {u: [[4]]}}}}")); + + diff = doc_diff::computeDiff( + fromjson("{field1: 'abcd', field2: [1, 2, 3, [1, 'longString', [2], 4, 5, 6], 5, 5, 5]}"), + fromjson("{field1: 'abcd', field2: [1, 2, 3, [1, 'longString', [4], 4], 5, 6]}")); + ASSERT(diff); + assertBinaryEq(*diff, + fromjson("{s: {field2: {a: true, l: 6, '3': {s: {a: true, l: 4, " + "'2': {u: [4]}}}, '5': {u: 6}} }}")); +} + +TEST(DocumentDiffTest, SubObjInSubArrayUpdateElements) { + auto diff = doc_diff::computeDiff( + fromjson("{field1: 'abcd', field2: [1, 2, 3, " + "{field3: ['veryLargeStringValueveryLargeStringValue', 2, 3, 4]}]}"), + fromjson("{field1: 'abcd', field2: [1, 2, 3, {'field3': " + "['veryLargeStringValueveryLargeStringValue', 2, 4, 3, " + "5]}]}")); + + ASSERT(diff); + assertBinaryEq(*diff, + fromjson("{s: {field2: {a: true, '3': {s: {s: {field3: {a: true, '2': " + "{u: 4}, '3': {u: 3}, '4': {u: 5}} }} }} }}")); +} + +TEST(DocumentDiffTest, SubObjInSubArrayDeleteElements) { + auto diff = doc_diff::computeDiff( + fromjson("{field1: 'abcd', field2: [1, 2, 3, {'field3': ['largeString', 2, 3, 4, 5]}]}"), + fromjson("{field1: 'abcd', field2: [1, 2, 3, {'field3': ['largeString', 2, 3, 5]}]}")); + ASSERT(diff); + assertBinaryEq(*diff, + fromjson("{s: {field2: {a: true, '3': {s: {s: {field3: {a: true, l: 4, '3': " + "{u: 5}} }} }} }}")); +} + +TEST(DocumentDiffTest, NestedSubObjs) { + auto diff = doc_diff::computeDiff( + fromjson("{level1Field1: 'abcd', level1Field2: {level2Field1: {level3Field1: {p: 1}, " + "level3Field2: {q: 1}}, level2Field2: 2}, level1Field3: 3}"), + fromjson("{level1Field1: 'abcd', level1Field2: {level2Field1: {level3Field1: {q: 1}, " + "level3Field2: {q: 1}}, level2Field2: 2}, level1Field3: '3'}")); + ASSERT(diff); + assertBinaryEq(*diff, + fromjson("{u: {level1Field3: '3'}, s: {level1Field2: {s: {'level2Field1': " + "{u: {level3Field1: {q: 1}}} }} }}")); +} + +TEST(DocumentDiffTest, SubArrayInSubArrayLargeDelta) { + auto diff = doc_diff::computeDiff( + fromjson("{field1: 'abcd', field2: [1, 2, 3, {field3: [2, 3, 4, 5]}]}"), + fromjson("{field1: 'abcd', field2: [1, 2, 3, {field3: [1, 2, 3, 4, 5]}]}")); + ASSERT(diff); + assertBinaryEq(*diff, + fromjson("{s: {field2: {a: true, '3': {u: {field3: [1, 2, 3, 4, 5]}}} }}")); +} + +TEST(DocumentDiffTest, SubObjInSubArrayLargeDelta) { + auto diff = + doc_diff::computeDiff(fromjson("{field1: [1, 2, 3, 4, 5, 6, {a: 1, b: 2, c: 3, d: 4}, 7]}"), + fromjson("{field1: [1, 2, 3, 4, 5, 6, {p: 1, q: 2}, 7]}")); + ASSERT(diff); + assertBinaryEq(*diff, fromjson("{s: {field1: {a: true, '6': {u: {p: 1, q: 2} }} }}")); +} + +TEST(DocumentDiffTest, SubObjInSubObjLargeDelta) { + auto diff = doc_diff::computeDiff( + fromjson("{field: {p: 1, q: 2, r: {a: 1, b: 2, c: 3, 'd': 4}, s: 3}}"), + fromjson("{field: {p: 1, q: 2, r: {p: 1, q: 2}, s: 3}}")); + ASSERT(diff); + assertBinaryEq(*diff, fromjson("{s: {field: {u: {r: {p: 1, q: 2} }} }}")); +} + +TEST(DocumentDiffTest, SubArrayInSubObjLargeDelta) { + auto diff = doc_diff::computeDiff(fromjson("{field: {p: 1, q: 2, r: [1, 3, 4, 5], s: 3}}"), + fromjson("{field: {p: 1, q: 2, r: [1, 2, 3, 4], s: 3}}")); + ASSERT(diff); + assertBinaryEq(*diff, fromjson("{s: {field: {u: {r: [1, 2, 3, 4]}} }}")); +} + +} // namespace +} // namespace mongo diff --git a/src/mongo/db/update/document_diff_serialization.cpp b/src/mongo/db/update/document_diff_serialization.cpp index 0ef9758dd24..1bec8a0313e 100644 --- a/src/mongo/db/update/document_diff_serialization.cpp +++ b/src/mongo/db/update/document_diff_serialization.cpp @@ -33,7 +33,6 @@ #include "mongo/util/str.h" #include "mongo/util/string_map.h" -#include "mongo/util/visit_helper.h" namespace mongo::doc_diff { namespace { diff --git a/src/mongo/db/update/document_diff_serialization.h b/src/mongo/db/update/document_diff_serialization.h index bbed014d72a..03acb763aea 100644 --- a/src/mongo/db/update/document_diff_serialization.h +++ b/src/mongo/db/update/document_diff_serialization.h @@ -31,6 +31,7 @@ #include "mongo/bson/bsonobjbuilder.h" #include "mongo/stdx/variant.h" +#include "mongo/util/visit_helper.h" // This file contains classes for serializing document diffs to a format that can be stored in the // oplog. Any code/machinery which manipulates document diffs should do so through these classes. @@ -127,6 +128,34 @@ public: _childSubDiffIndex = {}; } + size_t computeApproxSize() const { + // TODO SERVER-48602: We need to ensure that this function returns in O(1). Incrementally + // computing this size might be one option. + size_t size = sizeof(int) /* Size of object */ + 1 /* Type byte */ + kArrayHeader.size() + + 1 /* null terminator */ + 1 /*type byte */ + 1 /* bool size*/; + + if (_newSize) { + size += kResizeSectionFieldName.size() + 1 /* Null terminator */ + 1 /* Type byte */ + + sizeof(int) /* size of value */; + } + + for (auto&& [idx, modification] : _modifications) { + stdx::visit(visit_helper::Overloaded{[idx = idx, &size](const Diff& subDiff) { + size += sizeof(idx) + + kSubDiffSectionFieldName.size() + + subDiff.objsize() + 2; + }, + [idx = idx, &size](BSONElement elt) { + size += sizeof(idx) + + kUpdateSectionFieldName.size() + + elt.size() + 2; + }}, + modification); + } + + return size; + } + private: // The top-level of a diff is never an array diff. One can only construct an ArrayDiffBuilder // from a parent DocumentDiffBuilder. @@ -149,7 +178,7 @@ private: DiffBuilderBase* _parent = nullptr; friend class DocumentDiffBuilder; -}; +}; // namespace doc_diff /** * Class for building document diffs. @@ -227,6 +256,33 @@ public: _childSubDiffField = {}; } + size_t computeApproxSize() { + // TODO SERVER-48602: We need to ensure that this function returns in O(1). Incrementally + // computing this size might be one option. + size_t size = sizeof(int) /* Size of object */ + 1 /* Type byte */; + + if (!_updates.asTempObj().isEmpty()) { + size += 1 /* Type byte */ + kUpdateSectionFieldName.size() /* FieldName */ + + 1 /* Null terminator */ + _updates.bb().len() + 1 /* Null terminator */; + } + + if (!_inserts.asTempObj().isEmpty()) { + size += 1 /* Type byte */ + kInsertSectionFieldName.size() /* FieldName */ + + 1 /* Null terminator */ + _inserts.bb().len() + 1 /* Null terminator */; + } + + if (!_deletes.asTempObj().isEmpty()) { + size += 1 /* Type byte */ + kDeleteSectionFieldName.size() /* FieldName */ + + 1 /* Null terminator */ + _deletes.bb().len() + 1 /* Null terminator */; + } + + if (!_subDiffs.asTempObj().isEmpty()) { + size += 1 /* Type byte */ + kSubDiffSectionFieldName.size() /* FieldName */ + + 1 /* Null terminator */ + _subDiffs.bb().len() + 1 /* Null terminator */; + } + return size; + } + private: DocumentDiffBuilder(DiffBuilderBase* parent) : _parent(parent) {} diff --git a/src/mongo/db/update/document_diff_serialization_test.cpp b/src/mongo/db/update/document_diff_serialization_test.cpp index 2754cb5ddb7..8c830e2343c 100644 --- a/src/mongo/db/update/document_diff_serialization_test.cpp +++ b/src/mongo/db/update/document_diff_serialization_test.cpp @@ -343,5 +343,48 @@ TEST(DiffSerializationTest, SubArrayDiffAbandon) { fromjson("{d : {dField1: false, dField2: false}, u: {uField: 1}," "s: {arr2: {a: true, l: 5}}}")); } + +TEST(DiffSerializationTest, ValidateComputeApproxSize) { + DocumentDiffBuilder builder; + builder.addDelete("deleteField"); + builder.addInsert("insert", BSON("" << 4).firstElement()); + builder.addUpdate("update1", + BSON("" + << "update") + .firstElement()); + builder.addDelete(""); + { + // Ensure size of the sub-array diff is included. + auto subDiff = builder.startSubArrDiff("subArray"); + subDiff.setResize(5); + subDiff.addUpdate(2, BSON("" << 4).firstElement()); + subDiff.addUpdate(2, + BSON("" + << "value") + .firstElement()); + + auto subSubDiff = subDiff.startSubObjDiff(22); + subSubDiff.addInsert("insert2", + BSON("" + << "") + .firstElement()); + subSubDiff.addUpdate("update3", BSON("" << BSONNULL).firstElement()); + } + { + // Ensure size of the sub-object diff is included. + auto subDiff = builder.startSubObjDiff("subObj"); + subDiff.addUpdate("setArray", + BSON("" << BSON_ARRAY("val1" + << "val2" << 3)) + .firstElement()); + } + // Update with a sub-object. + builder.addUpdate("update4", BSON("" << BSON("nestedObj" << 4LL)).firstElement()); + + auto computedSize = builder.computeApproxSize(); + auto out = builder.release(); + ASSERT_EQ(computedSize, out.objsize()); +} + } // namespace } // namespace mongo::doc_diff |