summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArun Banala <arun.banala@mongodb.com>2020-06-11 10:47:55 +0100
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-06-17 12:57:47 +0000
commit069a2a9be9845991c8d6134e1efb56c0b85ab10d (patch)
tree5994fcbc533199c50b704011fda8610859049c6e
parent1edd4798f8e72f226ad69269a8b0154b247a8049 (diff)
downloadmongo-069a2a9be9845991c8d6134e1efb56c0b85ab10d.tar.gz
SERVER-47725 Implement algorithm for diffing BSON objects
-rw-r--r--src/mongo/db/update/SConscript4
-rw-r--r--src/mongo/db/update/document_diff_calculator.cpp170
-rw-r--r--src/mongo/db/update/document_diff_calculator.h44
-rw-r--r--src/mongo/db/update/document_diff_calculator_test.cpp228
-rw-r--r--src/mongo/db/update/document_diff_serialization.cpp1
-rw-r--r--src/mongo/db/update/document_diff_serialization.h58
-rw-r--r--src/mongo/db/update/document_diff_serialization_test.cpp43
7 files changed, 545 insertions, 3 deletions
diff --git a/src/mongo/db/update/SConscript b/src/mongo/db/update/SConscript
index f240694edbf..47073586e53 100644
--- a/src/mongo/db/update/SConscript
+++ b/src/mongo/db/update/SConscript
@@ -26,10 +26,11 @@ env.Library(
'addtoset_node.cpp',
'arithmetic_node.cpp',
'array_culling_node.cpp',
- 'document_diff_serialization.cpp',
'bit_node.cpp',
'compare_node.cpp',
'current_date_node.cpp',
+ 'document_diff_calculator.cpp',
+ 'document_diff_serialization.cpp',
'modifier_node.cpp',
'modifier_table.cpp',
'object_replace_executor.cpp',
@@ -78,6 +79,7 @@ env.CppUnitTest(
'bit_node_test.cpp',
'compare_node_test.cpp',
'current_date_node_test.cpp',
+ 'document_diff_calculator_test.cpp',
'document_diff_serialization_test.cpp',
'field_checker_test.cpp',
'log_builder_test.cpp',
diff --git a/src/mongo/db/update/document_diff_calculator.cpp b/src/mongo/db/update/document_diff_calculator.cpp
new file mode 100644
index 00000000000..7019d4a675c
--- /dev/null
+++ b/src/mongo/db/update/document_diff_calculator.cpp
@@ -0,0 +1,170 @@
+/**
+ * Copyright (C) 2020-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/update/document_diff_calculator.h"
+
+namespace mongo::doc_diff {
+namespace {
+
+// Note: This function is mutually with computeArrayDiff() and computeDocDiff().
+template <class DiffBuilder, class T>
+void calculateSubDiffHelper(const BSONElement& preVal,
+ const BSONElement& postVal,
+ T fieldIdentifier,
+ DiffBuilder* builder);
+
+bool computeArrayDiff(const BSONObj& pre,
+ const BSONObj& post,
+ doc_diff::ArrayDiffBuilder* diffBuilder) {
+ auto preItr = BSONObjIterator(pre);
+ auto postItr = BSONObjIterator(post);
+ const size_t postObjSize = static_cast<size_t>(post.objsize());
+ size_t nFieldsInPostArray = 0;
+ for (; preItr.more() && postItr.more(); ++preItr, ++postItr, ++nFieldsInPostArray) {
+ // Bailout if the generated diff so far is larger than the 'post' object.
+ if (postObjSize < diffBuilder->computeApproxSize()) {
+ return false;
+ }
+ if (!(*preItr).binaryEqual(*postItr)) {
+ // If both are arrays or objects, then recursively compute the diff of the respective
+ // array or object.
+ if ((*preItr).type() == (*postItr).type() &&
+ ((*preItr).type() == BSONType::Object || (*preItr).type() == BSONType::Array)) {
+ calculateSubDiffHelper(*preItr, *postItr, nFieldsInPostArray, diffBuilder);
+ } else {
+ diffBuilder->addUpdate(nFieldsInPostArray, *postItr);
+ }
+ }
+ }
+
+ // When we reach here, only one of postItr or preItr can have more fields. If postItr has more
+ // fields, we need to add all the remaining fields.
+ for (; postItr.more(); ++postItr, ++nFieldsInPostArray) {
+ diffBuilder->addUpdate(nFieldsInPostArray, *postItr);
+ }
+
+ // If preItr has more fields, we can ignore the remaining fields, since we only need to do a
+ // resize operation.
+ if (preItr.more()) {
+ diffBuilder->setResize(nFieldsInPostArray);
+ }
+ return postObjSize > diffBuilder->computeApproxSize();
+}
+
+bool computeDocDiff(const BSONObj& pre,
+ const BSONObj& post,
+ doc_diff::DocumentDiffBuilder* diffBuilder) {
+ BSONObjIterator preItr(pre);
+ BSONObjIterator postItr(post);
+ const size_t postObjSize = static_cast<size_t>(post.objsize());
+ std::set<StringData> deletes;
+ while (preItr.more() && postItr.more()) {
+ auto preVal = *preItr;
+ auto postVal = *postItr;
+
+ // Bailout if the generated diff so far is larger than the 'post' object.
+ if (postObjSize < diffBuilder->computeApproxSize()) {
+ return false;
+ }
+ if (preVal.fieldNameStringData() == postVal.fieldNameStringData()) {
+ if (preVal.binaryEqual(postVal)) {
+ // They're identical. Move on.
+ } else if (preVal.type() == postVal.type() &&
+ (preVal.type() == BSONType::Object || preVal.type() == BSONType::Array)) {
+ // Both are either arrays or objects, recursively compute the diff of the respective
+ // array or object.
+ calculateSubDiffHelper(preVal, postVal, preVal.fieldNameStringData(), diffBuilder);
+ } else {
+ // Any other case, just replace with the 'postVal'.
+ diffBuilder->addUpdate((*preItr).fieldNameStringData(), postVal);
+ }
+ preItr.next();
+ postItr.next();
+ } else {
+ // If the 'preVal' field name does not exist in the 'post' object then, just remove it.
+ // If it present, we do nothing for now, since the field gets inserted later.
+ deletes.insert(preVal.fieldNameStringData());
+ preItr.next();
+ }
+ }
+
+ // When we reach here, only one of postItr or preItr can have more fields. Record remaining
+ // fields in preItr as removals.
+ for (; preItr.more(); preItr.next()) {
+ // Note that we don't need to record these into the 'deletes' set because there are no more
+ // fields in the post image.
+ invariant(!postItr.more());
+ diffBuilder->addDelete((*preItr).fieldNameStringData());
+ }
+
+ // Record remaining fields in postItr as creates.
+ for (; postItr.more(); postItr.next()) {
+ auto fieldName = (*postItr).fieldNameStringData();
+ diffBuilder->addInsert(fieldName, *postItr);
+ deletes.erase(fieldName);
+ }
+ for (auto&& deleteField : deletes) {
+ diffBuilder->addDelete(deleteField);
+ }
+ return postObjSize > diffBuilder->computeApproxSize();
+}
+
+template <class DiffBuilder, class T>
+void calculateSubDiffHelper(const BSONElement& preVal,
+ const BSONElement& postVal,
+ T fieldIdentifier,
+ DiffBuilder* builder) {
+ if (preVal.type() == BSONType::Object) {
+ auto subDiffBuilder = builder->startSubObjDiff(fieldIdentifier);
+ const auto hasSubDiff =
+ computeDocDiff(preVal.embeddedObject(), postVal.embeddedObject(), &subDiffBuilder);
+ if (!hasSubDiff) {
+ subDiffBuilder.abandon();
+ builder->addUpdate(fieldIdentifier, postVal);
+ }
+ } else {
+ auto subDiffBuilder = builder->startSubArrDiff(fieldIdentifier);
+ const auto hasSubDiff =
+ computeArrayDiff(preVal.embeddedObject(), postVal.embeddedObject(), &subDiffBuilder);
+ if (!hasSubDiff) {
+ subDiffBuilder.abandon();
+ builder->addUpdate(fieldIdentifier, postVal);
+ }
+ }
+}
+} // namespace
+
+boost::optional<doc_diff::Diff> computeDiff(const BSONObj& pre, const BSONObj& post) {
+ doc_diff::DocumentDiffBuilder diffBuilder;
+ bool hasDiff = computeDocDiff(pre, post, &diffBuilder);
+ return hasDiff ? boost::optional<doc_diff::Diff>(diffBuilder.release()) : boost::none;
+}
+} // namespace mongo::doc_diff
diff --git a/src/mongo/db/update/document_diff_calculator.h b/src/mongo/db/update/document_diff_calculator.h
new file mode 100644
index 00000000000..62436511252
--- /dev/null
+++ b/src/mongo/db/update/document_diff_calculator.h
@@ -0,0 +1,44 @@
+/**
+ * Copyright (C) 2020-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/bson/bsonobj.h"
+#include "mongo/db/update/document_diff_serialization.h"
+
+
+namespace mongo::doc_diff {
+/**
+ * Returns the delta between 'pre' and 'post' by recursively iterating the object. If the size
+ * of the computed delta is larger than the 'post' object then the function returns
+ * 'boost::none'.
+ */
+boost::optional<doc_diff::Diff> computeDiff(const BSONObj& pre, const BSONObj& post);
+
+}; // namespace mongo::doc_diff
diff --git a/src/mongo/db/update/document_diff_calculator_test.cpp b/src/mongo/db/update/document_diff_calculator_test.cpp
new file mode 100644
index 00000000000..15cace74055
--- /dev/null
+++ b/src/mongo/db/update/document_diff_calculator_test.cpp
@@ -0,0 +1,228 @@
+/**
+ * Copyright (C) 2020-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/bson/json.h"
+#include "mongo/db/update/document_diff_calculator.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo {
+namespace {
+
+void assertBinaryEq(const BSONObj& objA, const BSONObj& objB) {
+ // This *MUST* check for binary equality, which is what we enforce between replica set
+ // members. Logical equality (through woCompare() or assertBinaryEq) is not strong enough.
+ if (!objA.binaryEqual(objB)) {
+ FAIL(str::stream() << "Objects not equal. Expected " << objA << " == " << objB);
+ }
+}
+
+TEST(DocumentDiffTest, SameObjectsNoDiff) {
+ auto assertDiffEmpty = [](const BSONObj& doc) {
+ auto diff = doc_diff::computeDiff(doc, doc);
+ ASSERT(diff);
+ assertBinaryEq(*diff, BSONObj());
+ };
+ assertDiffEmpty(fromjson("{field1: 1}"));
+ assertDiffEmpty(fromjson("{field1: []}"));
+ assertDiffEmpty(fromjson("{field1: [{}]}"));
+ assertDiffEmpty(fromjson("{field1: [0, 1, 2, 3, 4]}"));
+ assertDiffEmpty(fromjson("{field1: null}"));
+ assertDiffEmpty(fromjson("{'0': [0]}"));
+ assertDiffEmpty(fromjson("{'0': [[{'0': [[{'0': [[]]} ]]}]]}"));
+}
+
+TEST(DocumentDiffTest, LargeDelta) {
+ ASSERT_FALSE(
+ doc_diff::computeDiff(fromjson("{a: 1, b: 2, c: 3}"), fromjson("{A: 1, B: 2, C: 3}")));
+
+ // Inserting field at the beginning produces a large delta.
+ ASSERT_FALSE(doc_diff::computeDiff(fromjson("{b: 1, c: 1, d: 1}"),
+ fromjson("{a: 1, b: 1, c: 1, d: 1}")));
+
+ // Empty objects.
+ ASSERT_FALSE(doc_diff::computeDiff(BSONObj(), BSONObj()));
+
+ // Full diff.
+ ASSERT_FALSE(doc_diff::computeDiff(fromjson("{b: 1}"), BSONObj()));
+ ASSERT_FALSE(doc_diff::computeDiff(BSONObj(), fromjson("{b: 1}")));
+}
+
+TEST(DocumentDiffTest, DeleteAndInsertFieldAtTheEnd) {
+ auto diff = doc_diff::computeDiff(fromjson("{a: 1, b: 2, c: 3, d: 4}"),
+ fromjson("{b: 2, c: 3, d: 4, a: 1}"));
+ ASSERT(diff);
+ assertBinaryEq(*diff, fromjson("{i: {a: 1}}"));
+}
+
+TEST(DocumentDiffTest, DeletesRecordedInAscendingOrderOfFieldNames) {
+ auto diff = doc_diff::computeDiff(fromjson("{b: 1, a: 2, c: 3, d: 4, e: 'value'}"),
+ fromjson("{c: 3, d: 4, e: 'value'}"));
+ ASSERT(diff);
+ assertBinaryEq(*diff, fromjson("{d: {a: false, b: false}}"));
+
+ // Delete at the end still follow the order.
+ diff = doc_diff::computeDiff(
+ fromjson("{b: 1, a: 2, c: 'value', d: 'value', e: 'value', g: 1, f: 1}"),
+ fromjson("{c: 'value', d: 'value', e: 'value'}"));
+ ASSERT(diff);
+ assertBinaryEq(*diff, fromjson("{d: {g: false, f: false, a: false, b: false}}"));
+}
+
+TEST(DocumentDiffTest, DataTypeChangeRecorded) {
+ const auto objWithDoubleValue =
+ fromjson("{a: 1, b: 2, c: {subField1: 1, subField2: 3.0}, d: 4}");
+ const auto objWithIntValue = fromjson("{a: 1, b: 2, c: {subField1: 1, subField2: 3}, d: 4}");
+ const auto objWithLongValue =
+ fromjson("{a: 1, b: 2, c: {subField1: 1, subField2: NumberLong(3)}, d: 4}");
+ auto diff = doc_diff::computeDiff(objWithDoubleValue, objWithIntValue);
+ ASSERT(diff);
+ assertBinaryEq(*diff, fromjson("{s: {c: {u: {subField2: 3}} }}"));
+
+ diff = doc_diff::computeDiff(objWithIntValue, objWithLongValue);
+ ASSERT(diff);
+ assertBinaryEq(*diff, fromjson("{s: {c: {u: {subField2: NumberLong(3)}} }}"));
+
+ diff = doc_diff::computeDiff(objWithLongValue, objWithDoubleValue);
+ ASSERT(diff);
+ assertBinaryEq(*diff, fromjson("{s: {c: {u: {subField2: 3.0}} }}"));
+}
+
+TEST(DocumentDiffTest, NullAndMissing) {
+ auto diff = doc_diff::computeDiff(fromjson("{a: null}"), fromjson("{}"));
+ ASSERT_FALSE(diff);
+
+ diff = doc_diff::computeDiff(fromjson("{a: null, b: undefined, c: null, d: 'someVal'}"),
+ fromjson("{a: null, b: undefined, c: undefined, d: 'someVal'}"));
+ ASSERT(diff);
+ assertBinaryEq(*diff, fromjson("{u: {c: undefined}}"));
+}
+
+TEST(DocumentDiffTest, FieldOrder) {
+ auto diff = doc_diff::computeDiff(fromjson("{a: 1, b: 2, c: {p: 1, q: 1, s: 1, r: 2}, d: 4}"),
+ fromjson("{a: 1, b: 2, c: {p: 1, q: 1, r: 2, s: 1}, d: 4}"));
+ ASSERT(diff);
+ assertBinaryEq(*diff, fromjson("{s: {c: {i: {s: 1}} }}"));
+}
+
+TEST(DocumentDiffTest, SimpleArrayPush) {
+ auto diff = doc_diff::computeDiff(fromjson("{field1: 'abcd', field2: [1, 2, 3]}"),
+ fromjson("{field1: 'abcd', field2: [1, 2, 3, 4]}"));
+ ASSERT(diff);
+ assertBinaryEq(*diff, fromjson("{s: {field2: {a: true, '3': {u: 4}}}}"));
+}
+
+TEST(DocumentDiffTest, NestedArray) {
+ auto diff = doc_diff::computeDiff(fromjson("{field1: 'abcd', field2: [1, 2, 3, [[2]]]}"),
+ fromjson("{field1: 'abcd', field2: [1, 2, 3, [[4]]]}"));
+ ASSERT(diff);
+ // When the sub-array delta is larger than the size of the sub-array, we record it as an update
+ // operation.
+ assertBinaryEq(*diff, fromjson("{s: {field2: {a: true, '3': {u: [[4]]}}}}"));
+
+ diff = doc_diff::computeDiff(
+ fromjson("{field1: 'abcd', field2: [1, 2, 3, [1, 'longString', [2], 4, 5, 6], 5, 5, 5]}"),
+ fromjson("{field1: 'abcd', field2: [1, 2, 3, [1, 'longString', [4], 4], 5, 6]}"));
+ ASSERT(diff);
+ assertBinaryEq(*diff,
+ fromjson("{s: {field2: {a: true, l: 6, '3': {s: {a: true, l: 4, "
+ "'2': {u: [4]}}}, '5': {u: 6}} }}"));
+}
+
+TEST(DocumentDiffTest, SubObjInSubArrayUpdateElements) {
+ auto diff = doc_diff::computeDiff(
+ fromjson("{field1: 'abcd', field2: [1, 2, 3, "
+ "{field3: ['veryLargeStringValueveryLargeStringValue', 2, 3, 4]}]}"),
+ fromjson("{field1: 'abcd', field2: [1, 2, 3, {'field3': "
+ "['veryLargeStringValueveryLargeStringValue', 2, 4, 3, "
+ "5]}]}"));
+
+ ASSERT(diff);
+ assertBinaryEq(*diff,
+ fromjson("{s: {field2: {a: true, '3': {s: {s: {field3: {a: true, '2': "
+ "{u: 4}, '3': {u: 3}, '4': {u: 5}} }} }} }}"));
+}
+
+TEST(DocumentDiffTest, SubObjInSubArrayDeleteElements) {
+ auto diff = doc_diff::computeDiff(
+ fromjson("{field1: 'abcd', field2: [1, 2, 3, {'field3': ['largeString', 2, 3, 4, 5]}]}"),
+ fromjson("{field1: 'abcd', field2: [1, 2, 3, {'field3': ['largeString', 2, 3, 5]}]}"));
+ ASSERT(diff);
+ assertBinaryEq(*diff,
+ fromjson("{s: {field2: {a: true, '3': {s: {s: {field3: {a: true, l: 4, '3': "
+ "{u: 5}} }} }} }}"));
+}
+
+TEST(DocumentDiffTest, NestedSubObjs) {
+ auto diff = doc_diff::computeDiff(
+ fromjson("{level1Field1: 'abcd', level1Field2: {level2Field1: {level3Field1: {p: 1}, "
+ "level3Field2: {q: 1}}, level2Field2: 2}, level1Field3: 3}"),
+ fromjson("{level1Field1: 'abcd', level1Field2: {level2Field1: {level3Field1: {q: 1}, "
+ "level3Field2: {q: 1}}, level2Field2: 2}, level1Field3: '3'}"));
+ ASSERT(diff);
+ assertBinaryEq(*diff,
+ fromjson("{u: {level1Field3: '3'}, s: {level1Field2: {s: {'level2Field1': "
+ "{u: {level3Field1: {q: 1}}} }} }}"));
+}
+
+TEST(DocumentDiffTest, SubArrayInSubArrayLargeDelta) {
+ auto diff = doc_diff::computeDiff(
+ fromjson("{field1: 'abcd', field2: [1, 2, 3, {field3: [2, 3, 4, 5]}]}"),
+ fromjson("{field1: 'abcd', field2: [1, 2, 3, {field3: [1, 2, 3, 4, 5]}]}"));
+ ASSERT(diff);
+ assertBinaryEq(*diff,
+ fromjson("{s: {field2: {a: true, '3': {u: {field3: [1, 2, 3, 4, 5]}}} }}"));
+}
+
+TEST(DocumentDiffTest, SubObjInSubArrayLargeDelta) {
+ auto diff =
+ doc_diff::computeDiff(fromjson("{field1: [1, 2, 3, 4, 5, 6, {a: 1, b: 2, c: 3, d: 4}, 7]}"),
+ fromjson("{field1: [1, 2, 3, 4, 5, 6, {p: 1, q: 2}, 7]}"));
+ ASSERT(diff);
+ assertBinaryEq(*diff, fromjson("{s: {field1: {a: true, '6': {u: {p: 1, q: 2} }} }}"));
+}
+
+TEST(DocumentDiffTest, SubObjInSubObjLargeDelta) {
+ auto diff = doc_diff::computeDiff(
+ fromjson("{field: {p: 1, q: 2, r: {a: 1, b: 2, c: 3, 'd': 4}, s: 3}}"),
+ fromjson("{field: {p: 1, q: 2, r: {p: 1, q: 2}, s: 3}}"));
+ ASSERT(diff);
+ assertBinaryEq(*diff, fromjson("{s: {field: {u: {r: {p: 1, q: 2} }} }}"));
+}
+
+TEST(DocumentDiffTest, SubArrayInSubObjLargeDelta) {
+ auto diff = doc_diff::computeDiff(fromjson("{field: {p: 1, q: 2, r: [1, 3, 4, 5], s: 3}}"),
+ fromjson("{field: {p: 1, q: 2, r: [1, 2, 3, 4], s: 3}}"));
+ ASSERT(diff);
+ assertBinaryEq(*diff, fromjson("{s: {field: {u: {r: [1, 2, 3, 4]}} }}"));
+}
+
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/db/update/document_diff_serialization.cpp b/src/mongo/db/update/document_diff_serialization.cpp
index 0ef9758dd24..1bec8a0313e 100644
--- a/src/mongo/db/update/document_diff_serialization.cpp
+++ b/src/mongo/db/update/document_diff_serialization.cpp
@@ -33,7 +33,6 @@
#include "mongo/util/str.h"
#include "mongo/util/string_map.h"
-#include "mongo/util/visit_helper.h"
namespace mongo::doc_diff {
namespace {
diff --git a/src/mongo/db/update/document_diff_serialization.h b/src/mongo/db/update/document_diff_serialization.h
index bbed014d72a..03acb763aea 100644
--- a/src/mongo/db/update/document_diff_serialization.h
+++ b/src/mongo/db/update/document_diff_serialization.h
@@ -31,6 +31,7 @@
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/stdx/variant.h"
+#include "mongo/util/visit_helper.h"
// This file contains classes for serializing document diffs to a format that can be stored in the
// oplog. Any code/machinery which manipulates document diffs should do so through these classes.
@@ -127,6 +128,34 @@ public:
_childSubDiffIndex = {};
}
+ size_t computeApproxSize() const {
+ // TODO SERVER-48602: We need to ensure that this function returns in O(1). Incrementally
+ // computing this size might be one option.
+ size_t size = sizeof(int) /* Size of object */ + 1 /* Type byte */ + kArrayHeader.size() +
+ 1 /* null terminator */ + 1 /*type byte */ + 1 /* bool size*/;
+
+ if (_newSize) {
+ size += kResizeSectionFieldName.size() + 1 /* Null terminator */ + 1 /* Type byte */ +
+ sizeof(int) /* size of value */;
+ }
+
+ for (auto&& [idx, modification] : _modifications) {
+ stdx::visit(visit_helper::Overloaded{[idx = idx, &size](const Diff& subDiff) {
+ size += sizeof(idx) +
+ kSubDiffSectionFieldName.size() +
+ subDiff.objsize() + 2;
+ },
+ [idx = idx, &size](BSONElement elt) {
+ size += sizeof(idx) +
+ kUpdateSectionFieldName.size() +
+ elt.size() + 2;
+ }},
+ modification);
+ }
+
+ return size;
+ }
+
private:
// The top-level of a diff is never an array diff. One can only construct an ArrayDiffBuilder
// from a parent DocumentDiffBuilder.
@@ -149,7 +178,7 @@ private:
DiffBuilderBase* _parent = nullptr;
friend class DocumentDiffBuilder;
-};
+}; // namespace doc_diff
/**
* Class for building document diffs.
@@ -227,6 +256,33 @@ public:
_childSubDiffField = {};
}
+ size_t computeApproxSize() {
+ // TODO SERVER-48602: We need to ensure that this function returns in O(1). Incrementally
+ // computing this size might be one option.
+ size_t size = sizeof(int) /* Size of object */ + 1 /* Type byte */;
+
+ if (!_updates.asTempObj().isEmpty()) {
+ size += 1 /* Type byte */ + kUpdateSectionFieldName.size() /* FieldName */ +
+ 1 /* Null terminator */ + _updates.bb().len() + 1 /* Null terminator */;
+ }
+
+ if (!_inserts.asTempObj().isEmpty()) {
+ size += 1 /* Type byte */ + kInsertSectionFieldName.size() /* FieldName */ +
+ 1 /* Null terminator */ + _inserts.bb().len() + 1 /* Null terminator */;
+ }
+
+ if (!_deletes.asTempObj().isEmpty()) {
+ size += 1 /* Type byte */ + kDeleteSectionFieldName.size() /* FieldName */ +
+ 1 /* Null terminator */ + _deletes.bb().len() + 1 /* Null terminator */;
+ }
+
+ if (!_subDiffs.asTempObj().isEmpty()) {
+ size += 1 /* Type byte */ + kSubDiffSectionFieldName.size() /* FieldName */ +
+ 1 /* Null terminator */ + _subDiffs.bb().len() + 1 /* Null terminator */;
+ }
+ return size;
+ }
+
private:
DocumentDiffBuilder(DiffBuilderBase* parent) : _parent(parent) {}
diff --git a/src/mongo/db/update/document_diff_serialization_test.cpp b/src/mongo/db/update/document_diff_serialization_test.cpp
index 2754cb5ddb7..8c830e2343c 100644
--- a/src/mongo/db/update/document_diff_serialization_test.cpp
+++ b/src/mongo/db/update/document_diff_serialization_test.cpp
@@ -343,5 +343,48 @@ TEST(DiffSerializationTest, SubArrayDiffAbandon) {
fromjson("{d : {dField1: false, dField2: false}, u: {uField: 1},"
"s: {arr2: {a: true, l: 5}}}"));
}
+
+TEST(DiffSerializationTest, ValidateComputeApproxSize) {
+ DocumentDiffBuilder builder;
+ builder.addDelete("deleteField");
+ builder.addInsert("insert", BSON("" << 4).firstElement());
+ builder.addUpdate("update1",
+ BSON(""
+ << "update")
+ .firstElement());
+ builder.addDelete("");
+ {
+ // Ensure size of the sub-array diff is included.
+ auto subDiff = builder.startSubArrDiff("subArray");
+ subDiff.setResize(5);
+ subDiff.addUpdate(2, BSON("" << 4).firstElement());
+ subDiff.addUpdate(2,
+ BSON(""
+ << "value")
+ .firstElement());
+
+ auto subSubDiff = subDiff.startSubObjDiff(22);
+ subSubDiff.addInsert("insert2",
+ BSON(""
+ << "")
+ .firstElement());
+ subSubDiff.addUpdate("update3", BSON("" << BSONNULL).firstElement());
+ }
+ {
+ // Ensure size of the sub-object diff is included.
+ auto subDiff = builder.startSubObjDiff("subObj");
+ subDiff.addUpdate("setArray",
+ BSON("" << BSON_ARRAY("val1"
+ << "val2" << 3))
+ .firstElement());
+ }
+ // Update with a sub-object.
+ builder.addUpdate("update4", BSON("" << BSON("nestedObj" << 4LL)).firstElement());
+
+ auto computedSize = builder.computeApproxSize();
+ auto out = builder.release();
+ ASSERT_EQ(computedSize, out.objsize());
+}
+
} // namespace
} // namespace mongo::doc_diff