summary refs log tree commit diff
path: root/src/mongo/db/pipeline/change_stream_split_event_helpers.cpp
diff options
context:
space:
mode:
author: Romans Kasperovics <romans.kasperovics@mongodb.com> 2023-04-06 06:55:11 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2023-04-06 08:04:22 +0000
commit: 41ece0ab8e660dd2993141334ce10a189516bdb4 (patch)
tree: ba851e945ab154f2c2cc124eeead5d3f4d7be598 /src/mongo/db/pipeline/change_stream_split_event_helpers.cpp
parent: e8b6fe12d6a962e537ef2503a22789936f507a3f (diff)
download: mongo-41ece0ab8e660dd2993141334ce10a189516bdb4.tar.gz
SERVER-74301 Optimize serialization behavior for $changeStreamSplitLargeEvent
Diffstat (limited to 'src/mongo/db/pipeline/change_stream_split_event_helpers.cpp')
-rw-r--r-- src/mongo/db/pipeline/change_stream_split_event_helpers.cpp | 38
1 file changed, 14 insertions(+), 24 deletions(-)
diff --git a/src/mongo/db/pipeline/change_stream_split_event_helpers.cpp b/src/mongo/db/pipeline/change_stream_split_event_helpers.cpp
index a0cb8377d5b..19fba5afea5 100644
--- a/src/mongo/db/pipeline/change_stream_split_event_helpers.cpp
+++ b/src/mongo/db/pipeline/change_stream_split_event_helpers.cpp
@@ -35,39 +35,29 @@
namespace mongo {
namespace change_stream_split_event {
-std::pair<Document, size_t> processChangeEventBeforeSplit(Document event, bool withMetadata) {
- BSONObj eventBson;
- Document eventDocToReturn;
- // If this stream needs merging, then we will need to serialize the metadata as well.
+std::pair<Document, size_t> processChangeEventBeforeSplit(const Document& event,
+ bool withMetadata) {
if (withMetadata) {
- // TODO SERVER-74301: Use 'event.toBsonWithMetadata<BSONObj::LargeSizeTrait>()' here.
- eventBson = event.toBson<BSONObj::LargeSizeTrait>();
- eventDocToReturn = event;
+ auto eventBson = event.toBsonWithMetaData<BSONObj::LargeSizeTrait>();
+ return {Document::fromBsonWithMetaData(eventBson), eventBson.objsize()};
} else {
// Serialize just the user data, and add the metadata fields separately.
- eventBson = event.toBson<BSONObj::LargeSizeTrait>();
+ auto eventBson = event.toBson<BSONObj::LargeSizeTrait>();
MutableDocument mutDoc(Document{eventBson});
mutDoc.copyMetaDataFrom(event);
- eventDocToReturn = mutDoc.freeze();
+ return {mutDoc.freeze(), eventBson.objsize()};
}
- // Count the size of the _id field again since the output cursor will have a PBRT.
- size_t eventBsonSize = eventBson.objsize() + eventBson["_id"].size();
- return {eventDocToReturn, eventBsonSize};
-}
-
-size_t getBsonSizeWithMetaData(const Document& doc) {
- // TODO SERVER-74301: Make sure each event is serialized only once in a pipeline.
- return static_cast<size_t>(doc.toBsonWithMetaData().objsize());
-}
-
-size_t getFieldBsonSize(const Document& doc, const StringData& key) {
- // TODO SERVER-74301: Make sure each event is serialized only once in a pipeline.
- return static_cast<size_t>(doc.toBson<BSONObj::LargeSizeTrait>().getField(key).size());
}
std::queue<Document> splitChangeEvent(const Document& event,
size_t maxFragmentBsonSize,
size_t skipFirstFragments) {
+ // Extract the underlying BSON. We expect the event to be trivially convertible either with
+ // or without metadata, so we attempt to optimize the serialization here.
+ auto eventBson =
+ (event.isTriviallyConvertible() ? event.toBson<BSONObj::LargeSizeTrait>()
+ : event.toBsonWithMetaData<BSONObj::LargeSizeTrait>());
+
// Construct a sorted map of fields ordered by size and key for a deterministic greedy strategy
// to minimize the total number of fragments (the first fragment contains as many fields as
// possible). Don't include the original '_id' field, since each fragment will have its own.
@@ -75,7 +65,7 @@ std::queue<Document> splitChangeEvent(const Document& event,
for (auto it = event.fieldIterator(); it.more();) {
auto&& [key, value] = it.next();
if (key != kIdField) {
- sortedFieldMap.emplace(std::make_pair(getFieldBsonSize(event, key), key), value);
+ sortedFieldMap.emplace(std::make_pair(eventBson[key].size(), key), value);
}
}
@@ -102,7 +92,7 @@ std::queue<Document> splitChangeEvent(const Document& event,
Value(Document{{kFragmentNumberField, static_cast<int>(fragments.size())},
{kTotalFragmentsField, 0}}));
- auto fragmentBsonSize = getBsonSizeWithMetaData(fragment.peek());
+ auto fragmentBsonSize = static_cast<size_t>(fragment.peek().toBsonWithMetaData().objsize());
// Fill the fragment with as many fields as we can until we run out or exceed max size.
// Always make sure we add at least one new field on each iteration.