summaryrefslogtreecommitdiff
path: root/src/mongo
diff options
context:
space:
mode:
authorRomans Kasperovics <romans.kasperovics@mongodb.com>2023-04-06 06:55:11 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2023-04-06 08:04:22 +0000
commit41ece0ab8e660dd2993141334ce10a189516bdb4 (patch)
treeba851e945ab154f2c2cc124eeead5d3f4d7be598 /src/mongo
parente8b6fe12d6a962e537ef2503a22789936f507a3f (diff)
downloadmongo-41ece0ab8e660dd2993141334ce10a189516bdb4.tar.gz
SERVER-74301 Optimize serialization behavior for $changeStreamSplitLargeEvent
Diffstat (limited to 'src/mongo')
-rw-r--r--src/mongo/db/exec/document_value/document.cpp45
-rw-r--r--src/mongo/db/exec/document_value/document.h45
-rw-r--r--src/mongo/db/exec/document_value/document_internal.h33
-rw-r--r--src/mongo/db/exec/document_value/document_metadata_fields.cpp2
-rw-r--r--src/mongo/db/exec/document_value/document_metadata_fields.h100
-rw-r--r--src/mongo/db/exec/document_value/document_metadata_fields_test.cpp136
-rw-r--r--src/mongo/db/exec/document_value/document_value_test.cpp102
-rw-r--r--src/mongo/db/pipeline/change_stream_split_event_helpers.cpp38
-rw-r--r--src/mongo/db/pipeline/change_stream_split_event_helpers.h12
-rw-r--r--src/mongo/db/pipeline/change_stream_split_event_helpers_test.cpp6
-rw-r--r--src/mongo/db/pipeline/document_source_change_stream_split_large_event.cpp14
-rw-r--r--src/mongo/db/query/sbe_stage_builder_lookup_test.cpp2
12 files changed, 379 insertions, 156 deletions
diff --git a/src/mongo/db/exec/document_value/document.cpp b/src/mongo/db/exec/document_value/document.cpp
index 8715d55e89e..b71b109aa65 100644
--- a/src/mongo/db/exec/document_value/document.cpp
+++ b/src/mongo/db/exec/document_value/document.cpp
@@ -131,7 +131,7 @@ bool DocumentStorageIterator::shouldSkipDeleted() {
// If we strip the metadata see if a field name matches the known list. All metadata fields
// start with '$' so optimize for a quick bailout.
- if (_storage->stripMetadata() && fieldName[0] == '$' &&
+ if (_storage->bsonHasMetadata() && fieldName[0] == '$' &&
Document::allMetadataFieldNames.contains(fieldName)) {
return true;
}
@@ -344,8 +344,8 @@ void DocumentStorage::reserveFields(size_t expectedFields) {
}
intrusive_ptr<DocumentStorage> DocumentStorage::clone() const {
- auto out =
- make_intrusive<DocumentStorage>(_bson, _stripMetadata, _modified, _numBytesFromBSONInCache);
+ auto out = make_intrusive<DocumentStorage>(
+ _bson, _bsonHasMetadata, _modified, _numBytesFromBSONInCache);
if (_cache) {
// Make a copy of the buffer with the fields.
@@ -391,10 +391,10 @@ DocumentStorage::~DocumentStorage() {
}
}
-void DocumentStorage::reset(const BSONObj& bson, bool stripMetadata) {
+void DocumentStorage::reset(const BSONObj& bson, bool bsonHasMetadata) {
_bson = bson;
_numBytesFromBSONInCache = 0;
- _stripMetadata = stripMetadata;
+ _bsonHasMetadata = bsonHasMetadata;
_modified = false;
_snapshottedSize = 0;
@@ -423,6 +423,8 @@ void DocumentStorage::loadLazyMetadata() const {
return;
}
+ bool oldModified = _metadataFields.isModified();
+
BSONObjIterator it(_bson);
while (it.more()) {
BSONElement elem(it.next());
@@ -469,6 +471,7 @@ void DocumentStorage::loadLazyMetadata() const {
}
}
+ _metadataFields.setModified(oldModified);
_haveLazyLoadedMetadata = true;
}
@@ -535,37 +538,35 @@ constexpr StringData Document::metaFieldSearchHighlights;
constexpr StringData Document::metaFieldSearchScoreDetails;
constexpr StringData Document::metaFieldSearchSortValues;
-BSONObj Document::toBsonWithMetaData() const {
- BSONObjBuilder bb;
- toBson(&bb);
+void Document::toBsonWithMetaData(BSONObjBuilder* builder) const {
+ toBson(builder);
if (!metadata()) {
- return bb.obj();
+ return;
}
if (metadata().hasTextScore())
- bb.append(metaFieldTextScore, metadata().getTextScore());
+ builder->append(metaFieldTextScore, metadata().getTextScore());
if (metadata().hasRandVal())
- bb.append(metaFieldRandVal, metadata().getRandVal());
+ builder->append(metaFieldRandVal, metadata().getRandVal());
if (metadata().hasSortKey())
- bb.append(metaFieldSortKey,
- DocumentMetadataFields::serializeSortKey(metadata().isSingleElementKey(),
- metadata().getSortKey()));
+ builder->append(metaFieldSortKey,
+ DocumentMetadataFields::serializeSortKey(metadata().isSingleElementKey(),
+ metadata().getSortKey()));
if (metadata().hasGeoNearDistance())
- bb.append(metaFieldGeoNearDistance, metadata().getGeoNearDistance());
+ builder->append(metaFieldGeoNearDistance, metadata().getGeoNearDistance());
if (metadata().hasGeoNearPoint())
- metadata().getGeoNearPoint().addToBsonObj(&bb, metaFieldGeoNearPoint);
+ metadata().getGeoNearPoint().addToBsonObj(builder, metaFieldGeoNearPoint);
if (metadata().hasSearchScore())
- bb.append(metaFieldSearchScore, metadata().getSearchScore());
+ builder->append(metaFieldSearchScore, metadata().getSearchScore());
if (metadata().hasSearchHighlights())
- metadata().getSearchHighlights().addToBsonObj(&bb, metaFieldSearchHighlights);
+ metadata().getSearchHighlights().addToBsonObj(builder, metaFieldSearchHighlights);
if (metadata().hasIndexKey())
- bb.append(metaFieldIndexKey, metadata().getIndexKey());
+ builder->append(metaFieldIndexKey, metadata().getIndexKey());
if (metadata().hasSearchScoreDetails())
- bb.append(metaFieldSearchScoreDetails, metadata().getSearchScoreDetails());
+ builder->append(metaFieldSearchScoreDetails, metadata().getSearchScoreDetails());
if (metadata().hasSearchSortValues()) {
- bb.append(metaFieldSearchSortValues, metadata().getSearchSortValues());
+ builder->append(metaFieldSearchSortValues, metadata().getSearchSortValues());
}
- return bb.obj();
}
Document Document::fromBsonWithMetaData(const BSONObj& bson) {
diff --git a/src/mongo/db/exec/document_value/document.h b/src/mongo/db/exec/document_value/document.h
index 7499bee8d79..83db6052aeb 100644
--- a/src/mongo/db/exec/document_value/document.h
+++ b/src/mongo/db/exec/document_value/document.h
@@ -263,7 +263,15 @@ public:
* storage is already in BSON format and there are no damages.
*/
bool isTriviallyConvertible() const {
- return !storage().isModified() && !storage().stripMetadata();
+ return !storage().isModified() && !storage().bsonHasMetadata();
+ }
+
+ /**
+ * Returns true, if this document is trivially convertible to BSON with metadata, meaning the
+ * underlying storage is already in BSON format and there are no damages.
+ */
+ bool isTriviallyConvertibleWithMetadata() const {
+ return !storage().isModified() && !storage().isMetadataModified();
}
/**
@@ -296,7 +304,18 @@ public:
/**
* Like the 'toBson()' method, but includes metadata as top-level fields.
*/
- BSONObj toBsonWithMetaData() const;
+ void toBsonWithMetaData(BSONObjBuilder* builder) const;
+
+ template <typename BSONTraits = BSONObj::DefaultSizeTrait>
+ BSONObj toBsonWithMetaData() const {
+ if (isTriviallyConvertibleWithMetadata()) {
+ return storage().bsonObj();
+ }
+
+ BSONObjBuilder bb;
+ toBsonWithMetaData(&bb);
+ return bb.obj<BSONTraits>();
+ }
/**
* Like Document(BSONObj) but treats top-level fields with special names as metadata.
@@ -536,13 +555,11 @@ public:
}
/**
- * Replace the current base Document with bson.
- *
- * The paramater 'stripMetadata' controls whether we strip the metadata fields from the
- * underlying bson when converting the document object back to bson.
+ * Replace the current base Document with the BSON object. Setting 'bsonHasMetadata' to true
+ * signals that the BSON object contains metadata fields.
*/
- void reset(const BSONObj& bson, bool stripMetadata) {
- storage().reset(bson, stripMetadata);
+ void reset(const BSONObj& bson, bool bsonHasMetadata) {
+ storage().reset(bson, bsonHasMetadata);
}
/** Add the given field to the Document.
@@ -725,13 +742,13 @@ public:
storage().makeOwned();
}
- /** Create a new document storage with the BSON object.
- *
- * The optional paramater 'stripMetadata' controls whether we strip the metadata fields (the
- * complete list is in Document::allMetadataFieldNames).
+ /**
+ * Creates a new document storage with the BSON object. Setting 'bsonHasMetadata' to true
+ * signals that the BSON object contains metadata fields (the complete list is in
+ * Document::allMetadataFieldNames).
*/
- DocumentStorage& newStorageWithBson(const BSONObj& bson, bool stripMetadata) {
- reset(make_intrusive<DocumentStorage>(bson, stripMetadata, false, 0));
+ DocumentStorage& newStorageWithBson(const BSONObj& bson, bool bsonHasMetadata) {
+ reset(make_intrusive<DocumentStorage>(bson, bsonHasMetadata, false, 0));
return const_cast<DocumentStorage&>(*storagePtr());
}
diff --git a/src/mongo/db/exec/document_value/document_internal.h b/src/mongo/db/exec/document_value/document_internal.h
index 2507cb0417b..b380247baf6 100644
--- a/src/mongo/db/exec/document_value/document_internal.h
+++ b/src/mongo/db/exec/document_value/document_internal.h
@@ -359,11 +359,11 @@ public:
/**
* Construct a storage from the BSON. The BSON is lazily processed as fields are requested from
- * the document. If we know that the BSON does not contain any metadata fields we can set the
- * 'stripMetadata' flag to false that will speed up the field iteration.
+ * the document. If we know that the BSON contains metadata fields we can set the
+ * 'bsonHasMetadata' flag to true.
*/
DocumentStorage(const BSONObj& bson,
- bool stripMetadata,
+ bool bsonHasMetadata,
bool modified,
uint32_t numBytesFromBSONInCache)
: _cache(nullptr),
@@ -373,12 +373,12 @@ public:
_hashTabMask(0),
_bson(bson),
_numBytesFromBSONInCache(numBytesFromBSONInCache),
- _stripMetadata(stripMetadata),
+ _bsonHasMetadata(bsonHasMetadata),
_modified(modified) {}
~DocumentStorage();
- void reset(const BSONObj& bson, bool stripMetadata);
+ void reset(const BSONObj& bson, bool bsonHasMetadata);
/**
* Populates the cache by recursively walking the underlying BSON.
@@ -551,7 +551,7 @@ public:
* WorkingSetMember.
*/
const DocumentMetadataFields& metadata() const {
- if (_stripMetadata) {
+ if (_bsonHasMetadata) {
loadLazyMetadata();
}
return _metadataFields;
@@ -589,8 +589,8 @@ public:
return _firstElement ? _firstElement->plusBytes(_usedBytes) : nullptr;
}
- auto stripMetadata() const {
- return _stripMetadata;
+ auto bsonHasMetadata() const {
+ return _bsonHasMetadata;
}
Position constructInCache(const BSONElement& elem);
@@ -599,6 +599,10 @@ public:
return _modified;
}
+ auto isMetadataModified() const {
+ return _metadataFields.isModified();
+ }
+
auto bsonObj() const {
return _bson;
}
@@ -708,17 +712,16 @@ private:
// whole backing BSON, but only the portion of backing BSON that's not already in the cache.
uint32_t _numBytesFromBSONInCache = 0;
- // If '_stripMetadata' is true, tracks whether or not the metadata has been lazy-loaded from the
- // backing '_bson' object. If so, then no attempt will be made to load the metadata again, even
- // if the metadata has been released by a call to 'releaseMetadata()'.
+ // Tracks whether or not the metadata has been lazy-loaded from the backing '_bson' object. If
+ // so, then no attempt will be made to load the metadata again, even if the metadata has been
+ // released by a call to 'releaseMetadata()'.
mutable bool _haveLazyLoadedMetadata = false;
mutable DocumentMetadataFields _metadataFields;
- // The storage constructed from a BSON value may contain metadata. When we process the BSON we
- // have to move the metadata to the MetadataFields object. If we know that the BSON does not
- // have any metadata we can set _stripMetadata to false that will speed up the iteration.
- bool _stripMetadata{false};
+ // True if this storage was constructed from BSON with metadata. Serializing this object using
+ // the 'toBson()' method will omit (strip) the metadata fields.
+ bool _bsonHasMetadata{false};
// This flag is set to true anytime the storage returns a mutable field. It is used to optimize
// a conversion to BSON; i.e. if there are not any modifications we can directly return _bson.
diff --git a/src/mongo/db/exec/document_value/document_metadata_fields.cpp b/src/mongo/db/exec/document_value/document_metadata_fields.cpp
index 511c12027f9..d33eee6c140 100644
--- a/src/mongo/db/exec/document_value/document_metadata_fields.cpp
+++ b/src/mongo/db/exec/document_value/document_metadata_fields.cpp
@@ -46,6 +46,7 @@ DocumentMetadataFields::DocumentMetadataFields(const DocumentMetadataFields& oth
DocumentMetadataFields& DocumentMetadataFields::operator=(const DocumentMetadataFields& other) {
_holder = other._holder ? std::make_unique<MetadataHolder>(*other._holder) : nullptr;
+ _modified = true;
return *this;
}
@@ -54,6 +55,7 @@ DocumentMetadataFields::DocumentMetadataFields(DocumentMetadataFields&& other)
DocumentMetadataFields& DocumentMetadataFields::operator=(DocumentMetadataFields&& other) {
_holder = std::move(other._holder);
+ _modified = true;
return *this;
}
diff --git a/src/mongo/db/exec/document_value/document_metadata_fields.h b/src/mongo/db/exec/document_value/document_metadata_fields.h
index 6f758a52d1e..6ca3ad3198a 100644
--- a/src/mongo/db/exec/document_value/document_metadata_fields.h
+++ b/src/mongo/db/exec/document_value/document_metadata_fields.h
@@ -149,11 +149,7 @@ public:
}
void setTextScore(double score) {
- if (!_holder) {
- _holder = std::make_unique<MetadataHolder>();
- }
-
- _holder->metaFields.set(MetaType::kTextScore);
+ _setCommon(MetaType::kTextScore);
_holder->textScore = score;
}
@@ -167,11 +163,7 @@ public:
}
void setRandVal(double val) {
- if (!_holder) {
- _holder = std::make_unique<MetadataHolder>();
- }
-
- _holder->metaFields.set(MetaType::kRandVal);
+ _setCommon(MetaType::kRandVal);
_holder->randVal = val;
}
@@ -185,11 +177,7 @@ public:
}
void setSortKey(Value sortKey, bool isSingleElementKey) {
- if (!_holder) {
- _holder = std::make_unique<MetadataHolder>();
- }
-
- _holder->metaFields.set(MetaType::kSortKey);
+ _setCommon(MetaType::kSortKey);
_holder->isSingleElementKey = isSingleElementKey;
_holder->sortKey = std::move(sortKey);
}
@@ -208,11 +196,7 @@ public:
}
void setGeoNearDistance(double dist) {
- if (!_holder) {
- _holder = std::make_unique<MetadataHolder>();
- }
-
- _holder->metaFields.set(MetaType::kGeoNearDist);
+ _setCommon(MetaType::kGeoNearDist);
_holder->geoNearDistance = dist;
}
@@ -226,11 +210,7 @@ public:
}
void setGeoNearPoint(Value point) {
- if (!_holder) {
- _holder = std::make_unique<MetadataHolder>();
- }
-
- _holder->metaFields.set(MetaType::kGeoNearPoint);
+ _setCommon(MetaType::kGeoNearPoint);
_holder->geoNearPoint = std::move(point);
}
@@ -244,11 +224,7 @@ public:
}
void setSearchScore(double score) {
- if (!_holder) {
- _holder = std::make_unique<MetadataHolder>();
- }
-
- _holder->metaFields.set(MetaType::kSearchScore);
+ _setCommon(MetaType::kSearchScore);
_holder->searchScore = score;
}
@@ -262,11 +238,7 @@ public:
}
void setSearchHighlights(Value highlights) {
- if (!_holder) {
- _holder = std::make_unique<MetadataHolder>();
- }
-
- _holder->metaFields.set(MetaType::kSearchHighlights);
+ _setCommon(MetaType::kSearchHighlights);
_holder->searchHighlights = highlights;
}
@@ -280,11 +252,7 @@ public:
}
void setIndexKey(BSONObj indexKey) {
- if (!_holder) {
- _holder = std::make_unique<MetadataHolder>();
- }
-
- _holder->metaFields.set(MetaType::kIndexKey);
+ _setCommon(MetaType::kIndexKey);
_holder->indexKey = indexKey.getOwned();
}
@@ -298,11 +266,7 @@ public:
}
void setRecordId(RecordId rid) {
- if (!_holder) {
- _holder = std::make_unique<MetadataHolder>();
- }
-
- _holder->metaFields.set(MetaType::kRecordId);
+ _setCommon(MetaType::kRecordId);
_holder->recordId = std::move(rid);
}
@@ -316,10 +280,7 @@ public:
}
void setSearchScoreDetails(BSONObj details) {
- if (!_holder) {
- _holder = std::make_unique<MetadataHolder>();
- }
- _holder->metaFields.set(MetaType::kSearchScoreDetails);
+ _setCommon(MetaType::kSearchScoreDetails);
_holder->searchScoreDetails = details.getOwned();
}
@@ -335,10 +296,7 @@ public:
}
void setTimeseriesBucketMinTime(Date_t time) {
- if (!_holder) {
- _holder = std::make_unique<MetadataHolder>();
- }
- _holder->metaFields.set(MetaType::kTimeseriesBucketMinTime);
+ _setCommon(MetaType::kTimeseriesBucketMinTime);
_holder->timeseriesBucketMinTime = time;
}
@@ -354,10 +312,7 @@ public:
}
void setTimeseriesBucketMaxTime(Date_t time) {
- if (!_holder) {
- _holder = std::make_unique<MetadataHolder>();
- }
- _holder->metaFields.set(MetaType::kTimeseriesBucketMaxTime);
+ _setCommon(MetaType::kTimeseriesBucketMaxTime);
_holder->timeseriesBucketMaxTime = time;
}
@@ -371,16 +326,34 @@ public:
}
void setSearchSortValues(BSONObj vals) {
- if (!_holder) {
- _holder = std::make_unique<MetadataHolder>();
- }
- _holder->metaFields.set(MetaType::kSearchSortValues);
+ _setCommon(MetaType::kSearchSortValues);
_holder->searchSortValues = vals.getOwned();
}
void serializeForSorter(BufBuilder& buf) const;
+ bool isModified() const {
+ return _modified;
+ }
+
+ /**
+ * Sets the 'modified' flag to the given value. Necessary for implementing a lazy load
+ * optimization for the contained mutable 'DocumentMetadataFields' instance inside the
+ * 'Document' class.
+ */
+ void setModified(bool newValue) {
+ _modified = newValue;
+ }
+
private:
+ inline void _setCommon(MetaType mt) {
+ if (!_holder) {
+ _holder = std::make_unique<MetadataHolder>();
+ }
+ _holder->metaFields.set(mt);
+ _modified = true;
+ }
+
// A simple data struct housing all possible metadata fields.
struct MetadataHolder {
std::bitset<MetaType::kNumFields> metaFields;
@@ -407,6 +380,11 @@ private:
// Null until the first setter is called, at which point a MetadataHolder struct is allocated.
std::unique_ptr<MetadataHolder> _holder;
+
+ // This flag is set to true anytime a 'DocumentMetadataFields' instance is modified. It is used
+ // to optimize document conversion to BSON with metadata; i.e. if there are no modifications we
+ // can directly return the underlying BSON.
+ bool _modified{false};
};
using QueryMetadataBitSet = std::bitset<DocumentMetadataFields::MetaType::kNumFields>;
diff --git a/src/mongo/db/exec/document_value/document_metadata_fields_test.cpp b/src/mongo/db/exec/document_value/document_metadata_fields_test.cpp
index 37231faf159..4a76154f1f6 100644
--- a/src/mongo/db/exec/document_value/document_metadata_fields_test.cpp
+++ b/src/mongo/db/exec/document_value/document_metadata_fields_test.cpp
@@ -296,6 +296,142 @@ TEST(DocumentMetadataFieldsTest, GetTimeseriesBucketMaxTimeExists) {
ASSERT_EQ(source.getTimeseriesBucketMaxTime(), time);
}
+TEST(DocumentMetadataFieldsTest, MetadataIsMarkedModifiedOnSetMetadataField) {
+ // Test setting metadata fields directly.
+
+ auto testFieldSetter = [](std::function<void(DocumentMetadataFields&)> invokeSetter) {
+ DocumentMetadataFields metadata;
+ ASSERT_FALSE(metadata.isModified());
+ invokeSetter(metadata);
+ ASSERT_TRUE(metadata.isModified());
+ };
+
+ testFieldSetter([](DocumentMetadataFields& md) { md.setTextScore(10.0); });
+ testFieldSetter([](DocumentMetadataFields& md) { md.setRandVal(20.0); });
+ testFieldSetter([](DocumentMetadataFields& md) { md.setSortKey(Value(30), true); });
+ testFieldSetter([](DocumentMetadataFields& md) { md.setGeoNearDistance(40.0); });
+ testFieldSetter(
+ [](DocumentMetadataFields& md) { md.setGeoNearPoint(Value{BSON_ARRAY(1 << 2)}); });
+ testFieldSetter([](DocumentMetadataFields& md) { md.setSearchScore(50.0); });
+ testFieldSetter([](DocumentMetadataFields& md) { md.setSearchHighlights(Value{"foo"_sd}); });
+ testFieldSetter([](DocumentMetadataFields& md) { md.setIndexKey(BSON("b" << 1)); });
+ testFieldSetter([](DocumentMetadataFields& md) { md.setRecordId(RecordId{6}); });
+ testFieldSetter([](DocumentMetadataFields& md) {
+ md.setSearchScoreDetails(BSON("scoreDetails"
+ << "foo"));
+ });
+ testFieldSetter([](DocumentMetadataFields& md) { md.setTimeseriesBucketMinTime(Date_t()); });
+ testFieldSetter([](DocumentMetadataFields& md) { md.setTimeseriesBucketMaxTime(Date_t()); });
+ testFieldSetter([](DocumentMetadataFields& md) { md.setSearchSortValues(BSON("a" << 1)); });
+}
+
+TEST(DocumentMetadataFieldsTest, MetadataIsConstructedUnmodified) {
+ // We consider new instances as unmodified for all constructors (default, copy, move) even when
+ // the original metadata has the 'modified' flag set.
+
+ // Testing the default constructor.
+ DocumentMetadataFields metadata1;
+ ASSERT_FALSE(metadata1.isModified());
+ metadata1.setTextScore(10.0);
+ ASSERT_TRUE(metadata1.isModified());
+
+ // Testing the copy-constructor.
+ DocumentMetadataFields metadata2(metadata1);
+ ASSERT_FALSE(metadata2.isModified());
+
+ // Testing the move-constructor.
+ DocumentMetadataFields metadata3(std::move(metadata1));
+ ASSERT_FALSE(metadata3.isModified());
+}
+
+TEST(DocumentMetadataFieldsTest, CopyAssignmentIsModification) {
+ // We consider copy-assignment as modification even when the instances are equal and the copied
+ // metadata does not have the 'modified' flag set.
+
+ DocumentMetadataFields metadata1;
+ ASSERT_FALSE(metadata1.isModified());
+
+ DocumentMetadataFields metadata2;
+ ASSERT_FALSE(metadata2.isModified());
+
+ // Testing copy-assignment to empty object (metadata2 == metadata1).
+ metadata2 = metadata1;
+ ASSERT_TRUE(metadata2.isModified());
+
+ // Testing copy-assignment in a more common case (metadata1 is empty, metadata2 is not empty).
+ metadata2.setTextScore(10.0);
+ metadata2.setModified(false);
+ ASSERT_FALSE(metadata2.isModified());
+ metadata1 = metadata2;
+ ASSERT_TRUE(metadata1.isModified());
+ ASSERT_EQ(metadata2.getTextScore(), metadata1.getTextScore());
+}
+
+TEST(DocumentMetadataFieldsTest, MoveAssignmentIsModification) {
+ // We consider move-assignment as modification even when the instances are equal and the moved
+ // metadata does not have the 'modified' flag set.
+
+ DocumentMetadataFields metadata1;
+ ASSERT_FALSE(metadata1.isModified());
+
+ DocumentMetadataFields metadata2;
+ ASSERT_FALSE(metadata2.isModified());
+
+ // Testing move-assignment to empty object (metadata2 == metadata1).
+ metadata2 = std::move(metadata1);
+ ASSERT_TRUE(metadata2.isModified());
+
+ // Testing move-assignment in a more common case (metadata3 is empty, metadata2 is not empty).
+ metadata2.setTextScore(10.0);
+ metadata2.setModified(false);
+ ASSERT_FALSE(metadata2.isModified());
+
+ DocumentMetadataFields metadata3;
+ ASSERT_FALSE(metadata3.isModified());
+
+ metadata3 = std::move(metadata2);
+ ASSERT_TRUE(metadata3.isModified());
+ ASSERT_EQ(10.0, metadata3.getTextScore());
+}
+
+TEST(DocumentMetadataFieldsTest, MetadataIsMarkedModifiedOnCopyFrom) {
+ DocumentMetadataFields metadata1;
+ metadata1.setRandVal(20.0);
+ ASSERT_TRUE(metadata1.isModified());
+
+ // Testing 'setModified(false)'.
+ metadata1.setModified(false);
+ ASSERT_FALSE(metadata1.isModified());
+
+ // Calling 'copyFrom(metadata1)' modifies metadata2 even when metadata2 == metadata1 and
+ // metadata1 is not marked as modified.
+ DocumentMetadataFields metadata2(metadata1);
+ ASSERT_FALSE(metadata2.isModified());
+ metadata2.copyFrom(metadata1);
+ ASSERT_TRUE(metadata2.isModified());
+}
+
+TEST(DocumentMetadataFieldsTest, MetadataIsMarkedModifiedOnMergeWith) {
+ DocumentMetadataFields metadata1;
+ metadata1.setRandVal(20.0);
+ ASSERT_TRUE(metadata1.isModified());
+
+ metadata1.setModified(false);
+ ASSERT_FALSE(metadata1.isModified());
+
+ // Calling 'mergeWith(metadata1)' modifies metadata2 only when metadata1 has fields not set in
+ // metadata1.
+ DocumentMetadataFields metadata2(metadata1);
+ ASSERT_FALSE(metadata2.isModified());
+ metadata2.mergeWith(metadata1);
+ ASSERT_FALSE(metadata2.isModified());
+
+ DocumentMetadataFields metadata3;
+ ASSERT_FALSE(metadata3.isModified());
+ metadata3.mergeWith(metadata2);
+ ASSERT_TRUE(metadata3.isModified());
+}
+
DEATH_TEST_REGEX(DocumentMetadataFieldsTest,
GetTimeseriesBucketMinTimeDoesntExist,
"Tripwire assertion.*6850100") {
diff --git a/src/mongo/db/exec/document_value/document_value_test.cpp b/src/mongo/db/exec/document_value/document_value_test.cpp
index 36726dcad55..6dd68199a4c 100644
--- a/src/mongo/db/exec/document_value/document_value_test.cpp
+++ b/src/mongo/db/exec/document_value/document_value_test.cpp
@@ -1023,6 +1023,108 @@ TEST(MetaFields, ToAndFromBson) {
ASSERT_BSONOBJ_EQ(BSON("a" << 42), fromBson.metadata().getSearchSortValues());
}
+TEST(MetaFields, ToAndFromBsonTrivialConvertibility) {
+ Value sortKey{Document{{"token"_sd, "SOMENCODEDATA"_sd}}};
+ // Create a document with a backing BSONObj and separate metadata.
+ auto origObjNoMetadata = BSON("a" << 42);
+ ASSERT_FALSE(origObjNoMetadata.hasField(Document::metaFieldSortKey));
+
+ MutableDocument docBuilder;
+ docBuilder.reset(origObjNoMetadata, false);
+ docBuilder.metadata().setSortKey(sortKey, true);
+ Document docWithSeparateBsonAndMetadata = docBuilder.freeze();
+
+ BSONObj origObjWithMetadata = docWithSeparateBsonAndMetadata.toBsonWithMetaData();
+ ASSERT_TRUE(origObjWithMetadata.hasField(Document::metaFieldSortKey));
+ Document restoredDocWithMetadata = Document::fromBsonWithMetaData(origObjWithMetadata);
+ ASSERT_DOCUMENT_EQ(docWithSeparateBsonAndMetadata, restoredDocWithMetadata);
+
+ // Test the 'isTriviallyConvertible()' function.
+ // The original document is trivially convertible without metadata because the metadata was
+ // added to the document separately from the backing BSON object.
+ ASSERT_TRUE(docWithSeparateBsonAndMetadata.isTriviallyConvertible());
+ // The original document is NOT trivially convertible with metadata because the metadata was
+ // added to the document and does not exist in the BSONObj.
+ ASSERT_FALSE(docWithSeparateBsonAndMetadata.isTriviallyConvertibleWithMetadata());
+ // The restored document is trivially convertible with metadata because the underlying BSONObj
+ // contains the metadata serialized from the original document.
+ ASSERT_TRUE(restoredDocWithMetadata.isTriviallyConvertibleWithMetadata());
+ // The restored document is NOT trivially convertible without metadata because the metadata
+ // fields need to be stripped from the underlying BSONObj.
+ ASSERT_FALSE(restoredDocWithMetadata.isTriviallyConvertible());
+
+ // Test that the conversion with metadata 'origObjWithMetadata' -> 'restoredDocWithMetadata' ->
+ // 'restoredObjWithMetadata' is trivial because the backing BSON already contains metadata and
+ // neither the metadata nor the non-metadata fields have been modified.
+ BSONObj restoredObjWithMetadata = restoredDocWithMetadata.toBsonWithMetaData();
+ ASSERT_TRUE(restoredObjWithMetadata.hasField(Document::metaFieldSortKey));
+ // Test that 'restoredObjWithMetadata' is referring to the exact same memory location as
+ // 'origObjWithMetadata', i.e. both objdata() and objsize() match.
+ ASSERT_EQ(origObjWithMetadata.objdata(), restoredObjWithMetadata.objdata());
+ ASSERT_EQ(origObjWithMetadata.objsize(), restoredObjWithMetadata.objsize());
+
+ // Test that the conversion without metadata 'origObjWithMetadata' -> 'restoredDocWithMetadata'
+ // -> 'strippedRestoredObj' is NOT trivial because the backing BSON has metadata that must be
+ // omitted during serialization.
+ BSONObj strippedRestoredObj = restoredDocWithMetadata.toBson();
+ ASSERT_FALSE(strippedRestoredObj.hasField(Document::metaFieldSortKey));
+ // 'restoredDocWithMetadata' is trivially convertible with metadata and converting it to BSON
+ // without metadata will return a new BSON object.
+ ASSERT_TRUE(origObjNoMetadata.binaryEqual(strippedRestoredObj));
+ ASSERT_NE(origObjNoMetadata.objdata(), strippedRestoredObj.objdata());
+
+ // Test that the conversion without metadata 'origObjNoMetadata' ->
+ // 'docWithSeparateBsonAndMetadata' -> 'restoredObjNoMetadata' is trivial because
+ // 'origObjNoMetadata' does not contain any metadata.
+ BSONObj restoredObjNoMetadata = docWithSeparateBsonAndMetadata.toBson();
+ ASSERT_FALSE(restoredObjNoMetadata.hasField(Document::metaFieldSortKey));
+ // Test that 'restoredObjNoMetadata' is referring to the exact same memory location as
+ // 'origObjNoMetadata', i.e. both objdata() and objsize() match.
+ ASSERT_EQ(origObjNoMetadata.objdata(), restoredObjNoMetadata.objdata());
+ ASSERT_EQ(origObjNoMetadata.objsize(), restoredObjNoMetadata.objsize());
+}
+
+TEST(MetaFields, TrivialConvertibilityBsonWithoutMetadata) {
+ // Test that an unmodified document without metadata is trivially convertible to BSON with and
+ // without metadata.
+ auto bsonWithoutMetadata = BSON("a" << 42);
+ Document doc(bsonWithoutMetadata);
+ ASSERT_TRUE(doc.isTriviallyConvertible());
+ ASSERT_TRUE(doc.isTriviallyConvertibleWithMetadata());
+}
+
+TEST(MetaFields, TrivialConvertibilityNoBson) {
+ // A Document created with no backing BSON is not trivially convertible.
+ auto docNoBson = Document{{"a", 42}};
+ ASSERT_FALSE(docNoBson.isTriviallyConvertible());
+ ASSERT_FALSE(docNoBson.isTriviallyConvertibleWithMetadata());
+
+ // An empty Document is trivially convertible, since the default BSONObj is also empty.
+ auto emptyDoc = Document{};
+ ASSERT_TRUE(emptyDoc.isTriviallyConvertible());
+ ASSERT_TRUE(emptyDoc.isTriviallyConvertibleWithMetadata());
+}
+
+TEST(MetaFields, TrivialConvertibilityModified) {
+ // Modifying a document with a backing BSON renders it not trivially convertible.
+ MutableDocument mutDocModified(Document(BSON("a" << 42)));
+ mutDocModified.addField("b", Value(43));
+ auto modifiedDoc = mutDocModified.freeze();
+ ASSERT_FALSE(modifiedDoc.isTriviallyConvertible());
+ ASSERT_FALSE(modifiedDoc.isTriviallyConvertibleWithMetadata());
+}
+
+TEST(MetaFields, TrivialConvertibilityMetadataModified) {
+ // Modifying the metadata of a document with a backing BSON renders it not trivially convertible
+ // with metadata.
+ MutableDocument mutDocModifiedMd(
+ Document::fromBsonWithMetaData(BSON(Document::metaFieldTextScore << 10.0)));
+ mutDocModifiedMd.metadata().setRandVal(20.0);
+ auto modifiedMdDoc = mutDocModifiedMd.freeze();
+ ASSERT_FALSE(modifiedMdDoc.isTriviallyConvertible());
+ ASSERT_FALSE(modifiedMdDoc.isTriviallyConvertibleWithMetadata());
+}
+
TEST(MetaFields, MetaFieldsIncludedInDocumentApproximateSize) {
MutableDocument docBuilder;
docBuilder.metadata().setSearchHighlights(DOC_ARRAY("abc"_sd
diff --git a/src/mongo/db/pipeline/change_stream_split_event_helpers.cpp b/src/mongo/db/pipeline/change_stream_split_event_helpers.cpp
index a0cb8377d5b..19fba5afea5 100644
--- a/src/mongo/db/pipeline/change_stream_split_event_helpers.cpp
+++ b/src/mongo/db/pipeline/change_stream_split_event_helpers.cpp
@@ -35,39 +35,29 @@
namespace mongo {
namespace change_stream_split_event {
-std::pair<Document, size_t> processChangeEventBeforeSplit(Document event, bool withMetadata) {
- BSONObj eventBson;
- Document eventDocToReturn;
- // If this stream needs merging, then we will need to serialize the metadata as well.
+std::pair<Document, size_t> processChangeEventBeforeSplit(const Document& event,
+ bool withMetadata) {
if (withMetadata) {
- // TODO SERVER-74301: Use 'event.toBsonWithMetadata<BSONObj::LargeSizeTrait>()' here.
- eventBson = event.toBson<BSONObj::LargeSizeTrait>();
- eventDocToReturn = event;
+ auto eventBson = event.toBsonWithMetaData<BSONObj::LargeSizeTrait>();
+ return {Document::fromBsonWithMetaData(eventBson), eventBson.objsize()};
} else {
// Serialize just the user data, and add the metadata fields separately.
- eventBson = event.toBson<BSONObj::LargeSizeTrait>();
+ auto eventBson = event.toBson<BSONObj::LargeSizeTrait>();
MutableDocument mutDoc(Document{eventBson});
mutDoc.copyMetaDataFrom(event);
- eventDocToReturn = mutDoc.freeze();
+ return {mutDoc.freeze(), eventBson.objsize()};
}
- // Count the size of the _id field again since the output cursor will have a PBRT.
- size_t eventBsonSize = eventBson.objsize() + eventBson["_id"].size();
- return {eventDocToReturn, eventBsonSize};
-}
-
-size_t getBsonSizeWithMetaData(const Document& doc) {
- // TODO SERVER-74301: Make sure each event is serialized only once in a pipeline.
- return static_cast<size_t>(doc.toBsonWithMetaData().objsize());
-}
-
-size_t getFieldBsonSize(const Document& doc, const StringData& key) {
- // TODO SERVER-74301: Make sure each event is serialized only once in a pipeline.
- return static_cast<size_t>(doc.toBson<BSONObj::LargeSizeTrait>().getField(key).size());
}
std::queue<Document> splitChangeEvent(const Document& event,
size_t maxFragmentBsonSize,
size_t skipFirstFragments) {
+ // Extract the underlying BSON. We expect the event to be trivially convertible either with
+ // or without metadata, so we attempt to optimize the serialization here.
+ auto eventBson =
+ (event.isTriviallyConvertible() ? event.toBson<BSONObj::LargeSizeTrait>()
+ : event.toBsonWithMetaData<BSONObj::LargeSizeTrait>());
+
// Construct a sorted map of fields ordered by size and key for a deterministic greedy strategy
// to minimize the total number of fragments (the first fragment contains as many fields as
// possible). Don't include the original '_id' field, since each fragment will have its own.
@@ -75,7 +65,7 @@ std::queue<Document> splitChangeEvent(const Document& event,
for (auto it = event.fieldIterator(); it.more();) {
auto&& [key, value] = it.next();
if (key != kIdField) {
- sortedFieldMap.emplace(std::make_pair(getFieldBsonSize(event, key), key), value);
+ sortedFieldMap.emplace(std::make_pair(eventBson[key].size(), key), value);
}
}
@@ -102,7 +92,7 @@ std::queue<Document> splitChangeEvent(const Document& event,
Value(Document{{kFragmentNumberField, static_cast<int>(fragments.size())},
{kTotalFragmentsField, 0}}));
- auto fragmentBsonSize = getBsonSizeWithMetaData(fragment.peek());
+ auto fragmentBsonSize = static_cast<size_t>(fragment.peek().toBsonWithMetaData().objsize());
// Fill the fragment with as many fields as we can until we run out or exceed max size.
// Always make sure we add at least one new field on each iteration.
diff --git a/src/mongo/db/pipeline/change_stream_split_event_helpers.h b/src/mongo/db/pipeline/change_stream_split_event_helpers.h
index 472b7eda122..ee39b52dead 100644
--- a/src/mongo/db/pipeline/change_stream_split_event_helpers.h
+++ b/src/mongo/db/pipeline/change_stream_split_event_helpers.h
@@ -46,17 +46,7 @@ constexpr auto kTotalFragmentsField = "of"_sd;
* re-usable. The parameter 'withMetadata' desides whether the metadata is counted.
* Also returns a new document optimized for later serialization by PlanExecutorPipeline.
*/
-std::pair<Document, size_t> processChangeEventBeforeSplit(Document event, bool withMetadata);
-
-/**
- * Returns the size of the BSON object with metadata for the provided document.
- */
-size_t getBsonSizeWithMetaData(const Document& doc);
-
-/**
- * Returns the size of the BSON field for the provided 'key'.
- */
-size_t getFieldBsonSize(const Document& doc, const StringData& key);
+std::pair<Document, size_t> processChangeEventBeforeSplit(const Document& event, bool withMetadata);
/**
* Splits the given change stream 'event' to several sub-events, called fragments. The size of BSON
diff --git a/src/mongo/db/pipeline/change_stream_split_event_helpers_test.cpp b/src/mongo/db/pipeline/change_stream_split_event_helpers_test.cpp
index bc9d110449c..aea1ca6fce8 100644
--- a/src/mongo/db/pipeline/change_stream_split_event_helpers_test.cpp
+++ b/src/mongo/db/pipeline/change_stream_split_event_helpers_test.cpp
@@ -54,7 +54,11 @@ public:
fragment.addField(kIdField, fragment.metadata().getSortKey());
fragment.addField(kSplitEventField,
Value(Document{{kFragmentNumberField, 1}, {kTotalFragmentsField, 1}}));
- minFragmentSize = getBsonSizeWithMetaData(fragment.peek());
+ minFragmentSize = static_cast<size_t>(fragment.peek().toBsonWithMetaData().objsize());
+ }
+
+ size_t getFieldBsonSize(const Document& doc, const StringData& key) {
+ return static_cast<size_t>(doc.toBson<BSONObj::LargeSizeTrait>().getField(key).size());
}
MutableDocument doc;
diff --git a/src/mongo/db/pipeline/document_source_change_stream_split_large_event.cpp b/src/mongo/db/pipeline/document_source_change_stream_split_large_event.cpp
index e57326fb6f2..7c997eae608 100644
--- a/src/mongo/db/pipeline/document_source_change_stream_split_large_event.cpp
+++ b/src/mongo/db/pipeline/document_source_change_stream_split_large_event.cpp
@@ -123,23 +123,23 @@ DocumentSource::GetNextResult DocumentSourceChangeStreamSplitLargeEvent::doGetNe
// Process the event to see if it is within the size limit. We have to serialize the document to
// perform this check, but the helper will also produce a new 'Document' which - if it is small
// enough to be returned - will not need to be re-serialized by the plan executor.
- // TODO SERVER-74301: Consider 'this->pExpCtx->forPerShardCursor' here.
auto [eventDoc, eventBsonSize] = change_stream_split_event::processChangeEventBeforeSplit(
- input.releaseDocument(), this->pExpCtx->needsMerge);
+ input.getDocument(), this->pExpCtx->needsMerge || this->pExpCtx->forPerShardCursor);
+
+ // Make sure to leave some space for the postBatchResumeToken in the cursor response object.
+ size_t tokenSize = eventDoc.metadata().getSortKey().getDocument().toBson().objsize();
// If we are resuming from a split event, check whether this is it. If so, extract the fragment
// number from which we are resuming. Otherwise, we have already scanned past the resume point,
// which implies that it may be on another shard. Continue to split this event without skipping.
- size_t skipFragments = _handleResumeAfterSplit(eventDoc, eventBsonSize);
+ size_t skipFragments = _handleResumeAfterSplit(eventDoc, eventBsonSize + tokenSize);
// Before proceeding, check whether the event is small enough to be returned as-is.
- if (eventBsonSize <= kBSONObjMaxChangeEventSize) {
+ if (eventBsonSize + tokenSize <= kBSONObjMaxChangeEventSize) {
return std::move(eventDoc);
}
- // Split the event into N appropriately-sized fragments. Make sure to leave some space for the
- // postBatchResumeToken in the cursor response object.
- size_t tokenSize = eventDoc.metadata().getSortKey().getDocument().toBson().objsize();
+ // Split the event into N appropriately-sized fragments.
_splitEventQueue = change_stream_split_event::splitChangeEvent(
eventDoc, kBSONObjMaxChangeEventSize - tokenSize, skipFragments);
diff --git a/src/mongo/db/query/sbe_stage_builder_lookup_test.cpp b/src/mongo/db/query/sbe_stage_builder_lookup_test.cpp
index 0665799e84d..d6ce29790d1 100644
--- a/src/mongo/db/query/sbe_stage_builder_lookup_test.cpp
+++ b/src/mongo/db/query/sbe_stage_builder_lookup_test.cpp
@@ -224,7 +224,7 @@ public:
expectedDocuments.reserve(expectedPairs.size());
for (auto& [localDocument, matchedDocuments] : expectedPairs) {
MutableDocument expectedDocument;
- expectedDocument.reset(localDocument, false /* stripMetadata */);
+ expectedDocument.reset(localDocument, false /* bsonHasMetadata */);
std::vector<mongo::Value> matchedValues{matchedDocuments.begin(),
matchedDocuments.end()};