From 41ece0ab8e660dd2993141334ce10a189516bdb4 Mon Sep 17 00:00:00 2001 From: Romans Kasperovics Date: Thu, 6 Apr 2023 06:55:11 +0000 Subject: SERVER-74301 Optimize serialization behavior for $changeStreamSplitLargeEvent --- src/mongo/db/exec/document_value/document.cpp | 45 +++---- src/mongo/db/exec/document_value/document.h | 45 ++++--- .../db/exec/document_value/document_internal.h | 33 ++--- .../document_value/document_metadata_fields.cpp | 2 + .../exec/document_value/document_metadata_fields.h | 100 ++++++--------- .../document_metadata_fields_test.cpp | 136 +++++++++++++++++++++ .../db/exec/document_value/document_value_test.cpp | 102 ++++++++++++++++ .../pipeline/change_stream_split_event_helpers.cpp | 38 +++--- .../pipeline/change_stream_split_event_helpers.h | 12 +- .../change_stream_split_event_helpers_test.cpp | 6 +- ...ment_source_change_stream_split_large_event.cpp | 14 +-- .../db/query/sbe_stage_builder_lookup_test.cpp | 2 +- 12 files changed, 379 insertions(+), 156 deletions(-) diff --git a/src/mongo/db/exec/document_value/document.cpp b/src/mongo/db/exec/document_value/document.cpp index 8715d55e89e..b71b109aa65 100644 --- a/src/mongo/db/exec/document_value/document.cpp +++ b/src/mongo/db/exec/document_value/document.cpp @@ -131,7 +131,7 @@ bool DocumentStorageIterator::shouldSkipDeleted() { // If we strip the metadata see if a field name matches the known list. All metadata fields // start with '$' so optimize for a quick bailout. - if (_storage->stripMetadata() && fieldName[0] == '$' && + if (_storage->bsonHasMetadata() && fieldName[0] == '$' && Document::allMetadataFieldNames.contains(fieldName)) { return true; } @@ -344,8 +344,8 @@ void DocumentStorage::reserveFields(size_t expectedFields) { } intrusive_ptr DocumentStorage::clone() const { - auto out = - make_intrusive(_bson, _stripMetadata, _modified, _numBytesFromBSONInCache); + auto out = make_intrusive( + _bson, _bsonHasMetadata, _modified, _numBytesFromBSONInCache); if (_cache) { // Make a copy of the buffer with the fields. @@ -391,10 +391,10 @@ DocumentStorage::~DocumentStorage() { } } -void DocumentStorage::reset(const BSONObj& bson, bool stripMetadata) { +void DocumentStorage::reset(const BSONObj& bson, bool bsonHasMetadata) { _bson = bson; _numBytesFromBSONInCache = 0; - _stripMetadata = stripMetadata; + _bsonHasMetadata = bsonHasMetadata; _modified = false; _snapshottedSize = 0; @@ -423,6 +423,8 @@ void DocumentStorage::loadLazyMetadata() const { return; } + bool oldModified = _metadataFields.isModified(); + BSONObjIterator it(_bson); while (it.more()) { BSONElement elem(it.next()); @@ -469,6 +471,7 @@ void DocumentStorage::loadLazyMetadata() const { } } + _metadataFields.setModified(oldModified); _haveLazyLoadedMetadata = true; } @@ -535,37 +538,35 @@ constexpr StringData Document::metaFieldSearchHighlights; constexpr StringData Document::metaFieldSearchScoreDetails; constexpr StringData Document::metaFieldSearchSortValues; -BSONObj Document::toBsonWithMetaData() const { - BSONObjBuilder bb; - toBson(&bb); +void Document::toBsonWithMetaData(BSONObjBuilder* builder) const { + toBson(builder); if (!metadata()) { - return bb.obj(); + return; } if (metadata().hasTextScore()) - bb.append(metaFieldTextScore, metadata().getTextScore()); + builder->append(metaFieldTextScore, metadata().getTextScore()); if (metadata().hasRandVal()) - bb.append(metaFieldRandVal, metadata().getRandVal()); + builder->append(metaFieldRandVal, metadata().getRandVal()); if (metadata().hasSortKey()) - bb.append(metaFieldSortKey, - DocumentMetadataFields::serializeSortKey(metadata().isSingleElementKey(), - metadata().getSortKey())); + builder->append(metaFieldSortKey, + DocumentMetadataFields::serializeSortKey(metadata().isSingleElementKey(), + metadata().getSortKey())); if (metadata().hasGeoNearDistance()) - bb.append(metaFieldGeoNearDistance, metadata().getGeoNearDistance()); + builder->append(metaFieldGeoNearDistance, metadata().getGeoNearDistance()); if (metadata().hasGeoNearPoint()) - metadata().getGeoNearPoint().addToBsonObj(&bb, metaFieldGeoNearPoint); + metadata().getGeoNearPoint().addToBsonObj(builder, metaFieldGeoNearPoint); if (metadata().hasSearchScore()) - bb.append(metaFieldSearchScore, metadata().getSearchScore()); + builder->append(metaFieldSearchScore, metadata().getSearchScore()); if (metadata().hasSearchHighlights()) - metadata().getSearchHighlights().addToBsonObj(&bb, metaFieldSearchHighlights); + metadata().getSearchHighlights().addToBsonObj(builder, metaFieldSearchHighlights); if (metadata().hasIndexKey()) - bb.append(metaFieldIndexKey, metadata().getIndexKey()); + builder->append(metaFieldIndexKey, metadata().getIndexKey()); if (metadata().hasSearchScoreDetails()) - bb.append(metaFieldSearchScoreDetails, metadata().getSearchScoreDetails()); + builder->append(metaFieldSearchScoreDetails, metadata().getSearchScoreDetails()); if (metadata().hasSearchSortValues()) { - bb.append(metaFieldSearchSortValues, metadata().getSearchSortValues()); + builder->append(metaFieldSearchSortValues, metadata().getSearchSortValues()); } - return bb.obj(); } Document Document::fromBsonWithMetaData(const BSONObj& bson) { diff --git a/src/mongo/db/exec/document_value/document.h b/src/mongo/db/exec/document_value/document.h index 7499bee8d79..83db6052aeb 100644 --- a/src/mongo/db/exec/document_value/document.h +++ b/src/mongo/db/exec/document_value/document.h @@ -263,7 +263,15 @@ public: * storage is already in BSON format and there are no damages. */ bool isTriviallyConvertible() const { - return !storage().isModified() && !storage().stripMetadata(); + return !storage().isModified() && !storage().bsonHasMetadata(); + } + + /** + * Returns true, if this document is trivially convertible to BSON with metadata, meaning the + * underlying storage is already in BSON format and there are no damages. + */ + bool isTriviallyConvertibleWithMetadata() const { + return !storage().isModified() && !storage().isMetadataModified(); } /** @@ -296,7 +304,18 @@ public: /** * Like the 'toBson()' method, but includes metadata as top-level fields. */ - BSONObj toBsonWithMetaData() const; + void toBsonWithMetaData(BSONObjBuilder* builder) const; + + template + BSONObj toBsonWithMetaData() const { + if (isTriviallyConvertibleWithMetadata()) { + return storage().bsonObj(); + } + + BSONObjBuilder bb; + toBsonWithMetaData(&bb); + return bb.obj(); + } /** * Like Document(BSONObj) but treats top-level fields with special names as metadata. @@ -536,13 +555,11 @@ public: } /** - * Replace the current base Document with bson. - * - * The paramater 'stripMetadata' controls whether we strip the metadata fields from the - * underlying bson when converting the document object back to bson. + * Replace the current base Document with the BSON object. Setting 'bsonHasMetadata' to true + * signals that the BSON object contains metadata fields. */ - void reset(const BSONObj& bson, bool stripMetadata) { - storage().reset(bson, stripMetadata); + void reset(const BSONObj& bson, bool bsonHasMetadata) { + storage().reset(bson, bsonHasMetadata); } /** Add the given field to the Document. @@ -725,13 +742,13 @@ public: storage().makeOwned(); } - /** Create a new document storage with the BSON object. - * - * The optional paramater 'stripMetadata' controls whether we strip the metadata fields (the - * complete list is in Document::allMetadataFieldNames). + /** + * Creates a new document storage with the BSON object. Setting 'bsonHasMetadata' to true + * signals that the BSON object contains metadata fields (the complete list is in + * Document::allMetadataFieldNames). */ - DocumentStorage& newStorageWithBson(const BSONObj& bson, bool stripMetadata) { - reset(make_intrusive(bson, stripMetadata, false, 0)); + DocumentStorage& newStorageWithBson(const BSONObj& bson, bool bsonHasMetadata) { + reset(make_intrusive(bson, bsonHasMetadata, false, 0)); return const_cast(*storagePtr()); } diff --git a/src/mongo/db/exec/document_value/document_internal.h b/src/mongo/db/exec/document_value/document_internal.h index 2507cb0417b..b380247baf6 100644 --- a/src/mongo/db/exec/document_value/document_internal.h +++ b/src/mongo/db/exec/document_value/document_internal.h @@ -359,11 +359,11 @@ public: /** * Construct a storage from the BSON. The BSON is lazily processed as fields are requested from - * the document. If we know that the BSON does not contain any metadata fields we can set the - * 'stripMetadata' flag to false that will speed up the field iteration. + * the document. If we know that the BSON contains metadata fields we can set the + * 'bsonHasMetadata' flag to true. */ DocumentStorage(const BSONObj& bson, - bool stripMetadata, + bool bsonHasMetadata, bool modified, uint32_t numBytesFromBSONInCache) : _cache(nullptr), @@ -373,12 +373,12 @@ public: _hashTabMask(0), _bson(bson), _numBytesFromBSONInCache(numBytesFromBSONInCache), - _stripMetadata(stripMetadata), + _bsonHasMetadata(bsonHasMetadata), _modified(modified) {} ~DocumentStorage(); - void reset(const BSONObj& bson, bool stripMetadata); + void reset(const BSONObj& bson, bool bsonHasMetadata); /** * Populates the cache by recursively walking the underlying BSON. @@ -551,7 +551,7 @@ public: * WorkingSetMember. */ const DocumentMetadataFields& metadata() const { - if (_stripMetadata) { + if (_bsonHasMetadata) { loadLazyMetadata(); } return _metadataFields; @@ -589,8 +589,8 @@ public: return _firstElement ? _firstElement->plusBytes(_usedBytes) : nullptr; } - auto stripMetadata() const { - return _stripMetadata; + auto bsonHasMetadata() const { + return _bsonHasMetadata; } Position constructInCache(const BSONElement& elem); @@ -599,6 +599,10 @@ public: return _modified; } + auto isMetadataModified() const { + return _metadataFields.isModified(); + } + auto bsonObj() const { return _bson; } @@ -708,17 +712,16 @@ private: // whole backing BSON, but only the portion of backing BSON that's not already in the cache. uint32_t _numBytesFromBSONInCache = 0; - // If '_stripMetadata' is true, tracks whether or not the metadata has been lazy-loaded from the - // backing '_bson' object. If so, then no attempt will be made to load the metadata again, even - // if the metadata has been released by a call to 'releaseMetadata()'. + // Tracks whether or not the metadata has been lazy-loaded from the backing '_bson' object. If + // so, then no attempt will be made to load the metadata again, even if the metadata has been + // released by a call to 'releaseMetadata()'. mutable bool _haveLazyLoadedMetadata = false; mutable DocumentMetadataFields _metadataFields; - // The storage constructed from a BSON value may contain metadata. When we process the BSON we - // have to move the metadata to the MetadataFields object. If we know that the BSON does not - // have any metadata we can set _stripMetadata to false that will speed up the iteration. - bool _stripMetadata{false}; + // True if this storage was constructed from BSON with metadata. Serializing this object using + // the 'toBson()' method will omit (strip) the metadata fields. + bool _bsonHasMetadata{false}; // This flag is set to true anytime the storage returns a mutable field. It is used to optimize // a conversion to BSON; i.e. if there are not any modifications we can directly return _bson. diff --git a/src/mongo/db/exec/document_value/document_metadata_fields.cpp b/src/mongo/db/exec/document_value/document_metadata_fields.cpp index 511c12027f9..d33eee6c140 100644 --- a/src/mongo/db/exec/document_value/document_metadata_fields.cpp +++ b/src/mongo/db/exec/document_value/document_metadata_fields.cpp @@ -46,6 +46,7 @@ DocumentMetadataFields::DocumentMetadataFields(const DocumentMetadataFields& oth DocumentMetadataFields& DocumentMetadataFields::operator=(const DocumentMetadataFields& other) { _holder = other._holder ? std::make_unique(*other._holder) : nullptr; + _modified = true; return *this; } @@ -54,6 +55,7 @@ DocumentMetadataFields::DocumentMetadataFields(DocumentMetadataFields&& other) DocumentMetadataFields& DocumentMetadataFields::operator=(DocumentMetadataFields&& other) { _holder = std::move(other._holder); + _modified = true; return *this; } diff --git a/src/mongo/db/exec/document_value/document_metadata_fields.h b/src/mongo/db/exec/document_value/document_metadata_fields.h index 6f758a52d1e..6ca3ad3198a 100644 --- a/src/mongo/db/exec/document_value/document_metadata_fields.h +++ b/src/mongo/db/exec/document_value/document_metadata_fields.h @@ -149,11 +149,7 @@ public: } void setTextScore(double score) { - if (!_holder) { - _holder = std::make_unique(); - } - - _holder->metaFields.set(MetaType::kTextScore); + _setCommon(MetaType::kTextScore); _holder->textScore = score; } @@ -167,11 +163,7 @@ public: } void setRandVal(double val) { - if (!_holder) { - _holder = std::make_unique(); - } - - _holder->metaFields.set(MetaType::kRandVal); + _setCommon(MetaType::kRandVal); _holder->randVal = val; } @@ -185,11 +177,7 @@ public: } void setSortKey(Value sortKey, bool isSingleElementKey) { - if (!_holder) { - _holder = std::make_unique(); - } - - _holder->metaFields.set(MetaType::kSortKey); + _setCommon(MetaType::kSortKey); _holder->isSingleElementKey = isSingleElementKey; _holder->sortKey = std::move(sortKey); } @@ -208,11 +196,7 @@ public: } void setGeoNearDistance(double dist) { - if (!_holder) { - _holder = std::make_unique(); - } - - _holder->metaFields.set(MetaType::kGeoNearDist); + _setCommon(MetaType::kGeoNearDist); _holder->geoNearDistance = dist; } @@ -226,11 +210,7 @@ public: } void setGeoNearPoint(Value point) { - if (!_holder) { - _holder = std::make_unique(); - } - - _holder->metaFields.set(MetaType::kGeoNearPoint); + _setCommon(MetaType::kGeoNearPoint); _holder->geoNearPoint = std::move(point); } @@ -244,11 +224,7 @@ public: } void setSearchScore(double score) { - if (!_holder) { - _holder = std::make_unique(); - } - - _holder->metaFields.set(MetaType::kSearchScore); + _setCommon(MetaType::kSearchScore); _holder->searchScore = score; } @@ -262,11 +238,7 @@ public: } void setSearchHighlights(Value highlights) { - if (!_holder) { - _holder = std::make_unique(); - } - - _holder->metaFields.set(MetaType::kSearchHighlights); + _setCommon(MetaType::kSearchHighlights); _holder->searchHighlights = highlights; } @@ -280,11 +252,7 @@ public: } void setIndexKey(BSONObj indexKey) { - if (!_holder) { - _holder = std::make_unique(); - } - - _holder->metaFields.set(MetaType::kIndexKey); + _setCommon(MetaType::kIndexKey); _holder->indexKey = indexKey.getOwned(); } @@ -298,11 +266,7 @@ public: } void setRecordId(RecordId rid) { - if (!_holder) { - _holder = std::make_unique(); - } - - _holder->metaFields.set(MetaType::kRecordId); + _setCommon(MetaType::kRecordId); _holder->recordId = std::move(rid); } @@ -316,10 +280,7 @@ public: } void setSearchScoreDetails(BSONObj details) { - if (!_holder) { - _holder = std::make_unique(); - } - _holder->metaFields.set(MetaType::kSearchScoreDetails); + _setCommon(MetaType::kSearchScoreDetails); _holder->searchScoreDetails = details.getOwned(); } @@ -335,10 +296,7 @@ public: } void setTimeseriesBucketMinTime(Date_t time) { - if (!_holder) { - _holder = std::make_unique(); - } - _holder->metaFields.set(MetaType::kTimeseriesBucketMinTime); + _setCommon(MetaType::kTimeseriesBucketMinTime); _holder->timeseriesBucketMinTime = time; } @@ -354,10 +312,7 @@ public: } void setTimeseriesBucketMaxTime(Date_t time) { - if (!_holder) { - _holder = std::make_unique(); - } - _holder->metaFields.set(MetaType::kTimeseriesBucketMaxTime); + _setCommon(MetaType::kTimeseriesBucketMaxTime); _holder->timeseriesBucketMaxTime = time; } @@ -371,16 +326,34 @@ public: } void setSearchSortValues(BSONObj vals) { - if (!_holder) { - _holder = std::make_unique(); - } - _holder->metaFields.set(MetaType::kSearchSortValues); + _setCommon(MetaType::kSearchSortValues); _holder->searchSortValues = vals.getOwned(); } void serializeForSorter(BufBuilder& buf) const; + bool isModified() const { + return _modified; + } + + /** + * Sets the 'modified' flag to the given value. Necessary for implementing a lazy load + * optimization for the contained mutable 'DocumentMetadataFields' instance inside the + * 'Document' class. + */ + void setModified(bool newValue) { + _modified = newValue; + } + private: + inline void _setCommon(MetaType mt) { + if (!_holder) { + _holder = std::make_unique(); + } + _holder->metaFields.set(mt); + _modified = true; + } + // A simple data struct housing all possible metadata fields. struct MetadataHolder { std::bitset metaFields; @@ -407,6 +380,11 @@ private: // Null until the first setter is called, at which point a MetadataHolder struct is allocated. std::unique_ptr _holder; + + // This flag is set to true anytime a 'DocumentMetadataFields' instance is modified. It is used + // to optimize document conversion to BSON with metadata; i.e. if there are no modifications we + // can directly return the underlying BSON. + bool _modified{false}; }; using QueryMetadataBitSet = std::bitset; diff --git a/src/mongo/db/exec/document_value/document_metadata_fields_test.cpp b/src/mongo/db/exec/document_value/document_metadata_fields_test.cpp index 37231faf159..4a76154f1f6 100644 --- a/src/mongo/db/exec/document_value/document_metadata_fields_test.cpp +++ b/src/mongo/db/exec/document_value/document_metadata_fields_test.cpp @@ -296,6 +296,142 @@ TEST(DocumentMetadataFieldsTest, GetTimeseriesBucketMaxTimeExists) { ASSERT_EQ(source.getTimeseriesBucketMaxTime(), time); } +TEST(DocumentMetadataFieldsTest, MetadataIsMarkedModifiedOnSetMetadataField) { + // Test setting metadata fields directly. + + auto testFieldSetter = [](std::function invokeSetter) { + DocumentMetadataFields metadata; + ASSERT_FALSE(metadata.isModified()); + invokeSetter(metadata); + ASSERT_TRUE(metadata.isModified()); + }; + + testFieldSetter([](DocumentMetadataFields& md) { md.setTextScore(10.0); }); + testFieldSetter([](DocumentMetadataFields& md) { md.setRandVal(20.0); }); + testFieldSetter([](DocumentMetadataFields& md) { md.setSortKey(Value(30), true); }); + testFieldSetter([](DocumentMetadataFields& md) { md.setGeoNearDistance(40.0); }); + testFieldSetter( + [](DocumentMetadataFields& md) { md.setGeoNearPoint(Value{BSON_ARRAY(1 << 2)}); }); + testFieldSetter([](DocumentMetadataFields& md) { md.setSearchScore(50.0); }); + testFieldSetter([](DocumentMetadataFields& md) { md.setSearchHighlights(Value{"foo"_sd}); }); + testFieldSetter([](DocumentMetadataFields& md) { md.setIndexKey(BSON("b" << 1)); }); + testFieldSetter([](DocumentMetadataFields& md) { md.setRecordId(RecordId{6}); }); + testFieldSetter([](DocumentMetadataFields& md) { + md.setSearchScoreDetails(BSON("scoreDetails" + << "foo")); + }); + testFieldSetter([](DocumentMetadataFields& md) { md.setTimeseriesBucketMinTime(Date_t()); }); + testFieldSetter([](DocumentMetadataFields& md) { md.setTimeseriesBucketMaxTime(Date_t()); }); + testFieldSetter([](DocumentMetadataFields& md) { md.setSearchSortValues(BSON("a" << 1)); }); +} + +TEST(DocumentMetadataFieldsTest, MetadataIsConstructedUnmodified) { + // We consider new instances as unmodified for all constructors (default, copy, move) even when + // the original metadata has the 'modified' flag set. + + // Testing the default constructor. + DocumentMetadataFields metadata1; + ASSERT_FALSE(metadata1.isModified()); + metadata1.setTextScore(10.0); + ASSERT_TRUE(metadata1.isModified()); + + // Testing the copy-constructor. + DocumentMetadataFields metadata2(metadata1); + ASSERT_FALSE(metadata2.isModified()); + + // Testing the move-constructor. + DocumentMetadataFields metadata3(std::move(metadata1)); + ASSERT_FALSE(metadata3.isModified()); +} + +TEST(DocumentMetadataFieldsTest, CopyAssignmentIsModification) { + // We consider copy-assignment as modification even when the instances are equal and the copied + // metadata does not have the 'modified' flag set. + + DocumentMetadataFields metadata1; + ASSERT_FALSE(metadata1.isModified()); + + DocumentMetadataFields metadata2; + ASSERT_FALSE(metadata2.isModified()); + + // Testing copy-assignment to empty object (metadata2 == metadata1). + metadata2 = metadata1; + ASSERT_TRUE(metadata2.isModified()); + + // Testing copy-assignment in a more common case (metadata1 is empty, metadata2 is not empty). + metadata2.setTextScore(10.0); + metadata2.setModified(false); + ASSERT_FALSE(metadata2.isModified()); + metadata1 = metadata2; + ASSERT_TRUE(metadata1.isModified()); + ASSERT_EQ(metadata2.getTextScore(), metadata1.getTextScore()); +} + +TEST(DocumentMetadataFieldsTest, MoveAssignmentIsModification) { + // We consider move-assignment as modification even when the instances are equal and the moved + // metadata does not have the 'modified' flag set. + + DocumentMetadataFields metadata1; + ASSERT_FALSE(metadata1.isModified()); + + DocumentMetadataFields metadata2; + ASSERT_FALSE(metadata2.isModified()); + + // Testing move-assignment to empty object (metadata2 == metadata1). + metadata2 = std::move(metadata1); + ASSERT_TRUE(metadata2.isModified()); + + // Testing move-assignment in a more common case (metadata3 is empty, metadata2 is not empty). + metadata2.setTextScore(10.0); + metadata2.setModified(false); + ASSERT_FALSE(metadata2.isModified()); + + DocumentMetadataFields metadata3; + ASSERT_FALSE(metadata3.isModified()); + + metadata3 = std::move(metadata2); + ASSERT_TRUE(metadata3.isModified()); + ASSERT_EQ(10.0, metadata3.getTextScore()); +} + +TEST(DocumentMetadataFieldsTest, MetadataIsMarkedModifiedOnCopyFrom) { + DocumentMetadataFields metadata1; + metadata1.setRandVal(20.0); + ASSERT_TRUE(metadata1.isModified()); + + // Testing 'setModified(false)'. + metadata1.setModified(false); + ASSERT_FALSE(metadata1.isModified()); + + // Calling 'copyFrom(metadata1)' modifies metadata2 even when metadata2 == metadata1 and + // metadata1 is not marked as modified. + DocumentMetadataFields metadata2(metadata1); + ASSERT_FALSE(metadata2.isModified()); + metadata2.copyFrom(metadata1); + ASSERT_TRUE(metadata2.isModified()); +} + +TEST(DocumentMetadataFieldsTest, MetadataIsMarkedModifiedOnMergeWith) { + DocumentMetadataFields metadata1; + metadata1.setRandVal(20.0); + ASSERT_TRUE(metadata1.isModified()); + + metadata1.setModified(false); + ASSERT_FALSE(metadata1.isModified()); + + // Calling 'mergeWith(metadata1)' modifies metadata2 only when metadata1 has fields not set in + // metadata1. + DocumentMetadataFields metadata2(metadata1); + ASSERT_FALSE(metadata2.isModified()); + metadata2.mergeWith(metadata1); + ASSERT_FALSE(metadata2.isModified()); + + DocumentMetadataFields metadata3; + ASSERT_FALSE(metadata3.isModified()); + metadata3.mergeWith(metadata2); + ASSERT_TRUE(metadata3.isModified()); +} + DEATH_TEST_REGEX(DocumentMetadataFieldsTest, GetTimeseriesBucketMinTimeDoesntExist, "Tripwire assertion.*6850100") { diff --git a/src/mongo/db/exec/document_value/document_value_test.cpp b/src/mongo/db/exec/document_value/document_value_test.cpp index 36726dcad55..6dd68199a4c 100644 --- a/src/mongo/db/exec/document_value/document_value_test.cpp +++ b/src/mongo/db/exec/document_value/document_value_test.cpp @@ -1023,6 +1023,108 @@ TEST(MetaFields, ToAndFromBson) { ASSERT_BSONOBJ_EQ(BSON("a" << 42), fromBson.metadata().getSearchSortValues()); } +TEST(MetaFields, ToAndFromBsonTrivialConvertibility) { + Value sortKey{Document{{"token"_sd, "SOMENCODEDATA"_sd}}}; + // Create a document with a backing BSONObj and separate metadata. + auto origObjNoMetadata = BSON("a" << 42); + ASSERT_FALSE(origObjNoMetadata.hasField(Document::metaFieldSortKey)); + + MutableDocument docBuilder; + docBuilder.reset(origObjNoMetadata, false); + docBuilder.metadata().setSortKey(sortKey, true); + Document docWithSeparateBsonAndMetadata = docBuilder.freeze(); + + BSONObj origObjWithMetadata = docWithSeparateBsonAndMetadata.toBsonWithMetaData(); + ASSERT_TRUE(origObjWithMetadata.hasField(Document::metaFieldSortKey)); + Document restoredDocWithMetadata = Document::fromBsonWithMetaData(origObjWithMetadata); + ASSERT_DOCUMENT_EQ(docWithSeparateBsonAndMetadata, restoredDocWithMetadata); + + // Test the 'isTriviallyConvertible()' function. + // The original document is trivially convertible without metadata because the metadata was + // added to the document separately from the backing BSON object. + ASSERT_TRUE(docWithSeparateBsonAndMetadata.isTriviallyConvertible()); + // The original document is NOT trivially convertible with metadata because the metadata was + // added to the document and does not exist in the BSONObj. + ASSERT_FALSE(docWithSeparateBsonAndMetadata.isTriviallyConvertibleWithMetadata()); + // The restored document is trivially convertible with metadata because the underlying BSONObj + // contains the metadata serialized from the original document. + ASSERT_TRUE(restoredDocWithMetadata.isTriviallyConvertibleWithMetadata()); + // The restored document is NOT trivially convertible without metadata because the metadata + // fields need to be stripped from the underlying BSONObj. + ASSERT_FALSE(restoredDocWithMetadata.isTriviallyConvertible()); + + // Test that the conversion with metadata 'origObjWithMetadata' -> 'restoredDocWithMetadata' -> + // 'restoredObjWithMetadata' is trivial because the backing BSON already contains metadata and + // neither the metadata nor the non-metadata fields have been modified. + BSONObj restoredObjWithMetadata = restoredDocWithMetadata.toBsonWithMetaData(); + ASSERT_TRUE(restoredObjWithMetadata.hasField(Document::metaFieldSortKey)); + // Test that 'restoredObjWithMetadata' is referring to the exact same memory location as + // 'origObjWithMetadata', i.e. both objdata() and objsize() match. + ASSERT_EQ(origObjWithMetadata.objdata(), restoredObjWithMetadata.objdata()); + ASSERT_EQ(origObjWithMetadata.objsize(), restoredObjWithMetadata.objsize()); + + // Test that the conversion without metadata 'origObjWithMetadata' -> 'restoredDocWithMetadata' + // -> 'strippedRestoredObj' is NOT trivial because the backing BSON has metadata that must be + // omitted during serialization. + BSONObj strippedRestoredObj = restoredDocWithMetadata.toBson(); + ASSERT_FALSE(strippedRestoredObj.hasField(Document::metaFieldSortKey)); + // 'restoredDocWithMetadata' is trivially convertible with metadata and converting it to BSON + // without metadata will return a new BSON object. + ASSERT_TRUE(origObjNoMetadata.binaryEqual(strippedRestoredObj)); + ASSERT_NE(origObjNoMetadata.objdata(), strippedRestoredObj.objdata()); + + // Test that the conversion without metadata 'origObjNoMetadata' -> + // 'docWithSeparateBsonAndMetadata' -> 'restoredObjNoMetadata' is trivial because + // 'origObjNoMetadata' does not contain any metadata. + BSONObj restoredObjNoMetadata = docWithSeparateBsonAndMetadata.toBson(); + ASSERT_FALSE(restoredObjNoMetadata.hasField(Document::metaFieldSortKey)); + // Test that 'restoredObjNoMetadata' is referring to the exact same memory location as + // 'origObjNoMetadata', i.e. both objdata() and objsize() match. + ASSERT_EQ(origObjNoMetadata.objdata(), restoredObjNoMetadata.objdata()); + ASSERT_EQ(origObjNoMetadata.objsize(), restoredObjNoMetadata.objsize()); +} + +TEST(MetaFields, TrivialConvertibilityBsonWithoutMetadata) { + // Test that an unmodified document without metadata is trivially convertible to BSON with and + // without metadata. + auto bsonWithoutMetadata = BSON("a" << 42); + Document doc(bsonWithoutMetadata); + ASSERT_TRUE(doc.isTriviallyConvertible()); + ASSERT_TRUE(doc.isTriviallyConvertibleWithMetadata()); +} + +TEST(MetaFields, TrivialConvertibilityNoBson) { + // A Document created with no backing BSON is not trivially convertible. + auto docNoBson = Document{{"a", 42}}; + ASSERT_FALSE(docNoBson.isTriviallyConvertible()); + ASSERT_FALSE(docNoBson.isTriviallyConvertibleWithMetadata()); + + // An empty Document is trivially convertible, since the default BSONObj is also empty. + auto emptyDoc = Document{}; + ASSERT_TRUE(emptyDoc.isTriviallyConvertible()); + ASSERT_TRUE(emptyDoc.isTriviallyConvertibleWithMetadata()); +} + +TEST(MetaFields, TrivialConvertibilityModified) { + // Modifying a document with a backing BSON renders it not trivially convertible. + MutableDocument mutDocModified(Document(BSON("a" << 42))); + mutDocModified.addField("b", Value(43)); + auto modifiedDoc = mutDocModified.freeze(); + ASSERT_FALSE(modifiedDoc.isTriviallyConvertible()); + ASSERT_FALSE(modifiedDoc.isTriviallyConvertibleWithMetadata()); +} + +TEST(MetaFields, TrivialConvertibilityMetadataModified) { + // Modifying the metadata of a document with a backing BSON renders it not trivially convertible + // with metadata. + MutableDocument mutDocModifiedMd( + Document::fromBsonWithMetaData(BSON(Document::metaFieldTextScore << 10.0))); + mutDocModifiedMd.metadata().setRandVal(20.0); + auto modifiedMdDoc = mutDocModifiedMd.freeze(); + ASSERT_FALSE(modifiedMdDoc.isTriviallyConvertible()); + ASSERT_FALSE(modifiedMdDoc.isTriviallyConvertibleWithMetadata()); +} + TEST(MetaFields, MetaFieldsIncludedInDocumentApproximateSize) { MutableDocument docBuilder; docBuilder.metadata().setSearchHighlights(DOC_ARRAY("abc"_sd diff --git a/src/mongo/db/pipeline/change_stream_split_event_helpers.cpp b/src/mongo/db/pipeline/change_stream_split_event_helpers.cpp index a0cb8377d5b..19fba5afea5 100644 --- a/src/mongo/db/pipeline/change_stream_split_event_helpers.cpp +++ b/src/mongo/db/pipeline/change_stream_split_event_helpers.cpp @@ -35,39 +35,29 @@ namespace mongo { namespace change_stream_split_event { -std::pair processChangeEventBeforeSplit(Document event, bool withMetadata) { - BSONObj eventBson; - Document eventDocToReturn; - // If this stream needs merging, then we will need to serialize the metadata as well. +std::pair processChangeEventBeforeSplit(const Document& event, + bool withMetadata) { if (withMetadata) { - // TODO SERVER-74301: Use 'event.toBsonWithMetadata()' here. - eventBson = event.toBson(); - eventDocToReturn = event; + auto eventBson = event.toBsonWithMetaData(); + return {Document::fromBsonWithMetaData(eventBson), eventBson.objsize()}; } else { // Serialize just the user data, and add the metadata fields separately. - eventBson = event.toBson(); + auto eventBson = event.toBson(); MutableDocument mutDoc(Document{eventBson}); mutDoc.copyMetaDataFrom(event); - eventDocToReturn = mutDoc.freeze(); + return {mutDoc.freeze(), eventBson.objsize()}; } - // Count the size of the _id field again since the output cursor will have a PBRT. - size_t eventBsonSize = eventBson.objsize() + eventBson["_id"].size(); - return {eventDocToReturn, eventBsonSize}; -} - -size_t getBsonSizeWithMetaData(const Document& doc) { - // TODO SERVER-74301: Make sure each event is serialized only once in a pipeline. - return static_cast(doc.toBsonWithMetaData().objsize()); -} - -size_t getFieldBsonSize(const Document& doc, const StringData& key) { - // TODO SERVER-74301: Make sure each event is serialized only once in a pipeline. - return static_cast(doc.toBson().getField(key).size()); } std::queue splitChangeEvent(const Document& event, size_t maxFragmentBsonSize, size_t skipFirstFragments) { + // Extract the underlying BSON. We expect the event to be trivially convertible either with + // or without metadata, so we attempt to optimize the serialization here. + auto eventBson = + (event.isTriviallyConvertible() ? event.toBson() + : event.toBsonWithMetaData()); + // Construct a sorted map of fields ordered by size and key for a deterministic greedy strategy // to minimize the total number of fragments (the first fragment contains as many fields as // possible). Don't include the original '_id' field, since each fragment will have its own. @@ -75,7 +65,7 @@ std::queue splitChangeEvent(const Document& event, for (auto it = event.fieldIterator(); it.more();) { auto&& [key, value] = it.next(); if (key != kIdField) { - sortedFieldMap.emplace(std::make_pair(getFieldBsonSize(event, key), key), value); + sortedFieldMap.emplace(std::make_pair(eventBson[key].size(), key), value); } } @@ -102,7 +92,7 @@ std::queue splitChangeEvent(const Document& event, Value(Document{{kFragmentNumberField, static_cast(fragments.size())}, {kTotalFragmentsField, 0}})); - auto fragmentBsonSize = getBsonSizeWithMetaData(fragment.peek()); + auto fragmentBsonSize = static_cast(fragment.peek().toBsonWithMetaData().objsize()); // Fill the fragment with as many fields as we can until we run out or exceed max size. // Always make sure we add at least one new field on each iteration. diff --git a/src/mongo/db/pipeline/change_stream_split_event_helpers.h b/src/mongo/db/pipeline/change_stream_split_event_helpers.h index 472b7eda122..ee39b52dead 100644 --- a/src/mongo/db/pipeline/change_stream_split_event_helpers.h +++ b/src/mongo/db/pipeline/change_stream_split_event_helpers.h @@ -46,17 +46,7 @@ constexpr auto kTotalFragmentsField = "of"_sd; * re-usable. The parameter 'withMetadata' desides whether the metadata is counted. * Also returns a new document optimized for later serialization by PlanExecutorPipeline. */ -std::pair processChangeEventBeforeSplit(Document event, bool withMetadata); - -/** - * Returns the size of the BSON object with metadata for the provided document. - */ -size_t getBsonSizeWithMetaData(const Document& doc); - -/** - * Returns the size of the BSON field for the provided 'key'. - */ -size_t getFieldBsonSize(const Document& doc, const StringData& key); +std::pair processChangeEventBeforeSplit(const Document& event, bool withMetadata); /** * Splits the given change stream 'event' to several sub-events, called fragments. The size of BSON diff --git a/src/mongo/db/pipeline/change_stream_split_event_helpers_test.cpp b/src/mongo/db/pipeline/change_stream_split_event_helpers_test.cpp index bc9d110449c..aea1ca6fce8 100644 --- a/src/mongo/db/pipeline/change_stream_split_event_helpers_test.cpp +++ b/src/mongo/db/pipeline/change_stream_split_event_helpers_test.cpp @@ -54,7 +54,11 @@ public: fragment.addField(kIdField, fragment.metadata().getSortKey()); fragment.addField(kSplitEventField, Value(Document{{kFragmentNumberField, 1}, {kTotalFragmentsField, 1}})); - minFragmentSize = getBsonSizeWithMetaData(fragment.peek()); + minFragmentSize = static_cast(fragment.peek().toBsonWithMetaData().objsize()); + } + + size_t getFieldBsonSize(const Document& doc, const StringData& key) { + return static_cast(doc.toBson().getField(key).size()); } MutableDocument doc; diff --git a/src/mongo/db/pipeline/document_source_change_stream_split_large_event.cpp b/src/mongo/db/pipeline/document_source_change_stream_split_large_event.cpp index e57326fb6f2..7c997eae608 100644 --- a/src/mongo/db/pipeline/document_source_change_stream_split_large_event.cpp +++ b/src/mongo/db/pipeline/document_source_change_stream_split_large_event.cpp @@ -123,23 +123,23 @@ DocumentSource::GetNextResult DocumentSourceChangeStreamSplitLargeEvent::doGetNe // Process the event to see if it is within the size limit. We have to serialize the document to // perform this check, but the helper will also produce a new 'Document' which - if it is small // enough to be returned - will not need to be re-serialized by the plan executor. - // TODO SERVER-74301: Consider 'this->pExpCtx->forPerShardCursor' here. auto [eventDoc, eventBsonSize] = change_stream_split_event::processChangeEventBeforeSplit( - input.releaseDocument(), this->pExpCtx->needsMerge); + input.getDocument(), this->pExpCtx->needsMerge || this->pExpCtx->forPerShardCursor); + + // Make sure to leave some space for the postBatchResumeToken in the cursor response object. + size_t tokenSize = eventDoc.metadata().getSortKey().getDocument().toBson().objsize(); // If we are resuming from a split event, check whether this is it. If so, extract the fragment // number from which we are resuming. Otherwise, we have already scanned past the resume point, // which implies that it may be on another shard. Continue to split this event without skipping. - size_t skipFragments = _handleResumeAfterSplit(eventDoc, eventBsonSize); + size_t skipFragments = _handleResumeAfterSplit(eventDoc, eventBsonSize + tokenSize); // Before proceeding, check whether the event is small enough to be returned as-is. - if (eventBsonSize <= kBSONObjMaxChangeEventSize) { + if (eventBsonSize + tokenSize <= kBSONObjMaxChangeEventSize) { return std::move(eventDoc); } - // Split the event into N appropriately-sized fragments. Make sure to leave some space for the - // postBatchResumeToken in the cursor response object. - size_t tokenSize = eventDoc.metadata().getSortKey().getDocument().toBson().objsize(); + // Split the event into N appropriately-sized fragments. _splitEventQueue = change_stream_split_event::splitChangeEvent( eventDoc, kBSONObjMaxChangeEventSize - tokenSize, skipFragments); diff --git a/src/mongo/db/query/sbe_stage_builder_lookup_test.cpp b/src/mongo/db/query/sbe_stage_builder_lookup_test.cpp index 0665799e84d..d6ce29790d1 100644 --- a/src/mongo/db/query/sbe_stage_builder_lookup_test.cpp +++ b/src/mongo/db/query/sbe_stage_builder_lookup_test.cpp @@ -224,7 +224,7 @@ public: expectedDocuments.reserve(expectedPairs.size()); for (auto& [localDocument, matchedDocuments] : expectedPairs) { MutableDocument expectedDocument; - expectedDocument.reset(localDocument, false /* stripMetadata */); + expectedDocument.reset(localDocument, false /* bsonHasMetadata */); std::vector matchedValues{matchedDocuments.begin(), matchedDocuments.end()}; -- cgit v1.2.1