author    | Haley Connelly <haley.connelly@mongodb.com>      | 2023-03-24 18:45:55 +0000
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-03-24 21:55:47 +0000
commit    | 5ad203a0e39df64c86e94c56de848d9add02adbc (patch)
tree      | 4e7dbd127f2049530815c8c7fb376db1cc51122d /src/mongo/db/storage
parent    | 0127ea7f63f4fa80a77ca77a306e2a50e73101ac (diff)
download  | mongo-5ad203a0e39df64c86e94c56de848d9add02adbc.tar.gz
SERVER-75081 Preserve OplogStone initialization behavior in OplogTruncateMarkers
Diffstat (limited to 'src/mongo/db/storage')
4 files changed, 68 insertions, 5 deletions
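
The central change is in how `createFromExistingRecordStore` chooses between scanning and sampling: with the new `numberOfMarkersToKeepLegacy` argument, the marker count that feeds the decision comes from the legacy OplogStones cap instead of `dataSize / minBytesPerMarker`. Below is a minimal, self-contained sketch of that decision, assuming invented constant values and `std::optional` in place of `boost::optional`; it is an illustration of the logic in the diff, not MongoDB's implementation.

```cpp
// Sketch only: how an optional legacy marker count changes the scan-vs-sample decision.
// The constant values and the shouldSample() helper are assumptions for this example.
#include <cstdint>
#include <iostream>
#include <optional>

namespace sketch {

// Assumed values for illustration; the real constants live in collection_markers.cpp.
constexpr double kMinSampleRatioForRandCursor = 20.0;
constexpr int64_t kRandomSamplesPerMarker = 10;

// Returns true when sampling is expected to be cheaper than a full collection scan.
bool shouldSample(int64_t numRecords,
                  int64_t dataSize,
                  int64_t minBytesPerMarker,
                  std::optional<int64_t> numberOfMarkersToKeepLegacy) {
    // New behavior: estimate the marker count from the data size.
    // Legacy (OplogStones) behavior: use the configured maximum number of markers.
    const int64_t numMarkers = numberOfMarkersToKeepLegacy
        ? *numberOfMarkersToKeepLegacy
        : dataSize / minBytesPerMarker;

    if (numRecords <= 0 || dataSize <= 0 ||
        static_cast<uint64_t>(numRecords) <
            kMinSampleRatioForRandCursor * kRandomSamplesPerMarker * numMarkers) {
        return false;  // too few records to justify sampling; scan the collection
    }
    return true;
}

}  // namespace sketch

int main() {
    const int64_t numRecords = 10'000;
    const int64_t dataSize = 64LL * 1024 * 1024;            // 64 MiB estimated
    const int64_t minBytesPerMarker = 16LL * 1024 * 1024;   // 16 MiB per marker

    // Size-based estimate: 4 markers -> low threshold -> sampling (prints true).
    std::cout << std::boolalpha
              << sketch::shouldSample(numRecords, dataSize, minBytesPerMarker, std::nullopt)
              << '\n';
    // Legacy cap of 100 markers -> higher threshold -> full scan (prints false).
    std::cout << sketch::shouldSample(numRecords, dataSize, minBytesPerMarker, 100) << '\n';
    return 0;
}
```

Depending on the values involved, the legacy cap can flip the decision relative to the new size-based estimate; preserving that oplog initialization behavior is what the commit title refers to.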
diff --git a/src/mongo/db/storage/collection_markers.cpp b/src/mongo/db/storage/collection_markers.cpp
index 321426b233d..78f592fe844 100644
--- a/src/mongo/db/storage/collection_markers.cpp
+++ b/src/mongo/db/storage/collection_markers.cpp
@@ -431,7 +431,8 @@ CollectionTruncateMarkers::createFromExistingRecordStore(
     OperationContext* opCtx,
     RecordStore* rs,
     int64_t minBytesPerMarker,
-    std::function<RecordIdAndWallTime(const Record&)> getRecordIdAndWallTime) {
+    std::function<RecordIdAndWallTime(const Record&)> getRecordIdAndWallTime,
+    boost::optional<int64_t> numberOfMarkersToKeepLegacy) {
     long long numRecords = rs->numRecords(opCtx);
     long long dataSize = rs->dataSize(opCtx);

@@ -456,7 +457,11 @@ CollectionTruncateMarkers::createFromExistingRecordStore(
     // If the collection doesn't contain enough records to make sampling more efficient, then scan
     // the collection to determine where to put down markers.
-    auto numMarkers = dataSize / minBytesPerMarker;
+    //
+    // Unless preserving legacy behavior, compute the number of markers which would be generated
+    // based on the estimated data size.
+    auto numMarkers = numberOfMarkersToKeepLegacy ? numberOfMarkersToKeepLegacy.get()
+                                                  : dataSize / minBytesPerMarker;
     if (numRecords <= 0 || dataSize <= 0 ||
         uint64_t(numRecords) <
             kMinSampleRatioForRandCursor * kRandomSamplesPerMarker * numMarkers) {
diff --git a/src/mongo/db/storage/collection_markers.h b/src/mongo/db/storage/collection_markers.h
index f42b2be9c6d..a6849af58d1 100644
--- a/src/mongo/db/storage/collection_markers.h
+++ b/src/mongo/db/storage/collection_markers.h
@@ -156,11 +156,16 @@ public:
     // Creates the initial set of markers. This will decide whether to perform a collection scan or
     // sampling based on the size of the collection.
+    //
+    // 'numberOfMarkersToKeepLegacy' exists solely to maintain legacy behavior of
+    // 'OplogTruncateMarkers' previously known as 'OplogStones'. It serves as the maximum number of
+    // truncate markers to keep before reclaiming the oldest truncate markers.
     static InitialSetOfMarkers createFromExistingRecordStore(
         OperationContext* opCtx,
         RecordStore* rs,
         int64_t minBytesPerMarker,
-        std::function<RecordIdAndWallTime(const Record&)> getRecordIdAndWallTime);
+        std::function<RecordIdAndWallTime(const Record&)> getRecordIdAndWallTime,
+        boost::optional<int64_t> numberOfMarkersToKeepLegacy = boost::none);

     // Creates the initial set of markers by fully scanning the collection. The set of markers
     // returned will have correct metrics.
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
index c80a3343ebe..3043e722aef 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store.cpp
@@ -175,11 +175,15 @@ WiredTigerRecordStore::OplogTruncateMarkers::createOplogTruncateMarkers(Operatio
             minBytesPerTruncateMarker > 0);

     auto initialSetOfMarkers = CollectionTruncateMarkers::createFromExistingRecordStore(
-        opCtx, rs, minBytesPerTruncateMarker, [](const Record& record) {
+        opCtx,
+        rs,
+        minBytesPerTruncateMarker,
+        [](const Record& record) {
            BSONObj obj = record.data.toBson();
            auto wallTime = obj.hasField("wall") ? obj["wall"].Date() : obj["ts"].timestampTime();
            return RecordIdAndWallTime(record.id, wallTime);
-        });
+        },
+        numTruncateMarkersToKeep);
     LOGV2(22382,
           "WiredTiger record store oplog processing took {duration}ms",
           "WiredTiger record store oplog processing finished",
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_test.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_test.cpp
index 05444f0d3f0..73feeb4eea3 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_test.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_record_store_test.cpp
@@ -911,6 +911,55 @@ TEST(WiredTigerRecordStoreTest, OplogTruncateMarkers_AscendingOrder) {
     }
 }

+// When the oplog collection is non-empty, but no OplogTruncateMarkers are
+// generated because the estimated 'dataSize' is smaller than the minimum size for a truncate
+// marker, tests that
+// (1) The oplog is scanned
+// (2) OplogTruncateMarkers::currentBytes() reflects the actual size of the oplog instead of the
+// estimated size.
+TEST(WiredTigerRecordStoreTest, OplogTruncateMarkers_NoMarkersGeneratedFromScanning) {
+    std::unique_ptr<RecordStoreHarnessHelper> harnessHelper = newRecordStoreHarnessHelper();
+    auto wtHarnessHelper = dynamic_cast<WiredTigerHarnessHelper*>(harnessHelper.get());
+    std::unique_ptr<RecordStore> rs(wtHarnessHelper->newOplogRecordStoreNoInit());
+
+    WiredTigerRecordStore* wtrs = static_cast<WiredTigerRecordStore*>(rs.get());
+
+    int realNumRecords = 4;
+    int realSizePerRecord = 100;
+    {
+        ServiceContext::UniqueOperationContext opCtx(harnessHelper->newOperationContext());
+        for (int i = 1; i <= realNumRecords; i++) {
+            ASSERT_EQ(insertBSONWithSize(opCtx.get(), rs.get(), Timestamp(i, 0), realSizePerRecord),
+                      RecordId(i, 0));
+        }
+    }
+
+    // Force the oplog visibility timestamp to be up-to-date to the last record.
+    auto wtKvEngine = dynamic_cast<WiredTigerKVEngine*>(harnessHelper->getEngine());
+    wtKvEngine->getOplogManager()->setOplogReadTimestamp(Timestamp(realNumRecords, 0));
+
+
+    // Force the estimates of 'dataSize' and 'numRecords' to be lower than the real values.
+    wtrs->setNumRecords(realNumRecords - 1);
+    wtrs->setDataSize((realNumRecords - 1) * realSizePerRecord);
+
+    // Initialize the truncate markers.
+    ServiceContext::UniqueOperationContext opCtx(harnessHelper->newOperationContext());
+    wtrs->postConstructorInit(opCtx.get());
+
+    auto oplogTruncateMarkers = wtrs->oplogTruncateMarkers();
+    ASSERT_FALSE(oplogTruncateMarkers->processedBySampling());
+
+    auto numMarkers = oplogTruncateMarkers->numMarkers();
+    ASSERT_EQ(numMarkers, 0U);
+
+    // A forced scan over the RecordStore should force the 'currentBytes' to be accurate in the
+    // truncate markers as well as the RecordStore's 'numRecords' and 'dataSize'.
+    ASSERT_EQ(oplogTruncateMarkers->currentBytes(), realNumRecords * realSizePerRecord);
+    ASSERT_EQ(wtrs->dataSize(opCtx.get()), realNumRecords * realSizePerRecord);
+    ASSERT_EQ(wtrs->numRecords(opCtx.get()), realNumRecords);
+}
+
 // Ensure that if we sample and create duplicate oplog truncate markers, perform truncation
 // correctly, and with no crashing behavior. This scenario may be possible if the same record is
 // sampled multiple times during startup, which can be very likely if the size storer is very