From 25c694f365db0f07a445bd17b6cd5cbf32f5f2f9 Mon Sep 17 00:00:00 2001 From: Louis Williams Date: Tue, 12 May 2020 13:39:31 -0400 Subject: SERVER-46721 Secondary readers should read at the no-overlap time instead of lastApplied The no-overlap time, ReadSource::kNoOverlap, is the minimum of replication's lastApplied timestamp and WiredTiger's all_durable time. This time is independent of replication state and ensures queries do not see oplog holes after state transitions from secondary to primary. --- src/mongo/db/SConscript | 1 + src/mongo/db/catalog_raii_test.cpp | 4 +- src/mongo/db/commands/getmore_cmd.cpp | 3 + src/mongo/db/db_raii.cpp | 213 ++++++--------------- src/mongo/db/db_raii.h | 13 -- src/mongo/db/db_raii_test.cpp | 114 ++++++++++- src/mongo/db/index_builds_coordinator.cpp | 3 + src/mongo/db/repl/oplog_batcher.cpp | 4 +- .../repl/replication_coordinator_external_state.h | 6 +- ...replication_coordinator_external_state_impl.cpp | 6 +- .../replication_coordinator_external_state_impl.h | 2 +- ...replication_coordinator_external_state_mock.cpp | 2 +- .../replication_coordinator_external_state_mock.h | 2 +- src/mongo/db/repl/replication_coordinator_impl.cpp | 13 +- src/mongo/db/repl/replication_recovery.cpp | 1 + .../db/repl/transaction_oplog_application.cpp | 4 +- src/mongo/db/storage/SConscript | 17 ++ .../db/storage/kv/kv_engine_timestamps_test.cpp | 60 +++--- src/mongo/db/storage/recovery_unit.h | 24 ++- src/mongo/db/storage/snapshot_helper.cpp | 151 +++++++++++++++ src/mongo/db/storage/snapshot_helper.h | 43 +++++ src/mongo/db/storage/snapshot_manager.h | 8 +- .../db/storage/wiredtiger/wiredtiger_kv_engine.cpp | 6 + .../wiredtiger/wiredtiger_recovery_unit.cpp | 61 +++--- .../wiredtiger/wiredtiger_recovery_unit_test.cpp | 83 ++++++++ .../wiredtiger/wiredtiger_snapshot_manager.cpp | 33 +--- .../wiredtiger/wiredtiger_snapshot_manager.h | 22 +-- src/mongo/dbtests/storage_timestamp_tests.cpp | 4 +- 28 files changed, 604 insertions(+), 299 deletions(-) create mode 100644 src/mongo/db/storage/snapshot_helper.cpp create mode 100644 src/mongo/db/storage/snapshot_helper.h diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript index 55595f5999b..9519d1207f1 100644 --- a/src/mongo/db/SConscript +++ b/src/mongo/db/SConscript @@ -602,6 +602,7 @@ env.Library( LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/idl/server_parameter', 'catalog/database_holder', + 'storage/snapshot_helper', ], ) diff --git a/src/mongo/db/catalog_raii_test.cpp b/src/mongo/db/catalog_raii_test.cpp index 50913896118..733ed190250 100644 --- a/src/mongo/db/catalog_raii_test.cpp +++ b/src/mongo/db/catalog_raii_test.cpp @@ -260,8 +260,8 @@ TEST_F(ReadSourceScopeTest, RestoreReadSource) { ReadSourceScope scope(opCtx()); ASSERT_EQ(opCtx()->recoveryUnit()->getTimestampReadSource(), ReadSource::kUnset); - opCtx()->recoveryUnit()->setTimestampReadSource(ReadSource::kLastApplied); - ASSERT_EQ(opCtx()->recoveryUnit()->getTimestampReadSource(), ReadSource::kLastApplied); + opCtx()->recoveryUnit()->setTimestampReadSource(ReadSource::kNoOverlap); + ASSERT_EQ(opCtx()->recoveryUnit()->getTimestampReadSource(), ReadSource::kNoOverlap); ASSERT_EQ(opCtx()->recoveryUnit()->getPointInTimeReadTimestamp(), boost::none); } ASSERT_EQ(opCtx()->recoveryUnit()->getTimestampReadSource(), ReadSource::kProvided); diff --git a/src/mongo/db/commands/getmore_cmd.cpp b/src/mongo/db/commands/getmore_cmd.cpp index f183225cd8c..97e6f6b79ea 100644 --- a/src/mongo/db/commands/getmore_cmd.cpp +++ b/src/mongo/db/commands/getmore_cmd.cpp @@ -148,6 +148,7 @@ void applyCursorReadConcern(OperationContext* opCtx, repl::ReadConcernArgs rcArg switch (rcArgs.getMajorityReadMechanism()) { case repl::ReadConcernArgs::MajorityReadMechanism::kMajoritySnapshot: { // Make sure we read from the majority snapshot. + opCtx->recoveryUnit()->abandonSnapshot(); opCtx->recoveryUnit()->setTimestampReadSource( RecoveryUnit::ReadSource::kMajorityCommitted); uassertStatusOK(opCtx->recoveryUnit()->obtainMajorityCommittedSnapshot()); @@ -156,6 +157,7 @@ void applyCursorReadConcern(OperationContext* opCtx, repl::ReadConcernArgs rcArg case repl::ReadConcernArgs::MajorityReadMechanism::kSpeculative: { // Mark the operation as speculative and select the correct read source. repl::SpeculativeMajorityReadInfo::get(opCtx).setIsSpeculativeRead(); + opCtx->recoveryUnit()->abandonSnapshot(); opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoOverlap); break; } @@ -167,6 +169,7 @@ void applyCursorReadConcern(OperationContext* opCtx, repl::ReadConcernArgs rcArg !opCtx->inMultiDocumentTransaction()) { auto atClusterTime = rcArgs.getArgsAtClusterTime(); invariant(atClusterTime && *atClusterTime != LogicalTime::kUninitialized); + opCtx->recoveryUnit()->abandonSnapshot(); opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kProvided, atClusterTime->asTimestamp()); } diff --git a/src/mongo/db/db_raii.cpp b/src/mongo/db/db_raii.cpp index 34775dd7d01..53dda3ca490 100644 --- a/src/mongo/db/db_raii.cpp +++ b/src/mongo/db/db_raii.cpp @@ -39,6 +39,7 @@ #include "mongo/db/db_raii_gen.h" #include "mongo/db/repl/replication_coordinator.h" #include "mongo/db/s/collection_sharding_state.h" +#include "mongo/db/storage/snapshot_helper.h" #include "mongo/logv2/log.h" namespace mongo { @@ -112,7 +113,7 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx, _autoColl.emplace(opCtx, nsOrUUID, collectionLockMode, viewMode, deadline); // If the read source is explicitly set to kNoTimestamp, we read the most up to date data and do - // not consider reading at last applied (e.g. FTDC needs that). + // not consider reading at the no-overlap point (e.g. FTDC needs that). if (opCtx->recoveryUnit()->getTimestampReadSource() == RecoveryUnit::ReadSource::kNoTimestamp) return; @@ -123,10 +124,6 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx, // need to check for pending catalog changes. while (auto coll = _autoColl->getCollection()) { - auto readSource = opCtx->recoveryUnit()->getTimestampReadSource(); - auto minSnapshot = coll->getMinimumVisibleSnapshot(); - auto mySnapshot = opCtx->recoveryUnit()->getPointInTimeReadTimestamp(); - // TODO(SERVER-47824): Also ban transaction snapshot reads on capped collections. uassert(ErrorCodes::SnapshotUnavailable, "Reading from capped collections with readConcern snapshot is not supported " @@ -135,77 +132,85 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx, readConcernLevel != repl::ReadConcernLevel::kSnapshotReadConcern || opCtx->inMultiDocumentTransaction()); - // If we are reading at a provided timestamp earlier than the latest catalog changes, - // then we must return an error. - if (readSource == RecoveryUnit::ReadSource::kProvided && minSnapshot && - (*mySnapshot < *minSnapshot)) { - uasserted(ErrorCodes::SnapshotUnavailable, - str::stream() - << "Unable to read from a snapshot due to pending collection catalog " - "changes; please retry the operation. Snapshot timestamp is " - << mySnapshot->toString() << ". Collection minimum is " - << minSnapshot->toString()); - } - // During batch application on secondaries, there is a potential to read inconsistent states // that would normally be protected by the PBWM lock. In order to serve secondary reads // during this period, we default to not acquiring the lock (by setting // _shouldNotConflictWithSecondaryBatchApplicationBlock). On primaries, we always read at a // consistent time, so not taking the PBWM lock is not a problem. On secondaries, we have to - // guarantee we read at a consistent state, so we must read at the last applied timestamp, - // which is set after each complete batch. + // guarantee we read at a consistent state, so we must read at the no-overlap timestamp, + // which is a function of the lastApplied timestamp, which is set after each complete batch. // - // If an attempt to read at the last applied timestamp is unsuccessful because there are - // pending catalog changes that occur after the last applied timestamp, we release our locks + // If an attempt to read at the no-overlap timestamp is unsuccessful because there are + // pending catalog changes that occur after the no-overlap timestamp, we release our locks // and try again with the PBWM lock (by unsetting // _shouldNotConflictWithSecondaryBatchApplicationBlock). const NamespaceString nss = coll->ns(); + auto readSource = opCtx->recoveryUnit()->getTimestampReadSource(); - bool readAtLastAppliedTimestamp = - _shouldReadAtLastAppliedTimestamp(opCtx, nss, readConcernLevel); - - if (readAtLastAppliedTimestamp) { - opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kLastApplied); - readSource = opCtx->recoveryUnit()->getTimestampReadSource(); + // Once we have our locks, check whether or not we should override the ReadSource that was + // set before acquiring locks. + if (auto newReadSource = SnapshotHelper::getNewReadSource(opCtx, nss)) { + opCtx->recoveryUnit()->setTimestampReadSource(*newReadSource); + readSource = *newReadSource; } - // This timestamp could be earlier than the timestamp seen when the transaction is opened - // because it is set asynchonously. This is not problematic because holding the collection - // lock guarantees no metadata changes will occur in that time. - auto lastAppliedTimestamp = readAtLastAppliedTimestamp - ? boost::optional(replCoord->getMyLastAppliedOpTime().getTimestamp()) - : boost::none; + const auto readTimestamp = opCtx->recoveryUnit()->getPointInTimeReadTimestamp(); + const auto afterClusterTime = repl::ReadConcernArgs::get(opCtx).getArgsAfterClusterTime(); + if (readTimestamp && afterClusterTime) { + // Readers that use afterClusterTime have already waited at a higher level for the + // lastApplied time to advance to a specified optime, and they assume the read timestamp + // of the operation is at least that waited-for timestamp. For kNoOverlap, which is the + // minimum of lastApplied and all_durable, this invariant ensures that afterClusterTime + // reads do not choose a read timestamp older than the one requested. + invariant(*readTimestamp >= afterClusterTime->asTimestamp(), + str::stream() << "read timestamp " << readTimestamp->toString() + << "was less than afterClusterTime: " + << afterClusterTime->asTimestamp().toString()); + } - if (!_conflictingCatalogChanges(opCtx, minSnapshot, lastAppliedTimestamp)) { + auto minSnapshot = coll->getMinimumVisibleSnapshot(); + if (!SnapshotHelper::collectionChangesConflictWithRead(minSnapshot, readTimestamp)) { return; } - invariant(lastAppliedTimestamp || - // The kMajorityCommitted and kNoOverlap read sources already read from timestamps - // that are safe with respect to concurrent secondary batch application. - readSource == RecoveryUnit::ReadSource::kMajorityCommitted || - readSource == RecoveryUnit::ReadSource::kNoOverlap); + // If we are reading at a provided timestamp earlier than the latest catalog changes, + // then we must return an error. + if (readSource == RecoveryUnit::ReadSource::kProvided) { + uasserted(ErrorCodes::SnapshotUnavailable, + str::stream() + << "Unable to read from a snapshot due to pending collection catalog " + "changes; please retry the operation. Snapshot timestamp is " + << readTimestamp->toString() << ". Collection minimum is " + << minSnapshot->toString()); + } + + invariant( + // The kMajorityCommitted and kNoOverlap read sources already read from timestamps + // that are safe with respect to concurrent secondary batch application, and are + // eligible for retrying. + readSource == RecoveryUnit::ReadSource::kMajorityCommitted || + readSource == RecoveryUnit::ReadSource::kNoOverlap); invariant(readConcernLevel != repl::ReadConcernLevel::kSnapshotReadConcern); // Yield locks in order to do the blocking call below. _autoColl = boost::none; - // If there are pending catalog changes, we should conflict with any in-progress batches (by - // taking the PBWM lock) and choose not to read from the last applied timestamp by unsetting - // _shouldNotConflictWithSecondaryBatchApplicationBlock. Index builds on secondaries can - // complete at timestamps later than the lastAppliedTimestamp during initial sync. After - // initial sync finishes, if we waited instead of retrying, readers would block indefinitely - // waiting for the lastAppliedTimestamp to move forward. Instead we force the reader take - // the PBWM lock and retry. - if (lastAppliedTimestamp) { + // If there are pending catalog changes when using a no-overlap read source, we choose to + // take the PBWM lock to conflict with any in-progress batches. This prevents us from idly + // spinning in this loop trying to get a new read timestamp ahead of the minimum visible + // snapshot. This helps us guarantee liveness (i.e. we can eventually get a suitable read + // timestamp) but should not be necessary for correctness. After initial sync finishes, if + // we waited instead of retrying, readers would block indefinitely waiting for the + // noOverlap time to move forward. Instead we force the reader take the PBWM lock and retry. + if (readSource == RecoveryUnit::ReadSource::kNoOverlap) { + invariant(readTimestamp); LOGV2(20576, - "tried reading at last-applied time: {lastAppliedTimestamp} on ns: {nss_ns}, but " - "future catalog changes are pending at time {minSnapshot}. Trying again without " - "reading at last-applied time.", - "lastAppliedTimestamp"_attr = *lastAppliedTimestamp, - "nss_ns"_attr = nss.ns(), - "minSnapshot"_attr = *minSnapshot); + "Tried reading at no-overlap time, but future catalog changes are pending. " + "Trying again without reading at no-overlap time.", + "noOverlapTimestamp"_attr = *readTimestamp, + "collection"_attr = nss.ns(), + "collectionMinSnapshot"_attr = *minSnapshot); // Destructing the block sets _shouldConflictWithSecondaryBatchApplication back to the // previous value. If the previous value is false (because there is another // shouldNotConflictWithSecondaryBatchApplicationBlock outside of this function), this @@ -220,28 +225,12 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx, ErrorCodes::SnapshotUnavailable, str::stream() << "Unable to read from a snapshot due to pending collection catalog " "changes; please retry the operation. Snapshot timestamp is " - << (mySnapshot ? mySnapshot->toString() : "(none)") - << ". Collection minimum is " << minSnapshot->toString(), + << readTimestamp->toString() << ". Collection minimum is " + << minSnapshot->toString(), opCtx->lockState()->shouldConflictWithSecondaryBatchApplication()); - // Cannot change ReadSource while a RecoveryUnit is active, which may result from - // calling getPointInTimeReadTimestamp(). - opCtx->recoveryUnit()->abandonSnapshot(); - opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset); - } - - // If there are pending catalog changes when using a no-overlap read source, we choose to - // take the PBWM lock to conflict with any in-progress batches. This prevents us from idly - // spinning in this loop trying to get a new read timestamp ahead of the minimum visible - // snapshot. This helps us guarantee liveness (i.e. we can eventually get a suitable read - // timestamp) but should not be necessary for correctness. - if (readSource == RecoveryUnit::ReadSource::kNoOverlap) { - invariant(!lastAppliedTimestamp); // no-overlap read source selects its own timestamp. - _shouldNotConflictWithSecondaryBatchApplicationBlock = boost::none; - invariant(opCtx->lockState()->shouldConflictWithSecondaryBatchApplication()); - - // Abandon our snapshot but don't change our read source, so that we can select a new - // read timestamp on the next loop iteration. + // Abandon our snapshot. We may select a new read timestamp or ReadSource in the next + // loop iteration. opCtx->recoveryUnit()->abandonSnapshot(); } @@ -259,84 +248,6 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx, } } -bool AutoGetCollectionForRead::_shouldReadAtLastAppliedTimestamp( - OperationContext* opCtx, - const NamespaceString& nss, - repl::ReadConcernLevel readConcernLevel) const { - - // If this block is unset, then the operation did not opt-out of the PBWM lock, implying that it - // cannot read at lastApplied. It's important to note that it is possible for this to be set, - // but still be holding the PBWM lock, explained below. - if (!_shouldNotConflictWithSecondaryBatchApplicationBlock) { - return false; - } - - // If we are already holding the PBWM lock, do not read at last-applied. This is because once an - // operation reads without a timestamp (effectively seeing all writes), it is no longer safe to - // start reading at a timestamp, as writes or catalog operations may appear to vanish. - // This may occur when multiple collection locks are held concurrently, which is often the case - // when DBDirectClient is used. - if (opCtx->lockState()->isLockHeldForMode(resourceIdParallelBatchWriterMode, MODE_IS)) { - LOGV2_DEBUG(20577, 1, "not reading at last-applied because the PBWM lock is held"); - return false; - } - - // Majority and snapshot readConcern levels should not read from lastApplied; these read - // concerns already have a designated timestamp to read from. - if (readConcernLevel != repl::ReadConcernLevel::kLocalReadConcern && - readConcernLevel != repl::ReadConcernLevel::kAvailableReadConcern) { - return false; - } - - // If we are in a replication state (like secondary or primary catch-up) where we are not - // accepting writes, we should read at lastApplied. If this node can accept writes, then no - // conflicting replication batches are being applied and we can read from the default snapshot. - if (repl::ReplicationCoordinator::get(opCtx)->canAcceptWritesForDatabase(opCtx, "admin")) { - return false; - } - - // Non-replicated collections do not need to read at lastApplied, as those collections are not - // written by the replication system. However, the oplog is special, as it *is* written by the - // replication system. - if (!nss.isReplicated() && !nss.isOplog()) { - return false; - } - - return true; -} - -bool AutoGetCollectionForRead::_conflictingCatalogChanges( - OperationContext* opCtx, - boost::optional minSnapshot, - boost::optional lastAppliedTimestamp) const { - // This is the timestamp of the most recent catalog changes to this collection. If this is - // greater than any point in time read timestamps, we should either wait or return an error. - if (!minSnapshot) { - return false; - } - - // If we are reading from the lastAppliedTimestamp and it is up-to-date with any catalog - // changes, we can return. - if (lastAppliedTimestamp && - (lastAppliedTimestamp->isNull() || *lastAppliedTimestamp >= *minSnapshot)) { - return false; - } - - // This can be set when readConcern is "snapshot" or "majority". - auto mySnapshot = opCtx->recoveryUnit()->getPointInTimeReadTimestamp(); - - // If we do not have a point in time to conflict with minSnapshot, return. - if (!mySnapshot && !lastAppliedTimestamp) { - return false; - } - - // Return if there are no conflicting catalog changes with mySnapshot. - if (mySnapshot && *mySnapshot >= *minSnapshot) { - return false; - } - - return true; -} AutoGetCollectionForReadCommand::AutoGetCollectionForReadCommand( OperationContext* opCtx, diff --git a/src/mongo/db/db_raii.h b/src/mongo/db/db_raii.h index edf32cd0010..51208643c6a 100644 --- a/src/mongo/db/db_raii.h +++ b/src/mongo/db/db_raii.h @@ -133,19 +133,6 @@ private: // This field is optional, because the code to wait for majority committed snapshot needs to // release locks in order to block waiting boost::optional _autoColl; - - // Returns true if we should read at the last applied timestamp instead of at "no" timestamp - // (i.e. reading with the "latest" snapshot reflecting all writes). Reading at the last applied - // timestamp avoids reading in-flux data actively being written by the replication system. - bool _shouldReadAtLastAppliedTimestamp(OperationContext* opCtx, - const NamespaceString& nss, - repl::ReadConcernLevel readConcernLevel) const; - - // Returns true if the minSnapshot causes conflicting catalog changes for either the provided - // lastAppliedTimestamp or the point-in-time snapshot of the RecoveryUnit on 'opCtx'. - bool _conflictingCatalogChanges(OperationContext* opCtx, - boost::optional minSnapshot, - boost::optional lastAppliedTimestamp) const; }; /** diff --git a/src/mongo/db/db_raii_test.cpp b/src/mongo/db/db_raii_test.cpp index 4b9a3bac3c4..5258a51a33b 100644 --- a/src/mongo/db/db_raii_test.cpp +++ b/src/mongo/db/db_raii_test.cpp @@ -37,6 +37,8 @@ #include "mongo/db/client.h" #include "mongo/db/concurrency/lock_state.h" #include "mongo/db/db_raii.h" +#include "mongo/db/query/internal_plans.h" +#include "mongo/db/storage/snapshot_manager.h" #include "mongo/logv2/log.h" #include "mongo/unittest/unittest.h" #include "mongo/util/time_support.h" @@ -199,8 +201,13 @@ TEST_F(DBRAIITestFixture, ASSERT_OK( storageInterface()->createCollection(client1.second.get(), nss, defaultCollectionOptions)); ASSERT_OK(replCoord->setFollowerMode(repl::MemberState::RS_SECONDARY)); + + // Don't call into the ReplicationCoordinator to update lastApplied because it is only a mock + // class and does not update the correct state in the SnapshotManager. repl::OpTime opTime(Timestamp(200, 1), 1); - replCoord->setMyLastAppliedOpTimeAndWallTime({opTime, Date_t() + Seconds(1)}); + auto snapshotManager = + client1.second.get()->getServiceContext()->getStorageEngine()->getSnapshotManager(); + snapshotManager->setLastApplied(opTime.getTimestamp()); Lock::DBLock dbLock1(client1.second.get(), nss.db(), MODE_IX); ASSERT(client1.second->lockState()->isDbLockedForMode(nss.db(), MODE_IX)); @@ -220,18 +227,119 @@ TEST_F(DBRAIITestFixture, // for the collection. If we now manually set our last applied time to something very early, we // will be guaranteed to hit the logic that triggers when the minimum snapshot time is greater // than the read-at time, since we default to reading at last-applied when in SECONDARY state. + + // Don't call into the ReplicationCoordinator to update lastApplied because it is only a mock + // class and does not update the correct state in the SnapshotManager. repl::OpTime opTime(Timestamp(2, 1), 1); - replCoord->setMyLastAppliedOpTimeAndWallTime({opTime, Date_t() + Seconds(1)}); + auto snapshotManager = + client1.second.get()->getServiceContext()->getStorageEngine()->getSnapshotManager(); + snapshotManager->setLastApplied(opTime.getTimestamp()); + Lock::DBLock dbLock1(client1.second.get(), nss.db(), MODE_IX); ASSERT(client1.second->lockState()->isDbLockedForMode(nss.db(), MODE_IX)); + AutoGetCollectionForRead coll(client2.second.get(), NamespaceString("local.system.js")); + // Reading from an unreplicated collection does not change the ReadSource to kNoOverlap. + ASSERT_EQ(client2.second.get()->recoveryUnit()->getTimestampReadSource(), + RecoveryUnit::ReadSource::kUnset); - // The current code uasserts in this situation, so we confirm that happens here. + // Reading from a replicated collection will try to switch to kNoOverlap. Because we are + // already reading without a timestamp and we can't reacquire the PBWM lock to continue reading + // without a timestamp, we uassert in this situation. ASSERT_THROWS_CODE(AutoGetCollectionForRead(client2.second.get(), nss), DBException, ErrorCodes::SnapshotUnavailable); } +TEST_F(DBRAIITestFixture, AutoGetCollectionForReadLastAppliedConflict) { + // This test simulates a situation where AutoGetCollectionForRead cant read at the no-overlap + // point (minimum of all_durable and lastApplied) because it is set to a point earlier than the + // catalog change. We expect to read without a timestamp and hold the PBWM lock. + auto replCoord = repl::ReplicationCoordinator::get(client1.second.get()); + CollectionOptions defaultCollectionOptions; + ASSERT_OK( + storageInterface()->createCollection(client1.second.get(), nss, defaultCollectionOptions)); + ASSERT_OK(replCoord->setFollowerMode(repl::MemberState::RS_SECONDARY)); + + // Note that when the collection was created, above, the system chooses a minimum snapshot time + // for the collection. If we now manually set our last applied time to something very early, we + // will be guaranteed to hit the logic that triggers when the minimum snapshot time is greater + // than the read-at time, since we default to reading at last-applied when in SECONDARY state. + + // Don't call into the ReplicationCoordinator to update lastApplied because it is only a mock + // class and does not update the correct state in the SnapshotManager. + repl::OpTime opTime(Timestamp(2, 1), 1); + auto snapshotManager = + client1.second.get()->getServiceContext()->getStorageEngine()->getSnapshotManager(); + snapshotManager->setLastApplied(opTime.getTimestamp()); + AutoGetCollectionForRead coll(client1.second.get(), nss); + + // We can't read from kNoOverlap in this scenario because there is a catalog conflict. Resort + // to taking the PBWM lock and reading without a timestamp. + ASSERT_EQ(client1.second.get()->recoveryUnit()->getTimestampReadSource(), + RecoveryUnit::ReadSource::kUnset); + ASSERT_TRUE(client1.second.get()->lockState()->isLockHeldForMode( + resourceIdParallelBatchWriterMode, MODE_IS)); +} + +TEST_F(DBRAIITestFixture, AutoGetCollectionForReadLastAppliedUnavailable) { + // This test simulates a situation where AutoGetCollectionForRead reads at the no-overlap + // point (minimum of all_durable and lastApplied) even though lastApplied is not available. + auto replCoord = repl::ReplicationCoordinator::get(client1.second.get()); + CollectionOptions defaultCollectionOptions; + ASSERT_OK( + storageInterface()->createCollection(client1.second.get(), nss, defaultCollectionOptions)); + ASSERT_OK(replCoord->setFollowerMode(repl::MemberState::RS_SECONDARY)); + + // Note that when the collection was created, above, the system chooses a minimum snapshot time + // for the collection. Since last-applied isn't available, we default to all_durable, which is + // available, and is greater than the collection minimum snapshot. + auto snapshotManager = + client1.second.get()->getServiceContext()->getStorageEngine()->getSnapshotManager(); + ASSERT_FALSE(snapshotManager->getLastApplied()); + AutoGetCollectionForRead coll(client1.second.get(), nss); + + // Even though lastApplied isn't available, the ReadSource is set to kNoOverlap, which reads + // at the all_durable time. + ASSERT_EQ(client1.second.get()->recoveryUnit()->getTimestampReadSource(), + RecoveryUnit::ReadSource::kNoOverlap); + ASSERT_TRUE(client1.second.get()->recoveryUnit()->getPointInTimeReadTimestamp()); + ASSERT_FALSE(client1.second.get()->lockState()->isLockHeldForMode( + resourceIdParallelBatchWriterMode, MODE_IS)); +} + +TEST_F(DBRAIITestFixture, AutoGetCollectionForReadUsesNoOverlapOnSecondary) { + auto opCtx = client1.second.get(); + ASSERT_OK(storageInterface()->createCollection(opCtx, nss, {})); + ASSERT_OK( + repl::ReplicationCoordinator::get(opCtx)->setFollowerMode(repl::MemberState::RS_SECONDARY)); + AutoGetCollectionForRead autoColl(opCtx, nss); + auto exec = InternalPlanner::collectionScan(opCtx, + nss.ns(), + autoColl.getCollection(), + PlanExecutor::YIELD_MANUAL, + InternalPlanner::FORWARD); + + // The collection scan should use the default ReadSource on a secondary. + ASSERT_EQ(RecoveryUnit::ReadSource::kNoOverlap, + opCtx->recoveryUnit()->getTimestampReadSource()); + + // While yielding the collection scan, simulate stepping-up to a primary. + exec->saveState(); + Locker::LockSnapshot lockSnapshot; + ASSERT_TRUE(opCtx->lockState()->saveLockStateAndUnlock(&lockSnapshot)); + ASSERT_OK( + repl::ReplicationCoordinator::get(opCtx)->setFollowerMode(repl::MemberState::RS_PRIMARY)); + + // After restoring, the collection scan should now be reading with kNoOverlap, the default on + // secondaries. + opCtx->lockState()->restoreLockState(opCtx, lockSnapshot); + exec->restoreState(); + ASSERT_EQ(RecoveryUnit::ReadSource::kNoOverlap, + opCtx->recoveryUnit()->getTimestampReadSource()); + BSONObj obj; + ASSERT_EQUALS(PlanExecutor::IS_EOF, exec->getNext(&obj, nullptr)); +} } // namespace } // namespace mongo diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp index 839ca031062..16e2a483ca7 100644 --- a/src/mongo/db/index_builds_coordinator.cpp +++ b/src/mongo/db/index_builds_coordinator.cpp @@ -2037,6 +2037,9 @@ void IndexBuildsCoordinator::_runIndexBuildInner(OperationContext* opCtx, void IndexBuildsCoordinator::_buildIndex(OperationContext* opCtx, std::shared_ptr replState, const IndexBuildOptions& indexBuildOptions) { + // Read without a timestamp. When we commit, we block writes which guarantees all writes are + // visible. + opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); _scanCollectionAndInsertKeysIntoSorter(opCtx, replState); _insertKeysFromSideTablesWithoutBlockingWrites(opCtx, replState); _signalPrimaryForCommitReadiness(opCtx, replState); diff --git a/src/mongo/db/repl/oplog_batcher.cpp b/src/mongo/db/repl/oplog_batcher.cpp index 27653ab21dc..aba27772547 100644 --- a/src/mongo/db/repl/oplog_batcher.cpp +++ b/src/mongo/db/repl/oplog_batcher.cpp @@ -363,8 +363,8 @@ std::size_t getBatchLimitOplogBytes(OperationContext* opCtx, StorageInterface* s // We can't change the timestamp source within a write unit of work. invariant(!opCtx->lockState()->inAWriteUnitOfWork()); // We're only reading oplog metadata, so the timestamp is not important. If we read with the - // default (which is kLastApplied on secondaries), we may end up with a reader that is at - // kLastApplied. If we then roll back, then when we reconstruct prepared transactions during + // default (which is lastApplied on secondaries), we may end up with a reader that is at + // lastApplied. If we then roll back, then when we reconstruct prepared transactions during // rollback recovery we will be preparing transactions before the read timestamp, which triggers // an assertion in WiredTiger. ReadSourceScope readSourceScope(opCtx, RecoveryUnit::ReadSource::kNoTimestamp); diff --git a/src/mongo/db/repl/replication_coordinator_external_state.h b/src/mongo/db/repl/replication_coordinator_external_state.h index e5ca3cfe12c..29cc6dd9c5b 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state.h +++ b/src/mongo/db/repl/replication_coordinator_external_state.h @@ -260,11 +260,11 @@ public: virtual void updateCommittedSnapshot(const OpTime& newCommitPoint) = 0; /** - * Updates the local snapshot to a consistent point for secondary reads. + * Updates the lastApplied snapshot to a consistent point for secondary reads. * - * It is illegal to call with a optime that does not name an existing snapshot. + * It is illegal to call with a non-existent optime. */ - virtual void updateLocalSnapshot(const OpTime& optime) = 0; + virtual void updateLastAppliedSnapshot(const OpTime& optime) = 0; /** * Returns whether or not the SnapshotThread is active. diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp index 9fa64da2a2e..ce80ab0acac 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp @@ -356,7 +356,7 @@ void ReplicationCoordinatorExternalStateImpl::clearAppliedThroughIfCleanShutdown } // Ensure that all writes are visible before reading. If we failed mid-batch, it would be - // possible to read from a kLastApplied ReadSource where not all writes to the minValid document + // possible to read from a kNoOverlap ReadSource where not all writes to the minValid document // are visible, generating a writeConflict that would not resolve. opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); @@ -972,10 +972,10 @@ void ReplicationCoordinatorExternalStateImpl::updateCommittedSnapshot( notifyOplogMetadataWaiters(newCommitPoint); } -void ReplicationCoordinatorExternalStateImpl::updateLocalSnapshot(const OpTime& optime) { +void ReplicationCoordinatorExternalStateImpl::updateLastAppliedSnapshot(const OpTime& optime) { auto manager = _service->getStorageEngine()->getSnapshotManager(); if (manager) { - manager->setLocalSnapshot(optime.getTimestamp()); + manager->setLastApplied(optime.getTimestamp()); } } diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.h b/src/mongo/db/repl/replication_coordinator_external_state_impl.h index d1d12e285b3..f38aee76a39 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state_impl.h +++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.h @@ -104,7 +104,7 @@ public: virtual bool tooStale(); void dropAllSnapshots() final; void updateCommittedSnapshot(const OpTime& newCommitPoint) final; - void updateLocalSnapshot(const OpTime& optime) final; + void updateLastAppliedSnapshot(const OpTime& optime) final; virtual bool snapshotsEnabled() const; virtual void notifyOplogMetadataWaiters(const OpTime& committedOpTime); boost::optional getEarliestDropPendingOpTime() const final; diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp index 84986f783c8..4c9ddbaf7f1 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp +++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp @@ -250,7 +250,7 @@ void ReplicationCoordinatorExternalStateMock::dropAllSnapshots() {} void ReplicationCoordinatorExternalStateMock::updateCommittedSnapshot( const OpTime& newCommitPoint) {} -void ReplicationCoordinatorExternalStateMock::updateLocalSnapshot(const OpTime& optime) {} +void ReplicationCoordinatorExternalStateMock::updateLastAppliedSnapshot(const OpTime& optime) {} bool ReplicationCoordinatorExternalStateMock::snapshotsEnabled() const { return _areSnapshotsEnabled; diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.h b/src/mongo/db/repl/replication_coordinator_external_state_mock.h index 0ef9ad2e893..fd867df8ac7 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state_mock.h +++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.h @@ -93,7 +93,7 @@ public: virtual bool tooStale(); virtual void dropAllSnapshots(); virtual void updateCommittedSnapshot(const OpTime& newCommitPoint); - virtual void updateLocalSnapshot(const OpTime& optime); + virtual void updateLastAppliedSnapshot(const OpTime& optime); virtual bool snapshotsEnabled() const; virtual void notifyOplogMetadataWaiters(const OpTime& committedOpTime); boost::optional getEarliestDropPendingOpTime() const final; diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index ebde2673416..bcae984ce91 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -1389,6 +1389,14 @@ void ReplicationCoordinatorImpl::_setMyLastAppliedOpTimeAndWallTime( // No need to wake up replication waiters because there should not be any replication waiters // waiting on our own lastApplied. + // Update the storage engine's lastApplied snapshot before updating the stable timestamp on the + // storage engine. New transactions reading from the lastApplied snapshot should start before + // the oldest timestamp is advanced to avoid races. Additionally, update this snapshot before + // signaling optime waiters. This avoids a race that would allow optime waiters to open + // transactions on stale lastApplied values because they do not hold or reacquire the + // replication coordinator mutex when signaled. + _externalState->updateLastAppliedSnapshot(opTime); + // Signal anyone waiting on optime changes. _opTimeWaiterList.setValueIf_inlock( [opTime](const OpTime& waitOpTime, const SharedWaiterHandle& waiter) { @@ -1396,11 +1404,6 @@ void ReplicationCoordinatorImpl::_setMyLastAppliedOpTimeAndWallTime( }, opTime); - // Update the local snapshot before updating the stable timestamp on the storage engine. New - // transactions reading from the local snapshot should start before the oldest timestamp is - // advanced to avoid races. - _externalState->updateLocalSnapshot(opTime); - // Notify the oplog waiters after updating the local snapshot. signalOplogWaiters(); diff --git a/src/mongo/db/repl/replication_recovery.cpp b/src/mongo/db/repl/replication_recovery.cpp index 82c406dc480..fc680d1a67a 100644 --- a/src/mongo/db/repl/replication_recovery.cpp +++ b/src/mongo/db/repl/replication_recovery.cpp @@ -130,6 +130,7 @@ public: _oplogApplicationEndPoint(oplogApplicationEndPoint) {} void startup(OperationContext* opCtx) final { + opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); _client = std::make_unique(opCtx); BSONObj predicate = _oplogApplicationEndPoint ? BSON("$gte" << _oplogApplicationStartPoint << "$lte" << *_oplogApplicationEndPoint) diff --git a/src/mongo/db/repl/transaction_oplog_application.cpp b/src/mongo/db/repl/transaction_oplog_application.cpp index b9f75c07d1a..8608926919c 100644 --- a/src/mongo/db/repl/transaction_oplog_application.cpp +++ b/src/mongo/db/repl/transaction_oplog_application.cpp @@ -512,8 +512,8 @@ void reconstructPreparedTransactions(OperationContext* opCtx, repl::OplogApplica } // Read the transactions table and the oplog collection without a timestamp. // The below DBDirectClient read uses AutoGetCollectionForRead which could implicitly change the - // read source to kLastApplied. So we need to explicitly set the read source to kNoTimestamp to - // force reads in this scope to be untimestamped. + // read source. So we need to explicitly set the read source to kNoTimestamp to force reads in + // this scope to be untimestamped. ReadSourceScope readSourceScope(opCtx, RecoveryUnit::ReadSource::kNoTimestamp); DBDirectClient client(opCtx); diff --git a/src/mongo/db/storage/SConscript b/src/mongo/db/storage/SConscript index ccd5d048e1d..e32b8ba329a 100644 --- a/src/mongo/db/storage/SConscript +++ b/src/mongo/db/storage/SConscript @@ -29,6 +29,23 @@ env.Library( ], ) +env.Library( + target='snapshot_helper', + source=[ + 'snapshot_helper.cpp', + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/base', + '$BUILD_DIR/mongo/db/namespace_string', + ], + LIBDEPS_PRIVATE=[ + '$BUILD_DIR/mongo/db/concurrency/lock_manager', + '$BUILD_DIR/mongo/db/repl/read_concern_args', + '$BUILD_DIR/mongo/db/repl/repl_coordinator_interface', + 'recovery_unit_base', + ], + ) + env.Library( target='duplicate_key_error_info', source=[ diff --git a/src/mongo/db/storage/kv/kv_engine_timestamps_test.cpp b/src/mongo/db/storage/kv/kv_engine_timestamps_test.cpp index aeef71561ae..baecec3c8af 100644 --- a/src/mongo/db/storage/kv/kv_engine_timestamps_test.cpp +++ b/src/mongo/db/storage/kv/kv_engine_timestamps_test.cpp @@ -150,9 +150,9 @@ public: return itCountOn(op); } - int itCountLocal() { + int itCountLastApplied() { auto op = makeOperation(); - op->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kLastApplied); + op->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoOverlap); return itCountOn(op); } @@ -176,14 +176,14 @@ public: return std::string(record->data.data()); } - boost::optional readRecordLocal(RecordId id) { + boost::optional readRecordLastApplied(RecordId id) { auto op = makeOperation(); - op->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kLastApplied); + op->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoOverlap); return readRecordOn(op, id); } - std::string readStringLocal(RecordId id) { - auto record = readRecordLocal(id); + std::string readStringLastApplied(RecordId id) { + auto record = readRecordLastApplied(id); ASSERT(record); return std::string(record->data.data()); } @@ -360,7 +360,7 @@ TEST_F(SnapshotManagerTests, UpdateAndDelete) { ASSERT(!readRecordCommitted(id)); } -TEST_F(SnapshotManagerTests, InsertAndReadOnLocalSnapshot) { +TEST_F(SnapshotManagerTests, InsertAndReadOnLastAppliedSnapshot) { if (!snapshotManager) return; // This test is only for engines that DO support SnapshotManagers. @@ -369,7 +369,7 @@ TEST_F(SnapshotManagerTests, InsertAndReadOnLocalSnapshot) { auto id = insertRecordAndCommit(); auto afterInsert = fetchAndIncrementTimestamp(); - // Not reading on the last local timestamp returns the most recent data. + // Not reading on the last applied timestamp returns the most recent data. auto op = makeOperation(); auto ru = op->recoveryUnit(); ru->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset); @@ -379,18 +379,18 @@ TEST_F(SnapshotManagerTests, InsertAndReadOnLocalSnapshot) { deleteRecordAndCommit(id); auto afterDelete = fetchAndIncrementTimestamp(); - // Reading at the local snapshot timestamps returns data in order. - snapshotManager->setLocalSnapshot(beforeInsert); - ASSERT_EQ(itCountLocal(), 0); - ASSERT(!readRecordLocal(id)); + // Reading at the last applied snapshot timestamps returns data in order. + snapshotManager->setLastApplied(beforeInsert); + ASSERT_EQ(itCountLastApplied(), 0); + ASSERT(!readRecordLastApplied(id)); - snapshotManager->setLocalSnapshot(afterInsert); - ASSERT_EQ(itCountLocal(), 1); - ASSERT(readRecordLocal(id)); + snapshotManager->setLastApplied(afterInsert); + ASSERT_EQ(itCountLastApplied(), 1); + ASSERT(readRecordLastApplied(id)); - snapshotManager->setLocalSnapshot(afterDelete); - ASSERT_EQ(itCountLocal(), 0); - ASSERT(!readRecordLocal(id)); + snapshotManager->setLastApplied(afterDelete); + ASSERT_EQ(itCountLastApplied(), 0); + ASSERT(!readRecordLastApplied(id)); } TEST_F(SnapshotManagerTests, UpdateAndDeleteOnLocalSnapshot) { @@ -416,20 +416,20 @@ TEST_F(SnapshotManagerTests, UpdateAndDeleteOnLocalSnapshot) { deleteRecordAndCommit(id); auto afterDelete = fetchAndIncrementTimestamp(); - snapshotManager->setLocalSnapshot(beforeInsert); - ASSERT_EQ(itCountLocal(), 0); - ASSERT(!readRecordLocal(id)); + snapshotManager->setLastApplied(beforeInsert); + ASSERT_EQ(itCountLastApplied(), 0); + ASSERT(!readRecordLastApplied(id)); - snapshotManager->setLocalSnapshot(afterInsert); - ASSERT_EQ(itCountLocal(), 1); - ASSERT_EQ(readStringLocal(id), "Aardvark"); + snapshotManager->setLastApplied(afterInsert); + ASSERT_EQ(itCountLastApplied(), 1); + ASSERT_EQ(readStringLastApplied(id), "Aardvark"); - snapshotManager->setLocalSnapshot(afterUpdate); - ASSERT_EQ(itCountLocal(), 1); - ASSERT_EQ(readStringLocal(id), "Blue spotted stingray"); + snapshotManager->setLastApplied(afterUpdate); + ASSERT_EQ(itCountLastApplied(), 1); + ASSERT_EQ(readStringLastApplied(id), "Blue spotted stingray"); - snapshotManager->setLocalSnapshot(afterDelete); - ASSERT_EQ(itCountLocal(), 0); - ASSERT(!readRecordLocal(id)); + snapshotManager->setLastApplied(afterDelete); + ASSERT_EQ(itCountLastApplied(), 0); + ASSERT(!readRecordLastApplied(id)); } } // namespace mongo diff --git a/src/mongo/db/storage/recovery_unit.h b/src/mongo/db/storage/recovery_unit.h index f8c70309ae2..eeddfafd624 100644 --- a/src/mongo/db/storage/recovery_unit.h +++ b/src/mongo/db/storage/recovery_unit.h @@ -253,7 +253,6 @@ public: * - when using ReadSource::kNoOverlap, the timestamp chosen by the storage engine. * - when using ReadSource::kAllDurableSnapshot, the timestamp chosen using the storage * engine's all_durable timestamp. - * - when using ReadSource::kLastApplied, the timestamp chosen using the storage engine's last * applied timestamp. Can return boost::none if no timestamp has been established. * - when using ReadSource::kMajorityCommitted, the majority committed timestamp chosen by the * storage engine after a transaction has been opened or after a call to @@ -398,11 +397,6 @@ public: * Read from the latest timestamp where no future transactions will commit at or before. */ kNoOverlap, - /** - * Read from the last applied timestamp. New transactions start at the most up-to-date - * timestamp. - */ - kLastApplied, /** * Read from the all_durable timestamp. New transactions will always read from the same * timestamp and never advance. @@ -414,6 +408,24 @@ public: kProvided }; + static std::string toString(ReadSource rs) { + switch (rs) { + case ReadSource::kUnset: + return "kUnset"; + case ReadSource::kNoTimestamp: + return "kNoTimestamp"; + case ReadSource::kMajorityCommitted: + return "kMajorityCommitted"; + case ReadSource::kNoOverlap: + return "kNoOverlap"; + case ReadSource::kAllDurableSnapshot: + return "kAllDurableSnapshot"; + case ReadSource::kProvided: + return "kProvided"; + } + MONGO_UNREACHABLE; + } + /** * Sets which timestamp to use for read transactions. If 'provided' is supplied, only kProvided * is an acceptable input. diff --git a/src/mongo/db/storage/snapshot_helper.cpp b/src/mongo/db/storage/snapshot_helper.cpp new file mode 100644 index 00000000000..777868f7830 --- /dev/null +++ b/src/mongo/db/storage/snapshot_helper.cpp @@ -0,0 +1,151 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kStorage + +#include "mongo/platform/basic.h" + +#include "mongo/db/storage/snapshot_helper.h" + +#include "mongo/db/repl/read_concern_args.h" +#include "mongo/db/repl/replication_coordinator.h" +#include "mongo/logv2/log.h" + +namespace mongo { +namespace SnapshotHelper { +bool canSwitchReadSource(OperationContext* opCtx) { + + // Most readConcerns have behavior controlled at higher levels. Local and available are the only + // ReadConcerns that should consider changing, since they read without a timestamp by default. + const auto readConcernLevel = repl::ReadConcernArgs::get(opCtx).getLevel(); + if (readConcernLevel == repl::ReadConcernLevel::kLocalReadConcern || + readConcernLevel == repl::ReadConcernLevel::kAvailableReadConcern) { + return true; + } + + return false; +} + +bool shouldReadAtNoOverlap(OperationContext* opCtx, + const NamespaceString& nss, + std::string* reason) { + + // If this is true, then the operation opted-in to the PBWM lock, implying that it cannot read + // at no-overlap. It's important to note that it is possible for this to be false, but still be + // holding the PBWM lock, explained below. + if (opCtx->lockState()->shouldConflictWithSecondaryBatchApplication()) { + *reason = "conflicts with batch application"; + return false; + } + + // If we are already holding the PBWM lock, do not read at no-overlap. Snapshots acquired by an + // operation after a yield/restore must see all writes in the pre-yield snapshot. Once a + // snapshot is reading without a timestamp, we choose to continue acquiring snapshots without a + // timestamp. This is done in lieu of determining a timestamp far enough in the future that's + // guaranteed to observe all previous writes. This may occur when multiple collection locks are + // held concurrently, which is often the case when DBDirectClient is used. + if (opCtx->lockState()->isLockHeldForMode(resourceIdParallelBatchWriterMode, MODE_IS)) { + *reason = "PBWM lock is held"; + LOGV2_DEBUG(20577, 1, "not reading at no-overlap because the PBWM lock is held"); + return false; + } + + // If we are in a replication state (like secondary or primary catch-up) where we are not + // accepting writes, we should read at no-overlap. If this node can accept writes, then no + // conflicting replication batches are being applied and we can read from the default snapshot. + if (repl::ReplicationCoordinator::get(opCtx)->canAcceptWritesForDatabase(opCtx, "admin")) { + *reason = "primary"; + return false; + } + + // Non-replicated collections do not need to read at no-overlap, as those collections are not + // written by the replication system. However, the oplog is special, as it *is* written by the + // replication system. + if (!nss.isReplicated() && !nss.isOplog()) { + *reason = "unreplicated collection"; + return false; + } + + return true; +} +boost::optional getNewReadSource(OperationContext* opCtx, + const NamespaceString& nss) { + const bool canSwitch = canSwitchReadSource(opCtx); + if (!canSwitch) { + return boost::none; + } + + const auto existing = opCtx->recoveryUnit()->getTimestampReadSource(); + std::string reason; + const bool readAtNoOverlap = shouldReadAtNoOverlap(opCtx, nss, &reason); + if (existing == RecoveryUnit::ReadSource::kUnset) { + // Shifting from reading without a timestamp to reading with a timestamp can be dangerous + // because writes will appear to vanish. This case is intended for new reads on secondaries + // and query yield recovery after state transitions from primary to secondary. + if (readAtNoOverlap) { + LOGV2_DEBUG(4452901, 2, "Changing ReadSource to kNoOverlap", logAttrs(nss)); + return RecoveryUnit::ReadSource::kNoOverlap; + } + } else if (existing == RecoveryUnit::ReadSource::kNoOverlap) { + // For some reason, we can no longer read at kNoOverlap. + // An operation that yields a timestamped snapshot must restore a snapshot with at least as + // large of a timestamp, or with proper consideration of rollback scenarios, no timestamp. + // Given readers do not survive rollbacks, it's okay to go from reading with a timestamp to + // reading without one. More writes will become visible. + if (!readAtNoOverlap) { + LOGV2_DEBUG( + 4452902, 2, "Changing ReadSource to kUnset", logAttrs(nss), "reason"_attr = reason); + // This shift to kUnset assumes that callers will not make future attempts to manipulate + // their ReadSources after performing reads at an un-timetamped snapshot. The only + // exception is callers of this function that may need to change from kUnset to + // kNoOverlap in the event of a catalog conflict or query yield. + return RecoveryUnit::ReadSource::kUnset; + } + } + return boost::none; +} + +bool collectionChangesConflictWithRead(boost::optional collectionMin, + boost::optional readTimestamp) { + // This is the timestamp of the most recent catalog changes to this collection. If this is + // greater than any point in time read timestamps, we should either wait or return an error. + if (!collectionMin) { + return false; + } + + // If we do not have a point in time to conflict with collectionMin, return. + if (!readTimestamp || readTimestamp->isNull()) { + return false; + } + + // Return if there are no conflicting catalog changes with the readTimestamp. + return *collectionMin > readTimestamp; +} +} // namespace SnapshotHelper +} // namespace mongo \ No newline at end of file diff --git a/src/mongo/db/storage/snapshot_helper.h b/src/mongo/db/storage/snapshot_helper.h new file mode 100644 index 00000000000..fa8fdd85f24 --- /dev/null +++ b/src/mongo/db/storage/snapshot_helper.h @@ -0,0 +1,43 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * . + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/operation_context.h" + +namespace mongo { +namespace SnapshotHelper { +// Returns a ReadSource if we should change our current ReadSource. Returns boost::none otherwise. +boost::optional getNewReadSource(OperationContext* opCtx, + const NamespaceString& nss); + +bool collectionChangesConflictWithRead(boost::optional collectionMin, + boost::optional readTimestamp); +} // namespace SnapshotHelper +} // namespace mongo diff --git a/src/mongo/db/storage/snapshot_manager.h b/src/mongo/db/storage/snapshot_manager.h index d41df1c5013..7839ef179ba 100644 --- a/src/mongo/db/storage/snapshot_manager.h +++ b/src/mongo/db/storage/snapshot_manager.h @@ -59,14 +59,14 @@ public: virtual void setCommittedSnapshot(const Timestamp& timestamp) = 0; /** - * Sets the snapshot for the last stable timestamp for reading on secondaries. + * Sets the lastApplied timestamp. */ - virtual void setLocalSnapshot(const Timestamp& timestamp) = 0; + virtual void setLastApplied(const Timestamp& timestamp) = 0; /** - * Returns the local snapshot timestamp. + * Returns the lastApplied timestamp. */ - virtual boost::optional getLocalSnapshot() = 0; + virtual boost::optional getLastApplied() = 0; /** * Drops all snapshots and clears the "committed" snapshot. diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp index 661b0e009c1..352699a9de8 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp @@ -668,6 +668,12 @@ WiredTigerKVEngine::WiredTigerKVEngine(const std::string& canonicalName, setInitialDataTimestamp(_recoveryTimestamp); setOldestTimestamp(_recoveryTimestamp, false); setStableTimestamp(_recoveryTimestamp, false); + + _sessionCache->snapshotManager().setLastApplied(_recoveryTimestamp); + { + stdx::lock_guard lk(_highestDurableTimestampMutex); + _highestSeenDurableTimestamp = _recoveryTimestamp.asULL(); + } } } diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp index 8cc45e0201e..f9d9751d464 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp @@ -453,7 +453,6 @@ boost::optional WiredTigerRecoveryUnit::getPointInTimeReadTimestamp() // The following ReadSources can only establish a read timestamp when a transaction is // opened. case ReadSource::kNoOverlap: - case ReadSource::kLastApplied: case ReadSource::kAllDurableSnapshot: break; } @@ -462,14 +461,14 @@ boost::optional WiredTigerRecoveryUnit::getPointInTimeReadTimestamp() getSession(); switch (_timestampReadSource) { - case ReadSource::kLastApplied: - // The lastApplied timestamp is not always available, so it is not possible to invariant - // that it exists as other ReadSources do. + case ReadSource::kNoOverlap: + // The lastApplied and allDurable timestamps are not always available if the system has + // not accepted writes, so it is not possible to invariant that it exists as other + // ReadSources do. if (!_readAtTimestamp.isNull()) { return _readAtTimestamp; } return boost::none; - case ReadSource::kNoOverlap: case ReadSource::kAllDurableSnapshot: invariant(!_readAtTimestamp.isNull()); return _readAtTimestamp; @@ -515,17 +514,6 @@ void WiredTigerRecoveryUnit::_txnOpen() { session, _prepareConflictBehavior, _roundUpPreparedTimestamps); break; } - case ReadSource::kLastApplied: { - if (_sessionCache->snapshotManager().getLocalSnapshot()) { - _readAtTimestamp = _sessionCache->snapshotManager().beginTransactionOnLocalSnapshot( - session, _prepareConflictBehavior, _roundUpPreparedTimestamps); - } else { - WiredTigerBeginTxnBlock( - session, _prepareConflictBehavior, _roundUpPreparedTimestamps) - .done(); - } - break; - } case ReadSource::kNoOverlap: { _readAtTimestamp = _beginTransactionAtNoOverlapTimestamp(session); break; @@ -555,8 +543,9 @@ void WiredTigerRecoveryUnit::_txnOpen() { LOGV2_DEBUG(22414, 3, - "WT begin_transaction for snapshot id {getSnapshotId_toNumber}", - "getSnapshotId_toNumber"_attr = getSnapshotId().toNumber()); + "WT begin_transaction", + "snapshotId"_attr = getSnapshotId().toNumber(), + "readSource"_attr = toString(_timestampReadSource)); } Timestamp WiredTigerRecoveryUnit::_beginTransactionAtAllDurableTimestamp(WT_SESSION* session) { @@ -578,8 +567,8 @@ Timestamp WiredTigerRecoveryUnit::_beginTransactionAtAllDurableTimestamp(WT_SESS Timestamp WiredTigerRecoveryUnit::_beginTransactionAtNoOverlapTimestamp(WT_SESSION* session) { - auto lastApplied = _sessionCache->snapshotManager().getLocalSnapshot(); - Timestamp allDurable = Timestamp(_oplogManager->fetchAllDurableValue(session->connection)); + auto lastApplied = _sessionCache->snapshotManager().getLastApplied(); + Timestamp allDurable = Timestamp(_sessionCache->getKVEngine()->getAllDurableTimestamp()); // When using timestamps for reads and writes, it's important that readers and writers don't // overlap with the timestamps they use. In other words, at any point in the system there should @@ -607,17 +596,34 @@ Timestamp WiredTigerRecoveryUnit::_beginTransactionAtNoOverlapTimestamp(WT_SESSI // should read afterward. Timestamp readTimestamp = (lastApplied) ? std::min(*lastApplied, allDurable) : allDurable; + if (readTimestamp.isNull()) { + // When there is not an all_durable or lastApplied timestamp available, read without a + // timestamp. Do not round up the read timestamp to the oldest timestamp. + + // There is a race that allows new transactions to start between the time we check for a + // read timestamp and start our transaction, which can temporarily violate the contract of + // kNoOverlap. That is, writes will be visible that occur after the all_durable time. This + // is only possible for readers that start immediately after an initial sync that did not + // replicate any oplog entries. Future transactions will start reading at a timestamp once + // timestamped writes have been made. + WiredTigerBeginTxnBlock txnOpen( + session, _prepareConflictBehavior, _roundUpPreparedTimestamps); + LOGV2_DEBUG(4452900, 1, "no read timestamp available for kNoOverlap"); + txnOpen.done(); + return readTimestamp; + } + WiredTigerBeginTxnBlock txnOpen(session, _prepareConflictBehavior, _roundUpPreparedTimestamps, RoundUpReadTimestamp::kRound); auto status = txnOpen.setReadSnapshot(readTimestamp); fassert(51066, status); + txnOpen.done(); - // We might have rounded to oldest between calling getAllDurable and setReadSnapshot. We need - // to get the actual read timestamp we used. + // We might have rounded to oldest between calling getAllDurable and setReadSnapshot. We + // need to get the actual read timestamp we used. readTimestamp = _getTransactionReadTimestamp(session); - txnOpen.done(); return readTimestamp; } @@ -769,15 +775,14 @@ void WiredTigerRecoveryUnit::setTimestampReadSource(ReadSource readSource, boost::optional provided) { LOGV2_DEBUG(22416, 3, - "setting timestamp read source: {static_cast_int_readSource}, provided timestamp: " - "{provided_provided_none}", - "static_cast_int_readSource"_attr = static_cast(readSource), - "provided_provided_none"_attr = ((provided) ? provided->toString() : "none")); + "setting timestamp read source", + "readSource"_attr = toString(readSource), + "provided"_attr = ((provided) ? provided->toString() : "none")); invariant(!_isActive() || _timestampReadSource == readSource, str::stream() << "Current state: " << toString(_getState()) << ". Invalid internal state while setting timestamp read source: " - << static_cast(readSource) << ", provided timestamp: " + << toString(readSource) << ", provided timestamp: " << (provided ? provided->toString() : "none")); invariant(!provided == (readSource != ReadSource::kProvided)); invariant(!(provided && provided->isNull())); diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp index 314a524063e..1f440d319b7 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp @@ -163,11 +163,14 @@ public: clientAndCtx2 = makeClientAndOpCtx(harnessHelper.get(), "reader"); ru1 = checked_cast(clientAndCtx1.second->recoveryUnit()); ru2 = checked_cast(clientAndCtx2.second->recoveryUnit()); + snapshotManager = dynamic_cast( + harnessHelper->getEngine()->getSnapshotManager()); } std::unique_ptr harnessHelper; ClientAndCtx clientAndCtx1, clientAndCtx2; WiredTigerRecoveryUnit *ru1, *ru2; + WiredTigerSnapshotManager* snapshotManager; private: const char* wt_uri = "table:prepare_transaction"; @@ -180,6 +183,86 @@ TEST_F(WiredTigerRecoveryUnitTestFixture, SetReadSource) { ASSERT_EQ(Timestamp(1, 1), ru1->getPointInTimeReadTimestamp()); } +TEST_F(WiredTigerRecoveryUnitTestFixture, NoOverlapReadSource) { + OperationContext* opCtx1 = clientAndCtx1.second.get(); + std::unique_ptr rs(harnessHelper->createRecordStore(opCtx1, "a.b")); + + const std::string str = str::stream() << "test"; + const Timestamp ts1{1, 1}; + const Timestamp ts2{1, 2}; + const Timestamp ts3{1, 2}; + + RecordId rid1; + { + WriteUnitOfWork wuow(opCtx1); + StatusWith res = rs->insertRecord(opCtx1, str.c_str(), str.size() + 1, ts1); + ASSERT_OK(res); + wuow.commit(); + rid1 = res.getValue(); + snapshotManager->setLastApplied(ts1); + } + + // Read without a timestamp. The write should be visible. + ASSERT_EQ(opCtx1->recoveryUnit()->getTimestampReadSource(), RecoveryUnit::ReadSource::kUnset); + RecordData unused; + ASSERT_TRUE(rs->findRecord(opCtx1, rid1, &unused)); + + // Read with kNoOverlap. The write should be visible. + opCtx1->recoveryUnit()->abandonSnapshot(); + opCtx1->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoOverlap); + ASSERT_TRUE(rs->findRecord(opCtx1, rid1, &unused)); + + RecordId rid2, rid3; + { + // Start, but do not commit a transaction with opCtx2. This sets a timestamp at ts2, which + // creates a hole. kNoOverlap, which is a function of all_durable, will only be able to read + // at the time immediately before. + OperationContext* opCtx2 = clientAndCtx2.second.get(); + WriteUnitOfWork wuow(opCtx2); + StatusWith res = + rs->insertRecord(opCtx2, str.c_str(), str.size() + 1, Timestamp()); + ASSERT_OK(opCtx2->recoveryUnit()->setTimestamp(ts2)); + ASSERT_OK(res); + + // While holding open a transaction with opCtx2, perform an insert at ts3 with opCtx1. This + // creates a "hole". + { + WriteUnitOfWork wuow(opCtx1); + StatusWith res = rs->insertRecord(opCtx1, str.c_str(), str.size() + 1, ts3); + ASSERT_OK(res); + wuow.commit(); + rid3 = res.getValue(); + snapshotManager->setLastApplied(ts3); + } + + // Read without a timestamp, and we should see the first and third records. + opCtx1->recoveryUnit()->abandonSnapshot(); + opCtx1->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset); + ASSERT_TRUE(rs->findRecord(opCtx1, rid1, &unused)); + ASSERT_FALSE(rs->findRecord(opCtx1, rid2, &unused)); + ASSERT_TRUE(rs->findRecord(opCtx1, rid3, &unused)); + + // Now read at kNoOverlap. Since the transaction at ts2 has not committed, all_durable is + // held back to ts1. LastApplied has advanced to ts3, but because kNoOverlap is the minimum, + // we should only see one record. + opCtx1->recoveryUnit()->abandonSnapshot(); + opCtx1->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoOverlap); + ASSERT_TRUE(rs->findRecord(opCtx1, rid1, &unused)); + ASSERT_FALSE(rs->findRecord(opCtx1, rid2, &unused)); + ASSERT_FALSE(rs->findRecord(opCtx1, rid3, &unused)); + + wuow.commit(); + rid2 = res.getValue(); + } + + // Now that the hole has been closed, kNoOverlap should see all 3 records. + opCtx1->recoveryUnit()->abandonSnapshot(); + opCtx1->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoOverlap); + ASSERT_TRUE(rs->findRecord(opCtx1, rid1, &unused)); + ASSERT_TRUE(rs->findRecord(opCtx1, rid2, &unused)); + ASSERT_TRUE(rs->findRecord(opCtx1, rid3, &unused)); +} + TEST_F(WiredTigerRecoveryUnitTestFixture, CreateAndCheckForCachePressure) { int time = 1; diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.cpp index 74988508a4c..efc2d8adfe5 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.cpp @@ -48,17 +48,17 @@ void WiredTigerSnapshotManager::setCommittedSnapshot(const Timestamp& timestamp) _committedSnapshot = timestamp; } -void WiredTigerSnapshotManager::setLocalSnapshot(const Timestamp& timestamp) { - stdx::lock_guard lock(_localSnapshotMutex); +void WiredTigerSnapshotManager::setLastApplied(const Timestamp& timestamp) { + stdx::lock_guard lock(_lastAppliedMutex); if (timestamp.isNull()) - _localSnapshot = boost::none; + _lastApplied = boost::none; else - _localSnapshot = timestamp; + _lastApplied = timestamp; } -boost::optional WiredTigerSnapshotManager::getLocalSnapshot() { - stdx::lock_guard lock(_localSnapshotMutex); - return _localSnapshot; +boost::optional WiredTigerSnapshotManager::getLastApplied() { + stdx::lock_guard lock(_lastAppliedMutex); + return _lastApplied; } void WiredTigerSnapshotManager::dropAllSnapshots() { @@ -93,23 +93,4 @@ Timestamp WiredTigerSnapshotManager::beginTransactionOnCommittedSnapshot( return *_committedSnapshot; } -Timestamp WiredTigerSnapshotManager::beginTransactionOnLocalSnapshot( - WT_SESSION* session, - PrepareConflictBehavior prepareConflictBehavior, - RoundUpPreparedTimestamps roundUpPreparedTimestamps) const { - WiredTigerBeginTxnBlock txnOpen(session, prepareConflictBehavior, roundUpPreparedTimestamps); - - stdx::lock_guard lock(_localSnapshotMutex); - invariant(_localSnapshot); - LOGV2_DEBUG(22427, - 3, - "begin_transaction on local snapshot {localSnapshot_get}", - "localSnapshot_get"_attr = _localSnapshot.get().toString()); - auto status = txnOpen.setReadSnapshot(_localSnapshot.get()); - fassert(50775, status); - - txnOpen.done(); - return *_localSnapshot; -} - } // namespace mongo diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.h b/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.h index 1726a7d4c2b..b285c694e70 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.h @@ -51,8 +51,8 @@ public: WiredTigerSnapshotManager() = default; void setCommittedSnapshot(const Timestamp& timestamp) final; - void setLocalSnapshot(const Timestamp& timestamp) final; - boost::optional getLocalSnapshot() final; + void setLastApplied(const Timestamp& timestamp) final; + boost::optional getLastApplied() final; void dropAllSnapshots() final; // @@ -69,16 +69,6 @@ public: PrepareConflictBehavior prepareConflictBehavior, RoundUpPreparedTimestamps roundUpPreparedTimestamps) const; - /** - * Starts a transaction on the last stable local timestamp, set by setLocalSnapshot. - * - * Throws if no local snapshot has been set. - */ - Timestamp beginTransactionOnLocalSnapshot( - WT_SESSION* session, - PrepareConflictBehavior prepareConflictBehavior, - RoundUpPreparedTimestamps roundUpPreparedTimestamps) const; - /** * Returns lowest SnapshotName that could possibly be used by a future call to * beginTransactionOnCommittedSnapshot, or boost::none if there is currently no committed @@ -95,9 +85,9 @@ private: MONGO_MAKE_LATCH("WiredTigerSnapshotManager::_committedSnapshotMutex"); boost::optional _committedSnapshot; - // Snapshot to use for reads at a local stable timestamp. - mutable Mutex _localSnapshotMutex = // Guards _localSnapshot. - MONGO_MAKE_LATCH("WiredTigerSnapshotManager::_localSnapshotMutex"); - boost::optional _localSnapshot; + // Timestamp to use for reads at a the lastApplied timestamp. + mutable Mutex _lastAppliedMutex = // Guards _lastApplied. + MONGO_MAKE_LATCH("WiredTigerSnapshotManager::_lastAppliedMutex"); + boost::optional _lastApplied; }; } // namespace mongo diff --git a/src/mongo/dbtests/storage_timestamp_tests.cpp b/src/mongo/dbtests/storage_timestamp_tests.cpp index d148f48d416..fca1968b3e4 100644 --- a/src/mongo/dbtests/storage_timestamp_tests.cpp +++ b/src/mongo/dbtests/storage_timestamp_tests.cpp @@ -1525,7 +1525,7 @@ public: // This test does not run a real ReplicationCoordinator, so must advance the snapshot // manager manually. auto storageEngine = cc().getServiceContext()->getStorageEngine(); - storageEngine->getSnapshotManager()->setLocalSnapshot(presentTs); + storageEngine->getSnapshotManager()->setLastApplied(presentTs); const auto beforeTxnTime = _clock->reserveTicks(1); auto beforeTxnTs = beforeTxnTime.asTimestamp(); @@ -3106,7 +3106,7 @@ public: // This test does not run a real ReplicationCoordinator, so must advance the snapshot // manager manually. auto storageEngine = cc().getServiceContext()->getStorageEngine(); - storageEngine->getSnapshotManager()->setLocalSnapshot(presentTs); + storageEngine->getSnapshotManager()->setLastApplied(presentTs); const auto beforeTxnTime = _clock->reserveTicks(1); beforeTxnTs = beforeTxnTime.asTimestamp(); commitEntryTs = beforeTxnTime.addTicks(1).asTimestamp(); -- cgit v1.2.1