diff options
author | Louis Williams <louis.williams@mongodb.com> | 2020-05-12 13:39:31 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-05-12 18:03:48 +0000 |
commit | 25c694f365db0f07a445bd17b6cd5cbf32f5f2f9 (patch) | |
tree | c90451e347838f428b8cad851531b42c42cce6fa /src | |
parent | e2602ad053b2120982fbcac8e33e1ad64e6ec30a (diff) | |
download | mongo-25c694f365db0f07a445bd17b6cd5cbf32f5f2f9.tar.gz |
SERVER-46721 Secondary readers should read at the no-overlap time instead of lastApplied
The no-overlap time, ReadSource::kNoOverlap, is the minimum of replication's lastApplied timestamp
and WiredTiger's all_durable time. This time is independent of replication state and ensures
queries do not see oplog holes after state transitions from secondary to primary.
Diffstat (limited to 'src')
28 files changed, 604 insertions, 299 deletions
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript index 55595f5999b..9519d1207f1 100644 --- a/src/mongo/db/SConscript +++ b/src/mongo/db/SConscript @@ -602,6 +602,7 @@ env.Library( LIBDEPS_PRIVATE=[ '$BUILD_DIR/mongo/idl/server_parameter', 'catalog/database_holder', + 'storage/snapshot_helper', ], ) diff --git a/src/mongo/db/catalog_raii_test.cpp b/src/mongo/db/catalog_raii_test.cpp index 50913896118..733ed190250 100644 --- a/src/mongo/db/catalog_raii_test.cpp +++ b/src/mongo/db/catalog_raii_test.cpp @@ -260,8 +260,8 @@ TEST_F(ReadSourceScopeTest, RestoreReadSource) { ReadSourceScope scope(opCtx()); ASSERT_EQ(opCtx()->recoveryUnit()->getTimestampReadSource(), ReadSource::kUnset); - opCtx()->recoveryUnit()->setTimestampReadSource(ReadSource::kLastApplied); - ASSERT_EQ(opCtx()->recoveryUnit()->getTimestampReadSource(), ReadSource::kLastApplied); + opCtx()->recoveryUnit()->setTimestampReadSource(ReadSource::kNoOverlap); + ASSERT_EQ(opCtx()->recoveryUnit()->getTimestampReadSource(), ReadSource::kNoOverlap); ASSERT_EQ(opCtx()->recoveryUnit()->getPointInTimeReadTimestamp(), boost::none); } ASSERT_EQ(opCtx()->recoveryUnit()->getTimestampReadSource(), ReadSource::kProvided); diff --git a/src/mongo/db/commands/getmore_cmd.cpp b/src/mongo/db/commands/getmore_cmd.cpp index f183225cd8c..97e6f6b79ea 100644 --- a/src/mongo/db/commands/getmore_cmd.cpp +++ b/src/mongo/db/commands/getmore_cmd.cpp @@ -148,6 +148,7 @@ void applyCursorReadConcern(OperationContext* opCtx, repl::ReadConcernArgs rcArg switch (rcArgs.getMajorityReadMechanism()) { case repl::ReadConcernArgs::MajorityReadMechanism::kMajoritySnapshot: { // Make sure we read from the majority snapshot. + opCtx->recoveryUnit()->abandonSnapshot(); opCtx->recoveryUnit()->setTimestampReadSource( RecoveryUnit::ReadSource::kMajorityCommitted); uassertStatusOK(opCtx->recoveryUnit()->obtainMajorityCommittedSnapshot()); @@ -156,6 +157,7 @@ void applyCursorReadConcern(OperationContext* opCtx, repl::ReadConcernArgs rcArg case repl::ReadConcernArgs::MajorityReadMechanism::kSpeculative: { // Mark the operation as speculative and select the correct read source. repl::SpeculativeMajorityReadInfo::get(opCtx).setIsSpeculativeRead(); + opCtx->recoveryUnit()->abandonSnapshot(); opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoOverlap); break; } @@ -167,6 +169,7 @@ void applyCursorReadConcern(OperationContext* opCtx, repl::ReadConcernArgs rcArg !opCtx->inMultiDocumentTransaction()) { auto atClusterTime = rcArgs.getArgsAtClusterTime(); invariant(atClusterTime && *atClusterTime != LogicalTime::kUninitialized); + opCtx->recoveryUnit()->abandonSnapshot(); opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kProvided, atClusterTime->asTimestamp()); } diff --git a/src/mongo/db/db_raii.cpp b/src/mongo/db/db_raii.cpp index 34775dd7d01..53dda3ca490 100644 --- a/src/mongo/db/db_raii.cpp +++ b/src/mongo/db/db_raii.cpp @@ -39,6 +39,7 @@ #include "mongo/db/db_raii_gen.h" #include "mongo/db/repl/replication_coordinator.h" #include "mongo/db/s/collection_sharding_state.h" +#include "mongo/db/storage/snapshot_helper.h" #include "mongo/logv2/log.h" namespace mongo { @@ -112,7 +113,7 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx, _autoColl.emplace(opCtx, nsOrUUID, collectionLockMode, viewMode, deadline); // If the read source is explicitly set to kNoTimestamp, we read the most up to date data and do - // not consider reading at last applied (e.g. FTDC needs that). + // not consider reading at the no-overlap point (e.g. FTDC needs that). if (opCtx->recoveryUnit()->getTimestampReadSource() == RecoveryUnit::ReadSource::kNoTimestamp) return; @@ -123,10 +124,6 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx, // need to check for pending catalog changes. while (auto coll = _autoColl->getCollection()) { - auto readSource = opCtx->recoveryUnit()->getTimestampReadSource(); - auto minSnapshot = coll->getMinimumVisibleSnapshot(); - auto mySnapshot = opCtx->recoveryUnit()->getPointInTimeReadTimestamp(); - // TODO(SERVER-47824): Also ban transaction snapshot reads on capped collections. uassert(ErrorCodes::SnapshotUnavailable, "Reading from capped collections with readConcern snapshot is not supported " @@ -135,77 +132,85 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx, readConcernLevel != repl::ReadConcernLevel::kSnapshotReadConcern || opCtx->inMultiDocumentTransaction()); - // If we are reading at a provided timestamp earlier than the latest catalog changes, - // then we must return an error. - if (readSource == RecoveryUnit::ReadSource::kProvided && minSnapshot && - (*mySnapshot < *minSnapshot)) { - uasserted(ErrorCodes::SnapshotUnavailable, - str::stream() - << "Unable to read from a snapshot due to pending collection catalog " - "changes; please retry the operation. Snapshot timestamp is " - << mySnapshot->toString() << ". Collection minimum is " - << minSnapshot->toString()); - } - // During batch application on secondaries, there is a potential to read inconsistent states // that would normally be protected by the PBWM lock. In order to serve secondary reads // during this period, we default to not acquiring the lock (by setting // _shouldNotConflictWithSecondaryBatchApplicationBlock). On primaries, we always read at a // consistent time, so not taking the PBWM lock is not a problem. On secondaries, we have to - // guarantee we read at a consistent state, so we must read at the last applied timestamp, - // which is set after each complete batch. + // guarantee we read at a consistent state, so we must read at the no-overlap timestamp, + // which is a function of the lastApplied timestamp, which is set after each complete batch. // - // If an attempt to read at the last applied timestamp is unsuccessful because there are - // pending catalog changes that occur after the last applied timestamp, we release our locks + // If an attempt to read at the no-overlap timestamp is unsuccessful because there are + // pending catalog changes that occur after the no-overlap timestamp, we release our locks // and try again with the PBWM lock (by unsetting // _shouldNotConflictWithSecondaryBatchApplicationBlock). const NamespaceString nss = coll->ns(); + auto readSource = opCtx->recoveryUnit()->getTimestampReadSource(); - bool readAtLastAppliedTimestamp = - _shouldReadAtLastAppliedTimestamp(opCtx, nss, readConcernLevel); - - if (readAtLastAppliedTimestamp) { - opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kLastApplied); - readSource = opCtx->recoveryUnit()->getTimestampReadSource(); + // Once we have our locks, check whether or not we should override the ReadSource that was + // set before acquiring locks. + if (auto newReadSource = SnapshotHelper::getNewReadSource(opCtx, nss)) { + opCtx->recoveryUnit()->setTimestampReadSource(*newReadSource); + readSource = *newReadSource; } - // This timestamp could be earlier than the timestamp seen when the transaction is opened - // because it is set asynchonously. This is not problematic because holding the collection - // lock guarantees no metadata changes will occur in that time. - auto lastAppliedTimestamp = readAtLastAppliedTimestamp - ? boost::optional<Timestamp>(replCoord->getMyLastAppliedOpTime().getTimestamp()) - : boost::none; + const auto readTimestamp = opCtx->recoveryUnit()->getPointInTimeReadTimestamp(); + const auto afterClusterTime = repl::ReadConcernArgs::get(opCtx).getArgsAfterClusterTime(); + if (readTimestamp && afterClusterTime) { + // Readers that use afterClusterTime have already waited at a higher level for the + // lastApplied time to advance to a specified optime, and they assume the read timestamp + // of the operation is at least that waited-for timestamp. For kNoOverlap, which is the + // minimum of lastApplied and all_durable, this invariant ensures that afterClusterTime + // reads do not choose a read timestamp older than the one requested. + invariant(*readTimestamp >= afterClusterTime->asTimestamp(), + str::stream() << "read timestamp " << readTimestamp->toString() + << "was less than afterClusterTime: " + << afterClusterTime->asTimestamp().toString()); + } - if (!_conflictingCatalogChanges(opCtx, minSnapshot, lastAppliedTimestamp)) { + auto minSnapshot = coll->getMinimumVisibleSnapshot(); + if (!SnapshotHelper::collectionChangesConflictWithRead(minSnapshot, readTimestamp)) { return; } - invariant(lastAppliedTimestamp || - // The kMajorityCommitted and kNoOverlap read sources already read from timestamps - // that are safe with respect to concurrent secondary batch application. - readSource == RecoveryUnit::ReadSource::kMajorityCommitted || - readSource == RecoveryUnit::ReadSource::kNoOverlap); + // If we are reading at a provided timestamp earlier than the latest catalog changes, + // then we must return an error. + if (readSource == RecoveryUnit::ReadSource::kProvided) { + uasserted(ErrorCodes::SnapshotUnavailable, + str::stream() + << "Unable to read from a snapshot due to pending collection catalog " + "changes; please retry the operation. Snapshot timestamp is " + << readTimestamp->toString() << ". Collection minimum is " + << minSnapshot->toString()); + } + + invariant( + // The kMajorityCommitted and kNoOverlap read sources already read from timestamps + // that are safe with respect to concurrent secondary batch application, and are + // eligible for retrying. + readSource == RecoveryUnit::ReadSource::kMajorityCommitted || + readSource == RecoveryUnit::ReadSource::kNoOverlap); invariant(readConcernLevel != repl::ReadConcernLevel::kSnapshotReadConcern); // Yield locks in order to do the blocking call below. _autoColl = boost::none; - // If there are pending catalog changes, we should conflict with any in-progress batches (by - // taking the PBWM lock) and choose not to read from the last applied timestamp by unsetting - // _shouldNotConflictWithSecondaryBatchApplicationBlock. Index builds on secondaries can - // complete at timestamps later than the lastAppliedTimestamp during initial sync. After - // initial sync finishes, if we waited instead of retrying, readers would block indefinitely - // waiting for the lastAppliedTimestamp to move forward. Instead we force the reader take - // the PBWM lock and retry. - if (lastAppliedTimestamp) { + // If there are pending catalog changes when using a no-overlap read source, we choose to + // take the PBWM lock to conflict with any in-progress batches. This prevents us from idly + // spinning in this loop trying to get a new read timestamp ahead of the minimum visible + // snapshot. This helps us guarantee liveness (i.e. we can eventually get a suitable read + // timestamp) but should not be necessary for correctness. After initial sync finishes, if + // we waited instead of retrying, readers would block indefinitely waiting for the + // noOverlap time to move forward. Instead we force the reader take the PBWM lock and retry. + if (readSource == RecoveryUnit::ReadSource::kNoOverlap) { + invariant(readTimestamp); LOGV2(20576, - "tried reading at last-applied time: {lastAppliedTimestamp} on ns: {nss_ns}, but " - "future catalog changes are pending at time {minSnapshot}. Trying again without " - "reading at last-applied time.", - "lastAppliedTimestamp"_attr = *lastAppliedTimestamp, - "nss_ns"_attr = nss.ns(), - "minSnapshot"_attr = *minSnapshot); + "Tried reading at no-overlap time, but future catalog changes are pending. " + "Trying again without reading at no-overlap time.", + "noOverlapTimestamp"_attr = *readTimestamp, + "collection"_attr = nss.ns(), + "collectionMinSnapshot"_attr = *minSnapshot); // Destructing the block sets _shouldConflictWithSecondaryBatchApplication back to the // previous value. If the previous value is false (because there is another // shouldNotConflictWithSecondaryBatchApplicationBlock outside of this function), this @@ -220,28 +225,12 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx, ErrorCodes::SnapshotUnavailable, str::stream() << "Unable to read from a snapshot due to pending collection catalog " "changes; please retry the operation. Snapshot timestamp is " - << (mySnapshot ? mySnapshot->toString() : "(none)") - << ". Collection minimum is " << minSnapshot->toString(), + << readTimestamp->toString() << ". Collection minimum is " + << minSnapshot->toString(), opCtx->lockState()->shouldConflictWithSecondaryBatchApplication()); - // Cannot change ReadSource while a RecoveryUnit is active, which may result from - // calling getPointInTimeReadTimestamp(). - opCtx->recoveryUnit()->abandonSnapshot(); - opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset); - } - - // If there are pending catalog changes when using a no-overlap read source, we choose to - // take the PBWM lock to conflict with any in-progress batches. This prevents us from idly - // spinning in this loop trying to get a new read timestamp ahead of the minimum visible - // snapshot. This helps us guarantee liveness (i.e. we can eventually get a suitable read - // timestamp) but should not be necessary for correctness. - if (readSource == RecoveryUnit::ReadSource::kNoOverlap) { - invariant(!lastAppliedTimestamp); // no-overlap read source selects its own timestamp. - _shouldNotConflictWithSecondaryBatchApplicationBlock = boost::none; - invariant(opCtx->lockState()->shouldConflictWithSecondaryBatchApplication()); - - // Abandon our snapshot but don't change our read source, so that we can select a new - // read timestamp on the next loop iteration. + // Abandon our snapshot. We may select a new read timestamp or ReadSource in the next + // loop iteration. opCtx->recoveryUnit()->abandonSnapshot(); } @@ -259,84 +248,6 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx, } } -bool AutoGetCollectionForRead::_shouldReadAtLastAppliedTimestamp( - OperationContext* opCtx, - const NamespaceString& nss, - repl::ReadConcernLevel readConcernLevel) const { - - // If this block is unset, then the operation did not opt-out of the PBWM lock, implying that it - // cannot read at lastApplied. It's important to note that it is possible for this to be set, - // but still be holding the PBWM lock, explained below. - if (!_shouldNotConflictWithSecondaryBatchApplicationBlock) { - return false; - } - - // If we are already holding the PBWM lock, do not read at last-applied. This is because once an - // operation reads without a timestamp (effectively seeing all writes), it is no longer safe to - // start reading at a timestamp, as writes or catalog operations may appear to vanish. - // This may occur when multiple collection locks are held concurrently, which is often the case - // when DBDirectClient is used. - if (opCtx->lockState()->isLockHeldForMode(resourceIdParallelBatchWriterMode, MODE_IS)) { - LOGV2_DEBUG(20577, 1, "not reading at last-applied because the PBWM lock is held"); - return false; - } - - // Majority and snapshot readConcern levels should not read from lastApplied; these read - // concerns already have a designated timestamp to read from. - if (readConcernLevel != repl::ReadConcernLevel::kLocalReadConcern && - readConcernLevel != repl::ReadConcernLevel::kAvailableReadConcern) { - return false; - } - - // If we are in a replication state (like secondary or primary catch-up) where we are not - // accepting writes, we should read at lastApplied. If this node can accept writes, then no - // conflicting replication batches are being applied and we can read from the default snapshot. - if (repl::ReplicationCoordinator::get(opCtx)->canAcceptWritesForDatabase(opCtx, "admin")) { - return false; - } - - // Non-replicated collections do not need to read at lastApplied, as those collections are not - // written by the replication system. However, the oplog is special, as it *is* written by the - // replication system. - if (!nss.isReplicated() && !nss.isOplog()) { - return false; - } - - return true; -} - -bool AutoGetCollectionForRead::_conflictingCatalogChanges( - OperationContext* opCtx, - boost::optional<Timestamp> minSnapshot, - boost::optional<Timestamp> lastAppliedTimestamp) const { - // This is the timestamp of the most recent catalog changes to this collection. If this is - // greater than any point in time read timestamps, we should either wait or return an error. - if (!minSnapshot) { - return false; - } - - // If we are reading from the lastAppliedTimestamp and it is up-to-date with any catalog - // changes, we can return. - if (lastAppliedTimestamp && - (lastAppliedTimestamp->isNull() || *lastAppliedTimestamp >= *minSnapshot)) { - return false; - } - - // This can be set when readConcern is "snapshot" or "majority". - auto mySnapshot = opCtx->recoveryUnit()->getPointInTimeReadTimestamp(); - - // If we do not have a point in time to conflict with minSnapshot, return. - if (!mySnapshot && !lastAppliedTimestamp) { - return false; - } - - // Return if there are no conflicting catalog changes with mySnapshot. - if (mySnapshot && *mySnapshot >= *minSnapshot) { - return false; - } - - return true; -} AutoGetCollectionForReadCommand::AutoGetCollectionForReadCommand( OperationContext* opCtx, diff --git a/src/mongo/db/db_raii.h b/src/mongo/db/db_raii.h index edf32cd0010..51208643c6a 100644 --- a/src/mongo/db/db_raii.h +++ b/src/mongo/db/db_raii.h @@ -133,19 +133,6 @@ private: // This field is optional, because the code to wait for majority committed snapshot needs to // release locks in order to block waiting boost::optional<AutoGetCollection> _autoColl; - - // Returns true if we should read at the last applied timestamp instead of at "no" timestamp - // (i.e. reading with the "latest" snapshot reflecting all writes). Reading at the last applied - // timestamp avoids reading in-flux data actively being written by the replication system. - bool _shouldReadAtLastAppliedTimestamp(OperationContext* opCtx, - const NamespaceString& nss, - repl::ReadConcernLevel readConcernLevel) const; - - // Returns true if the minSnapshot causes conflicting catalog changes for either the provided - // lastAppliedTimestamp or the point-in-time snapshot of the RecoveryUnit on 'opCtx'. - bool _conflictingCatalogChanges(OperationContext* opCtx, - boost::optional<Timestamp> minSnapshot, - boost::optional<Timestamp> lastAppliedTimestamp) const; }; /** diff --git a/src/mongo/db/db_raii_test.cpp b/src/mongo/db/db_raii_test.cpp index 4b9a3bac3c4..5258a51a33b 100644 --- a/src/mongo/db/db_raii_test.cpp +++ b/src/mongo/db/db_raii_test.cpp @@ -37,6 +37,8 @@ #include "mongo/db/client.h" #include "mongo/db/concurrency/lock_state.h" #include "mongo/db/db_raii.h" +#include "mongo/db/query/internal_plans.h" +#include "mongo/db/storage/snapshot_manager.h" #include "mongo/logv2/log.h" #include "mongo/unittest/unittest.h" #include "mongo/util/time_support.h" @@ -199,8 +201,13 @@ TEST_F(DBRAIITestFixture, ASSERT_OK( storageInterface()->createCollection(client1.second.get(), nss, defaultCollectionOptions)); ASSERT_OK(replCoord->setFollowerMode(repl::MemberState::RS_SECONDARY)); + + // Don't call into the ReplicationCoordinator to update lastApplied because it is only a mock + // class and does not update the correct state in the SnapshotManager. repl::OpTime opTime(Timestamp(200, 1), 1); - replCoord->setMyLastAppliedOpTimeAndWallTime({opTime, Date_t() + Seconds(1)}); + auto snapshotManager = + client1.second.get()->getServiceContext()->getStorageEngine()->getSnapshotManager(); + snapshotManager->setLastApplied(opTime.getTimestamp()); Lock::DBLock dbLock1(client1.second.get(), nss.db(), MODE_IX); ASSERT(client1.second->lockState()->isDbLockedForMode(nss.db(), MODE_IX)); @@ -220,18 +227,119 @@ TEST_F(DBRAIITestFixture, // for the collection. If we now manually set our last applied time to something very early, we // will be guaranteed to hit the logic that triggers when the minimum snapshot time is greater // than the read-at time, since we default to reading at last-applied when in SECONDARY state. + + // Don't call into the ReplicationCoordinator to update lastApplied because it is only a mock + // class and does not update the correct state in the SnapshotManager. repl::OpTime opTime(Timestamp(2, 1), 1); - replCoord->setMyLastAppliedOpTimeAndWallTime({opTime, Date_t() + Seconds(1)}); + auto snapshotManager = + client1.second.get()->getServiceContext()->getStorageEngine()->getSnapshotManager(); + snapshotManager->setLastApplied(opTime.getTimestamp()); + Lock::DBLock dbLock1(client1.second.get(), nss.db(), MODE_IX); ASSERT(client1.second->lockState()->isDbLockedForMode(nss.db(), MODE_IX)); + AutoGetCollectionForRead coll(client2.second.get(), NamespaceString("local.system.js")); + // Reading from an unreplicated collection does not change the ReadSource to kNoOverlap. + ASSERT_EQ(client2.second.get()->recoveryUnit()->getTimestampReadSource(), + RecoveryUnit::ReadSource::kUnset); - // The current code uasserts in this situation, so we confirm that happens here. + // Reading from a replicated collection will try to switch to kNoOverlap. Because we are + // already reading without a timestamp and we can't reacquire the PBWM lock to continue reading + // without a timestamp, we uassert in this situation. ASSERT_THROWS_CODE(AutoGetCollectionForRead(client2.second.get(), nss), DBException, ErrorCodes::SnapshotUnavailable); } +TEST_F(DBRAIITestFixture, AutoGetCollectionForReadLastAppliedConflict) { + // This test simulates a situation where AutoGetCollectionForRead cant read at the no-overlap + // point (minimum of all_durable and lastApplied) because it is set to a point earlier than the + // catalog change. We expect to read without a timestamp and hold the PBWM lock. + auto replCoord = repl::ReplicationCoordinator::get(client1.second.get()); + CollectionOptions defaultCollectionOptions; + ASSERT_OK( + storageInterface()->createCollection(client1.second.get(), nss, defaultCollectionOptions)); + ASSERT_OK(replCoord->setFollowerMode(repl::MemberState::RS_SECONDARY)); + + // Note that when the collection was created, above, the system chooses a minimum snapshot time + // for the collection. If we now manually set our last applied time to something very early, we + // will be guaranteed to hit the logic that triggers when the minimum snapshot time is greater + // than the read-at time, since we default to reading at last-applied when in SECONDARY state. + + // Don't call into the ReplicationCoordinator to update lastApplied because it is only a mock + // class and does not update the correct state in the SnapshotManager. + repl::OpTime opTime(Timestamp(2, 1), 1); + auto snapshotManager = + client1.second.get()->getServiceContext()->getStorageEngine()->getSnapshotManager(); + snapshotManager->setLastApplied(opTime.getTimestamp()); + AutoGetCollectionForRead coll(client1.second.get(), nss); + + // We can't read from kNoOverlap in this scenario because there is a catalog conflict. Resort + // to taking the PBWM lock and reading without a timestamp. + ASSERT_EQ(client1.second.get()->recoveryUnit()->getTimestampReadSource(), + RecoveryUnit::ReadSource::kUnset); + ASSERT_TRUE(client1.second.get()->lockState()->isLockHeldForMode( + resourceIdParallelBatchWriterMode, MODE_IS)); +} + +TEST_F(DBRAIITestFixture, AutoGetCollectionForReadLastAppliedUnavailable) { + // This test simulates a situation where AutoGetCollectionForRead reads at the no-overlap + // point (minimum of all_durable and lastApplied) even though lastApplied is not available. + auto replCoord = repl::ReplicationCoordinator::get(client1.second.get()); + CollectionOptions defaultCollectionOptions; + ASSERT_OK( + storageInterface()->createCollection(client1.second.get(), nss, defaultCollectionOptions)); + ASSERT_OK(replCoord->setFollowerMode(repl::MemberState::RS_SECONDARY)); + + // Note that when the collection was created, above, the system chooses a minimum snapshot time + // for the collection. Since last-applied isn't available, we default to all_durable, which is + // available, and is greater than the collection minimum snapshot. + auto snapshotManager = + client1.second.get()->getServiceContext()->getStorageEngine()->getSnapshotManager(); + ASSERT_FALSE(snapshotManager->getLastApplied()); + AutoGetCollectionForRead coll(client1.second.get(), nss); + + // Even though lastApplied isn't available, the ReadSource is set to kNoOverlap, which reads + // at the all_durable time. + ASSERT_EQ(client1.second.get()->recoveryUnit()->getTimestampReadSource(), + RecoveryUnit::ReadSource::kNoOverlap); + ASSERT_TRUE(client1.second.get()->recoveryUnit()->getPointInTimeReadTimestamp()); + ASSERT_FALSE(client1.second.get()->lockState()->isLockHeldForMode( + resourceIdParallelBatchWriterMode, MODE_IS)); +} + +TEST_F(DBRAIITestFixture, AutoGetCollectionForReadUsesNoOverlapOnSecondary) { + auto opCtx = client1.second.get(); + ASSERT_OK(storageInterface()->createCollection(opCtx, nss, {})); + ASSERT_OK( + repl::ReplicationCoordinator::get(opCtx)->setFollowerMode(repl::MemberState::RS_SECONDARY)); + AutoGetCollectionForRead autoColl(opCtx, nss); + auto exec = InternalPlanner::collectionScan(opCtx, + nss.ns(), + autoColl.getCollection(), + PlanExecutor::YIELD_MANUAL, + InternalPlanner::FORWARD); + + // The collection scan should use the default ReadSource on a secondary. + ASSERT_EQ(RecoveryUnit::ReadSource::kNoOverlap, + opCtx->recoveryUnit()->getTimestampReadSource()); + + // While yielding the collection scan, simulate stepping-up to a primary. + exec->saveState(); + Locker::LockSnapshot lockSnapshot; + ASSERT_TRUE(opCtx->lockState()->saveLockStateAndUnlock(&lockSnapshot)); + ASSERT_OK( + repl::ReplicationCoordinator::get(opCtx)->setFollowerMode(repl::MemberState::RS_PRIMARY)); + + // After restoring, the collection scan should now be reading with kNoOverlap, the default on + // secondaries. + opCtx->lockState()->restoreLockState(opCtx, lockSnapshot); + exec->restoreState(); + ASSERT_EQ(RecoveryUnit::ReadSource::kNoOverlap, + opCtx->recoveryUnit()->getTimestampReadSource()); + BSONObj obj; + ASSERT_EQUALS(PlanExecutor::IS_EOF, exec->getNext(&obj, nullptr)); +} } // namespace } // namespace mongo diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp index 839ca031062..16e2a483ca7 100644 --- a/src/mongo/db/index_builds_coordinator.cpp +++ b/src/mongo/db/index_builds_coordinator.cpp @@ -2037,6 +2037,9 @@ void IndexBuildsCoordinator::_runIndexBuildInner(OperationContext* opCtx, void IndexBuildsCoordinator::_buildIndex(OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState, const IndexBuildOptions& indexBuildOptions) { + // Read without a timestamp. When we commit, we block writes which guarantees all writes are + // visible. + opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); _scanCollectionAndInsertKeysIntoSorter(opCtx, replState); _insertKeysFromSideTablesWithoutBlockingWrites(opCtx, replState); _signalPrimaryForCommitReadiness(opCtx, replState); diff --git a/src/mongo/db/repl/oplog_batcher.cpp b/src/mongo/db/repl/oplog_batcher.cpp index 27653ab21dc..aba27772547 100644 --- a/src/mongo/db/repl/oplog_batcher.cpp +++ b/src/mongo/db/repl/oplog_batcher.cpp @@ -363,8 +363,8 @@ std::size_t getBatchLimitOplogBytes(OperationContext* opCtx, StorageInterface* s // We can't change the timestamp source within a write unit of work. invariant(!opCtx->lockState()->inAWriteUnitOfWork()); // We're only reading oplog metadata, so the timestamp is not important. If we read with the - // default (which is kLastApplied on secondaries), we may end up with a reader that is at - // kLastApplied. If we then roll back, then when we reconstruct prepared transactions during + // default (which is lastApplied on secondaries), we may end up with a reader that is at + // lastApplied. If we then roll back, then when we reconstruct prepared transactions during // rollback recovery we will be preparing transactions before the read timestamp, which triggers // an assertion in WiredTiger. ReadSourceScope readSourceScope(opCtx, RecoveryUnit::ReadSource::kNoTimestamp); diff --git a/src/mongo/db/repl/replication_coordinator_external_state.h b/src/mongo/db/repl/replication_coordinator_external_state.h index e5ca3cfe12c..29cc6dd9c5b 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state.h +++ b/src/mongo/db/repl/replication_coordinator_external_state.h @@ -260,11 +260,11 @@ public: virtual void updateCommittedSnapshot(const OpTime& newCommitPoint) = 0; /** - * Updates the local snapshot to a consistent point for secondary reads. + * Updates the lastApplied snapshot to a consistent point for secondary reads. * - * It is illegal to call with a optime that does not name an existing snapshot. + * It is illegal to call with a non-existent optime. */ - virtual void updateLocalSnapshot(const OpTime& optime) = 0; + virtual void updateLastAppliedSnapshot(const OpTime& optime) = 0; /** * Returns whether or not the SnapshotThread is active. diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp index 9fa64da2a2e..ce80ab0acac 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp @@ -356,7 +356,7 @@ void ReplicationCoordinatorExternalStateImpl::clearAppliedThroughIfCleanShutdown } // Ensure that all writes are visible before reading. If we failed mid-batch, it would be - // possible to read from a kLastApplied ReadSource where not all writes to the minValid document + // possible to read from a kNoOverlap ReadSource where not all writes to the minValid document // are visible, generating a writeConflict that would not resolve. opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); @@ -972,10 +972,10 @@ void ReplicationCoordinatorExternalStateImpl::updateCommittedSnapshot( notifyOplogMetadataWaiters(newCommitPoint); } -void ReplicationCoordinatorExternalStateImpl::updateLocalSnapshot(const OpTime& optime) { +void ReplicationCoordinatorExternalStateImpl::updateLastAppliedSnapshot(const OpTime& optime) { auto manager = _service->getStorageEngine()->getSnapshotManager(); if (manager) { - manager->setLocalSnapshot(optime.getTimestamp()); + manager->setLastApplied(optime.getTimestamp()); } } diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.h b/src/mongo/db/repl/replication_coordinator_external_state_impl.h index d1d12e285b3..f38aee76a39 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state_impl.h +++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.h @@ -104,7 +104,7 @@ public: virtual bool tooStale(); void dropAllSnapshots() final; void updateCommittedSnapshot(const OpTime& newCommitPoint) final; - void updateLocalSnapshot(const OpTime& optime) final; + void updateLastAppliedSnapshot(const OpTime& optime) final; virtual bool snapshotsEnabled() const; virtual void notifyOplogMetadataWaiters(const OpTime& committedOpTime); boost::optional<OpTime> getEarliestDropPendingOpTime() const final; diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp index 84986f783c8..4c9ddbaf7f1 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp +++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp @@ -250,7 +250,7 @@ void ReplicationCoordinatorExternalStateMock::dropAllSnapshots() {} void ReplicationCoordinatorExternalStateMock::updateCommittedSnapshot( const OpTime& newCommitPoint) {} -void ReplicationCoordinatorExternalStateMock::updateLocalSnapshot(const OpTime& optime) {} +void ReplicationCoordinatorExternalStateMock::updateLastAppliedSnapshot(const OpTime& optime) {} bool ReplicationCoordinatorExternalStateMock::snapshotsEnabled() const { return _areSnapshotsEnabled; diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.h b/src/mongo/db/repl/replication_coordinator_external_state_mock.h index 0ef9ad2e893..fd867df8ac7 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state_mock.h +++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.h @@ -93,7 +93,7 @@ public: virtual bool tooStale(); virtual void dropAllSnapshots(); virtual void updateCommittedSnapshot(const OpTime& newCommitPoint); - virtual void updateLocalSnapshot(const OpTime& optime); + virtual void updateLastAppliedSnapshot(const OpTime& optime); virtual bool snapshotsEnabled() const; virtual void notifyOplogMetadataWaiters(const OpTime& committedOpTime); boost::optional<OpTime> getEarliestDropPendingOpTime() const final; diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index ebde2673416..bcae984ce91 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -1389,6 +1389,14 @@ void ReplicationCoordinatorImpl::_setMyLastAppliedOpTimeAndWallTime( // No need to wake up replication waiters because there should not be any replication waiters // waiting on our own lastApplied. + // Update the storage engine's lastApplied snapshot before updating the stable timestamp on the + // storage engine. New transactions reading from the lastApplied snapshot should start before + // the oldest timestamp is advanced to avoid races. Additionally, update this snapshot before + // signaling optime waiters. This avoids a race that would allow optime waiters to open + // transactions on stale lastApplied values because they do not hold or reacquire the + // replication coordinator mutex when signaled. + _externalState->updateLastAppliedSnapshot(opTime); + // Signal anyone waiting on optime changes. _opTimeWaiterList.setValueIf_inlock( [opTime](const OpTime& waitOpTime, const SharedWaiterHandle& waiter) { @@ -1396,11 +1404,6 @@ void ReplicationCoordinatorImpl::_setMyLastAppliedOpTimeAndWallTime( }, opTime); - // Update the local snapshot before updating the stable timestamp on the storage engine. New - // transactions reading from the local snapshot should start before the oldest timestamp is - // advanced to avoid races. - _externalState->updateLocalSnapshot(opTime); - // Notify the oplog waiters after updating the local snapshot. signalOplogWaiters(); diff --git a/src/mongo/db/repl/replication_recovery.cpp b/src/mongo/db/repl/replication_recovery.cpp index 82c406dc480..fc680d1a67a 100644 --- a/src/mongo/db/repl/replication_recovery.cpp +++ b/src/mongo/db/repl/replication_recovery.cpp @@ -130,6 +130,7 @@ public: _oplogApplicationEndPoint(oplogApplicationEndPoint) {} void startup(OperationContext* opCtx) final { + opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); _client = std::make_unique<DBDirectClient>(opCtx); BSONObj predicate = _oplogApplicationEndPoint ? BSON("$gte" << _oplogApplicationStartPoint << "$lte" << *_oplogApplicationEndPoint) diff --git a/src/mongo/db/repl/transaction_oplog_application.cpp b/src/mongo/db/repl/transaction_oplog_application.cpp index b9f75c07d1a..8608926919c 100644 --- a/src/mongo/db/repl/transaction_oplog_application.cpp +++ b/src/mongo/db/repl/transaction_oplog_application.cpp @@ -512,8 +512,8 @@ void reconstructPreparedTransactions(OperationContext* opCtx, repl::OplogApplica } // Read the transactions table and the oplog collection without a timestamp. // The below DBDirectClient read uses AutoGetCollectionForRead which could implicitly change the - // read source to kLastApplied. So we need to explicitly set the read source to kNoTimestamp to - // force reads in this scope to be untimestamped. + // read source. So we need to explicitly set the read source to kNoTimestamp to force reads in + // this scope to be untimestamped. ReadSourceScope readSourceScope(opCtx, RecoveryUnit::ReadSource::kNoTimestamp); DBDirectClient client(opCtx); diff --git a/src/mongo/db/storage/SConscript b/src/mongo/db/storage/SConscript index ccd5d048e1d..e32b8ba329a 100644 --- a/src/mongo/db/storage/SConscript +++ b/src/mongo/db/storage/SConscript @@ -30,6 +30,23 @@ env.Library( ) env.Library( + target='snapshot_helper', + source=[ + 'snapshot_helper.cpp', + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/base', + '$BUILD_DIR/mongo/db/namespace_string', + ], + LIBDEPS_PRIVATE=[ + '$BUILD_DIR/mongo/db/concurrency/lock_manager', + '$BUILD_DIR/mongo/db/repl/read_concern_args', + '$BUILD_DIR/mongo/db/repl/repl_coordinator_interface', + 'recovery_unit_base', + ], + ) + +env.Library( target='duplicate_key_error_info', source=[ 'duplicate_key_error_info.cpp', diff --git a/src/mongo/db/storage/kv/kv_engine_timestamps_test.cpp b/src/mongo/db/storage/kv/kv_engine_timestamps_test.cpp index aeef71561ae..baecec3c8af 100644 --- a/src/mongo/db/storage/kv/kv_engine_timestamps_test.cpp +++ b/src/mongo/db/storage/kv/kv_engine_timestamps_test.cpp @@ -150,9 +150,9 @@ public: return itCountOn(op); } - int itCountLocal() { + int itCountLastApplied() { auto op = makeOperation(); - op->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kLastApplied); + op->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoOverlap); return itCountOn(op); } @@ -176,14 +176,14 @@ public: return std::string(record->data.data()); } - boost::optional<Record> readRecordLocal(RecordId id) { + boost::optional<Record> readRecordLastApplied(RecordId id) { auto op = makeOperation(); - op->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kLastApplied); + op->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoOverlap); return readRecordOn(op, id); } - std::string readStringLocal(RecordId id) { - auto record = readRecordLocal(id); + std::string readStringLastApplied(RecordId id) { + auto record = readRecordLastApplied(id); ASSERT(record); return std::string(record->data.data()); } @@ -360,7 +360,7 @@ TEST_F(SnapshotManagerTests, UpdateAndDelete) { ASSERT(!readRecordCommitted(id)); } -TEST_F(SnapshotManagerTests, InsertAndReadOnLocalSnapshot) { +TEST_F(SnapshotManagerTests, InsertAndReadOnLastAppliedSnapshot) { if (!snapshotManager) return; // This test is only for engines that DO support SnapshotManagers. @@ -369,7 +369,7 @@ TEST_F(SnapshotManagerTests, InsertAndReadOnLocalSnapshot) { auto id = insertRecordAndCommit(); auto afterInsert = fetchAndIncrementTimestamp(); - // Not reading on the last local timestamp returns the most recent data. + // Not reading on the last applied timestamp returns the most recent data. auto op = makeOperation(); auto ru = op->recoveryUnit(); ru->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset); @@ -379,18 +379,18 @@ TEST_F(SnapshotManagerTests, InsertAndReadOnLocalSnapshot) { deleteRecordAndCommit(id); auto afterDelete = fetchAndIncrementTimestamp(); - // Reading at the local snapshot timestamps returns data in order. - snapshotManager->setLocalSnapshot(beforeInsert); - ASSERT_EQ(itCountLocal(), 0); - ASSERT(!readRecordLocal(id)); + // Reading at the last applied snapshot timestamps returns data in order. + snapshotManager->setLastApplied(beforeInsert); + ASSERT_EQ(itCountLastApplied(), 0); + ASSERT(!readRecordLastApplied(id)); - snapshotManager->setLocalSnapshot(afterInsert); - ASSERT_EQ(itCountLocal(), 1); - ASSERT(readRecordLocal(id)); + snapshotManager->setLastApplied(afterInsert); + ASSERT_EQ(itCountLastApplied(), 1); + ASSERT(readRecordLastApplied(id)); - snapshotManager->setLocalSnapshot(afterDelete); - ASSERT_EQ(itCountLocal(), 0); - ASSERT(!readRecordLocal(id)); + snapshotManager->setLastApplied(afterDelete); + ASSERT_EQ(itCountLastApplied(), 0); + ASSERT(!readRecordLastApplied(id)); } TEST_F(SnapshotManagerTests, UpdateAndDeleteOnLocalSnapshot) { @@ -416,20 +416,20 @@ TEST_F(SnapshotManagerTests, UpdateAndDeleteOnLocalSnapshot) { deleteRecordAndCommit(id); auto afterDelete = fetchAndIncrementTimestamp(); - snapshotManager->setLocalSnapshot(beforeInsert); - ASSERT_EQ(itCountLocal(), 0); - ASSERT(!readRecordLocal(id)); + snapshotManager->setLastApplied(beforeInsert); + ASSERT_EQ(itCountLastApplied(), 0); + ASSERT(!readRecordLastApplied(id)); - snapshotManager->setLocalSnapshot(afterInsert); - ASSERT_EQ(itCountLocal(), 1); - ASSERT_EQ(readStringLocal(id), "Aardvark"); + snapshotManager->setLastApplied(afterInsert); + ASSERT_EQ(itCountLastApplied(), 1); + ASSERT_EQ(readStringLastApplied(id), "Aardvark"); - snapshotManager->setLocalSnapshot(afterUpdate); - ASSERT_EQ(itCountLocal(), 1); - ASSERT_EQ(readStringLocal(id), "Blue spotted stingray"); + snapshotManager->setLastApplied(afterUpdate); + ASSERT_EQ(itCountLastApplied(), 1); + ASSERT_EQ(readStringLastApplied(id), "Blue spotted stingray"); - snapshotManager->setLocalSnapshot(afterDelete); - ASSERT_EQ(itCountLocal(), 0); - ASSERT(!readRecordLocal(id)); + snapshotManager->setLastApplied(afterDelete); + ASSERT_EQ(itCountLastApplied(), 0); + ASSERT(!readRecordLastApplied(id)); } } // namespace mongo diff --git a/src/mongo/db/storage/recovery_unit.h b/src/mongo/db/storage/recovery_unit.h index f8c70309ae2..eeddfafd624 100644 --- a/src/mongo/db/storage/recovery_unit.h +++ b/src/mongo/db/storage/recovery_unit.h @@ -253,7 +253,6 @@ public: * - when using ReadSource::kNoOverlap, the timestamp chosen by the storage engine. * - when using ReadSource::kAllDurableSnapshot, the timestamp chosen using the storage * engine's all_durable timestamp. - * - when using ReadSource::kLastApplied, the timestamp chosen using the storage engine's last * applied timestamp. Can return boost::none if no timestamp has been established. * - when using ReadSource::kMajorityCommitted, the majority committed timestamp chosen by the * storage engine after a transaction has been opened or after a call to @@ -399,11 +398,6 @@ public: */ kNoOverlap, /** - * Read from the last applied timestamp. New transactions start at the most up-to-date - * timestamp. - */ - kLastApplied, - /** * Read from the all_durable timestamp. New transactions will always read from the same * timestamp and never advance. */ @@ -414,6 +408,24 @@ public: kProvided }; + static std::string toString(ReadSource rs) { + switch (rs) { + case ReadSource::kUnset: + return "kUnset"; + case ReadSource::kNoTimestamp: + return "kNoTimestamp"; + case ReadSource::kMajorityCommitted: + return "kMajorityCommitted"; + case ReadSource::kNoOverlap: + return "kNoOverlap"; + case ReadSource::kAllDurableSnapshot: + return "kAllDurableSnapshot"; + case ReadSource::kProvided: + return "kProvided"; + } + MONGO_UNREACHABLE; + } + /** * Sets which timestamp to use for read transactions. If 'provided' is supplied, only kProvided * is an acceptable input. diff --git a/src/mongo/db/storage/snapshot_helper.cpp b/src/mongo/db/storage/snapshot_helper.cpp new file mode 100644 index 00000000000..777868f7830 --- /dev/null +++ b/src/mongo/db/storage/snapshot_helper.cpp @@ -0,0 +1,151 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kStorage + +#include "mongo/platform/basic.h" + +#include "mongo/db/storage/snapshot_helper.h" + +#include "mongo/db/repl/read_concern_args.h" +#include "mongo/db/repl/replication_coordinator.h" +#include "mongo/logv2/log.h" + +namespace mongo { +namespace SnapshotHelper { +bool canSwitchReadSource(OperationContext* opCtx) { + + // Most readConcerns have behavior controlled at higher levels. Local and available are the only + // ReadConcerns that should consider changing, since they read without a timestamp by default. + const auto readConcernLevel = repl::ReadConcernArgs::get(opCtx).getLevel(); + if (readConcernLevel == repl::ReadConcernLevel::kLocalReadConcern || + readConcernLevel == repl::ReadConcernLevel::kAvailableReadConcern) { + return true; + } + + return false; +} + +bool shouldReadAtNoOverlap(OperationContext* opCtx, + const NamespaceString& nss, + std::string* reason) { + + // If this is true, then the operation opted-in to the PBWM lock, implying that it cannot read + // at no-overlap. It's important to note that it is possible for this to be false, but still be + // holding the PBWM lock, explained below. + if (opCtx->lockState()->shouldConflictWithSecondaryBatchApplication()) { + *reason = "conflicts with batch application"; + return false; + } + + // If we are already holding the PBWM lock, do not read at no-overlap. Snapshots acquired by an + // operation after a yield/restore must see all writes in the pre-yield snapshot. Once a + // snapshot is reading without a timestamp, we choose to continue acquiring snapshots without a + // timestamp. This is done in lieu of determining a timestamp far enough in the future that's + // guaranteed to observe all previous writes. This may occur when multiple collection locks are + // held concurrently, which is often the case when DBDirectClient is used. + if (opCtx->lockState()->isLockHeldForMode(resourceIdParallelBatchWriterMode, MODE_IS)) { + *reason = "PBWM lock is held"; + LOGV2_DEBUG(20577, 1, "not reading at no-overlap because the PBWM lock is held"); + return false; + } + + // If we are in a replication state (like secondary or primary catch-up) where we are not + // accepting writes, we should read at no-overlap. If this node can accept writes, then no + // conflicting replication batches are being applied and we can read from the default snapshot. + if (repl::ReplicationCoordinator::get(opCtx)->canAcceptWritesForDatabase(opCtx, "admin")) { + *reason = "primary"; + return false; + } + + // Non-replicated collections do not need to read at no-overlap, as those collections are not + // written by the replication system. However, the oplog is special, as it *is* written by the + // replication system. + if (!nss.isReplicated() && !nss.isOplog()) { + *reason = "unreplicated collection"; + return false; + } + + return true; +} +boost::optional<RecoveryUnit::ReadSource> getNewReadSource(OperationContext* opCtx, + const NamespaceString& nss) { + const bool canSwitch = canSwitchReadSource(opCtx); + if (!canSwitch) { + return boost::none; + } + + const auto existing = opCtx->recoveryUnit()->getTimestampReadSource(); + std::string reason; + const bool readAtNoOverlap = shouldReadAtNoOverlap(opCtx, nss, &reason); + if (existing == RecoveryUnit::ReadSource::kUnset) { + // Shifting from reading without a timestamp to reading with a timestamp can be dangerous + // because writes will appear to vanish. This case is intended for new reads on secondaries + // and query yield recovery after state transitions from primary to secondary. + if (readAtNoOverlap) { + LOGV2_DEBUG(4452901, 2, "Changing ReadSource to kNoOverlap", logAttrs(nss)); + return RecoveryUnit::ReadSource::kNoOverlap; + } + } else if (existing == RecoveryUnit::ReadSource::kNoOverlap) { + // For some reason, we can no longer read at kNoOverlap. + // An operation that yields a timestamped snapshot must restore a snapshot with at least as + // large of a timestamp, or with proper consideration of rollback scenarios, no timestamp. + // Given readers do not survive rollbacks, it's okay to go from reading with a timestamp to + // reading without one. More writes will become visible. + if (!readAtNoOverlap) { + LOGV2_DEBUG( + 4452902, 2, "Changing ReadSource to kUnset", logAttrs(nss), "reason"_attr = reason); + // This shift to kUnset assumes that callers will not make future attempts to manipulate + // their ReadSources after performing reads at an un-timetamped snapshot. The only + // exception is callers of this function that may need to change from kUnset to + // kNoOverlap in the event of a catalog conflict or query yield. + return RecoveryUnit::ReadSource::kUnset; + } + } + return boost::none; +} + +bool collectionChangesConflictWithRead(boost::optional<Timestamp> collectionMin, + boost::optional<Timestamp> readTimestamp) { + // This is the timestamp of the most recent catalog changes to this collection. If this is + // greater than any point in time read timestamps, we should either wait or return an error. + if (!collectionMin) { + return false; + } + + // If we do not have a point in time to conflict with collectionMin, return. + if (!readTimestamp || readTimestamp->isNull()) { + return false; + } + + // Return if there are no conflicting catalog changes with the readTimestamp. + return *collectionMin > readTimestamp; +} +} // namespace SnapshotHelper +} // namespace mongo
\ No newline at end of file diff --git a/src/mongo/db/storage/snapshot_helper.h b/src/mongo/db/storage/snapshot_helper.h new file mode 100644 index 00000000000..fa8fdd85f24 --- /dev/null +++ b/src/mongo/db/storage/snapshot_helper.h @@ -0,0 +1,43 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/operation_context.h" + +namespace mongo { +namespace SnapshotHelper { +// Returns a ReadSource if we should change our current ReadSource. Returns boost::none otherwise. +boost::optional<RecoveryUnit::ReadSource> getNewReadSource(OperationContext* opCtx, + const NamespaceString& nss); + +bool collectionChangesConflictWithRead(boost::optional<Timestamp> collectionMin, + boost::optional<Timestamp> readTimestamp); +} // namespace SnapshotHelper +} // namespace mongo diff --git a/src/mongo/db/storage/snapshot_manager.h b/src/mongo/db/storage/snapshot_manager.h index d41df1c5013..7839ef179ba 100644 --- a/src/mongo/db/storage/snapshot_manager.h +++ b/src/mongo/db/storage/snapshot_manager.h @@ -59,14 +59,14 @@ public: virtual void setCommittedSnapshot(const Timestamp& timestamp) = 0; /** - * Sets the snapshot for the last stable timestamp for reading on secondaries. + * Sets the lastApplied timestamp. */ - virtual void setLocalSnapshot(const Timestamp& timestamp) = 0; + virtual void setLastApplied(const Timestamp& timestamp) = 0; /** - * Returns the local snapshot timestamp. + * Returns the lastApplied timestamp. */ - virtual boost::optional<Timestamp> getLocalSnapshot() = 0; + virtual boost::optional<Timestamp> getLastApplied() = 0; /** * Drops all snapshots and clears the "committed" snapshot. diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp index 661b0e009c1..352699a9de8 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp @@ -668,6 +668,12 @@ WiredTigerKVEngine::WiredTigerKVEngine(const std::string& canonicalName, setInitialDataTimestamp(_recoveryTimestamp); setOldestTimestamp(_recoveryTimestamp, false); setStableTimestamp(_recoveryTimestamp, false); + + _sessionCache->snapshotManager().setLastApplied(_recoveryTimestamp); + { + stdx::lock_guard<Latch> lk(_highestDurableTimestampMutex); + _highestSeenDurableTimestamp = _recoveryTimestamp.asULL(); + } } } diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp index 8cc45e0201e..f9d9751d464 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp @@ -453,7 +453,6 @@ boost::optional<Timestamp> WiredTigerRecoveryUnit::getPointInTimeReadTimestamp() // The following ReadSources can only establish a read timestamp when a transaction is // opened. case ReadSource::kNoOverlap: - case ReadSource::kLastApplied: case ReadSource::kAllDurableSnapshot: break; } @@ -462,14 +461,14 @@ boost::optional<Timestamp> WiredTigerRecoveryUnit::getPointInTimeReadTimestamp() getSession(); switch (_timestampReadSource) { - case ReadSource::kLastApplied: - // The lastApplied timestamp is not always available, so it is not possible to invariant - // that it exists as other ReadSources do. + case ReadSource::kNoOverlap: + // The lastApplied and allDurable timestamps are not always available if the system has + // not accepted writes, so it is not possible to invariant that it exists as other + // ReadSources do. if (!_readAtTimestamp.isNull()) { return _readAtTimestamp; } return boost::none; - case ReadSource::kNoOverlap: case ReadSource::kAllDurableSnapshot: invariant(!_readAtTimestamp.isNull()); return _readAtTimestamp; @@ -515,17 +514,6 @@ void WiredTigerRecoveryUnit::_txnOpen() { session, _prepareConflictBehavior, _roundUpPreparedTimestamps); break; } - case ReadSource::kLastApplied: { - if (_sessionCache->snapshotManager().getLocalSnapshot()) { - _readAtTimestamp = _sessionCache->snapshotManager().beginTransactionOnLocalSnapshot( - session, _prepareConflictBehavior, _roundUpPreparedTimestamps); - } else { - WiredTigerBeginTxnBlock( - session, _prepareConflictBehavior, _roundUpPreparedTimestamps) - .done(); - } - break; - } case ReadSource::kNoOverlap: { _readAtTimestamp = _beginTransactionAtNoOverlapTimestamp(session); break; @@ -555,8 +543,9 @@ void WiredTigerRecoveryUnit::_txnOpen() { LOGV2_DEBUG(22414, 3, - "WT begin_transaction for snapshot id {getSnapshotId_toNumber}", - "getSnapshotId_toNumber"_attr = getSnapshotId().toNumber()); + "WT begin_transaction", + "snapshotId"_attr = getSnapshotId().toNumber(), + "readSource"_attr = toString(_timestampReadSource)); } Timestamp WiredTigerRecoveryUnit::_beginTransactionAtAllDurableTimestamp(WT_SESSION* session) { @@ -578,8 +567,8 @@ Timestamp WiredTigerRecoveryUnit::_beginTransactionAtAllDurableTimestamp(WT_SESS Timestamp WiredTigerRecoveryUnit::_beginTransactionAtNoOverlapTimestamp(WT_SESSION* session) { - auto lastApplied = _sessionCache->snapshotManager().getLocalSnapshot(); - Timestamp allDurable = Timestamp(_oplogManager->fetchAllDurableValue(session->connection)); + auto lastApplied = _sessionCache->snapshotManager().getLastApplied(); + Timestamp allDurable = Timestamp(_sessionCache->getKVEngine()->getAllDurableTimestamp()); // When using timestamps for reads and writes, it's important that readers and writers don't // overlap with the timestamps they use. In other words, at any point in the system there should @@ -607,17 +596,34 @@ Timestamp WiredTigerRecoveryUnit::_beginTransactionAtNoOverlapTimestamp(WT_SESSI // should read afterward. Timestamp readTimestamp = (lastApplied) ? std::min(*lastApplied, allDurable) : allDurable; + if (readTimestamp.isNull()) { + // When there is not an all_durable or lastApplied timestamp available, read without a + // timestamp. Do not round up the read timestamp to the oldest timestamp. + + // There is a race that allows new transactions to start between the time we check for a + // read timestamp and start our transaction, which can temporarily violate the contract of + // kNoOverlap. That is, writes will be visible that occur after the all_durable time. This + // is only possible for readers that start immediately after an initial sync that did not + // replicate any oplog entries. Future transactions will start reading at a timestamp once + // timestamped writes have been made. + WiredTigerBeginTxnBlock txnOpen( + session, _prepareConflictBehavior, _roundUpPreparedTimestamps); + LOGV2_DEBUG(4452900, 1, "no read timestamp available for kNoOverlap"); + txnOpen.done(); + return readTimestamp; + } + WiredTigerBeginTxnBlock txnOpen(session, _prepareConflictBehavior, _roundUpPreparedTimestamps, RoundUpReadTimestamp::kRound); auto status = txnOpen.setReadSnapshot(readTimestamp); fassert(51066, status); + txnOpen.done(); - // We might have rounded to oldest between calling getAllDurable and setReadSnapshot. We need - // to get the actual read timestamp we used. + // We might have rounded to oldest between calling getAllDurable and setReadSnapshot. We + // need to get the actual read timestamp we used. readTimestamp = _getTransactionReadTimestamp(session); - txnOpen.done(); return readTimestamp; } @@ -769,15 +775,14 @@ void WiredTigerRecoveryUnit::setTimestampReadSource(ReadSource readSource, boost::optional<Timestamp> provided) { LOGV2_DEBUG(22416, 3, - "setting timestamp read source: {static_cast_int_readSource}, provided timestamp: " - "{provided_provided_none}", - "static_cast_int_readSource"_attr = static_cast<int>(readSource), - "provided_provided_none"_attr = ((provided) ? provided->toString() : "none")); + "setting timestamp read source", + "readSource"_attr = toString(readSource), + "provided"_attr = ((provided) ? provided->toString() : "none")); invariant(!_isActive() || _timestampReadSource == readSource, str::stream() << "Current state: " << toString(_getState()) << ". Invalid internal state while setting timestamp read source: " - << static_cast<int>(readSource) << ", provided timestamp: " + << toString(readSource) << ", provided timestamp: " << (provided ? provided->toString() : "none")); invariant(!provided == (readSource != ReadSource::kProvided)); invariant(!(provided && provided->isNull())); diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp index 314a524063e..1f440d319b7 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp @@ -163,11 +163,14 @@ public: clientAndCtx2 = makeClientAndOpCtx(harnessHelper.get(), "reader"); ru1 = checked_cast<WiredTigerRecoveryUnit*>(clientAndCtx1.second->recoveryUnit()); ru2 = checked_cast<WiredTigerRecoveryUnit*>(clientAndCtx2.second->recoveryUnit()); + snapshotManager = dynamic_cast<WiredTigerSnapshotManager*>( + harnessHelper->getEngine()->getSnapshotManager()); } std::unique_ptr<WiredTigerRecoveryUnitHarnessHelper> harnessHelper; ClientAndCtx clientAndCtx1, clientAndCtx2; WiredTigerRecoveryUnit *ru1, *ru2; + WiredTigerSnapshotManager* snapshotManager; private: const char* wt_uri = "table:prepare_transaction"; @@ -180,6 +183,86 @@ TEST_F(WiredTigerRecoveryUnitTestFixture, SetReadSource) { ASSERT_EQ(Timestamp(1, 1), ru1->getPointInTimeReadTimestamp()); } +TEST_F(WiredTigerRecoveryUnitTestFixture, NoOverlapReadSource) { + OperationContext* opCtx1 = clientAndCtx1.second.get(); + std::unique_ptr<RecordStore> rs(harnessHelper->createRecordStore(opCtx1, "a.b")); + + const std::string str = str::stream() << "test"; + const Timestamp ts1{1, 1}; + const Timestamp ts2{1, 2}; + const Timestamp ts3{1, 2}; + + RecordId rid1; + { + WriteUnitOfWork wuow(opCtx1); + StatusWith<RecordId> res = rs->insertRecord(opCtx1, str.c_str(), str.size() + 1, ts1); + ASSERT_OK(res); + wuow.commit(); + rid1 = res.getValue(); + snapshotManager->setLastApplied(ts1); + } + + // Read without a timestamp. The write should be visible. + ASSERT_EQ(opCtx1->recoveryUnit()->getTimestampReadSource(), RecoveryUnit::ReadSource::kUnset); + RecordData unused; + ASSERT_TRUE(rs->findRecord(opCtx1, rid1, &unused)); + + // Read with kNoOverlap. The write should be visible. + opCtx1->recoveryUnit()->abandonSnapshot(); + opCtx1->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoOverlap); + ASSERT_TRUE(rs->findRecord(opCtx1, rid1, &unused)); + + RecordId rid2, rid3; + { + // Start, but do not commit a transaction with opCtx2. This sets a timestamp at ts2, which + // creates a hole. kNoOverlap, which is a function of all_durable, will only be able to read + // at the time immediately before. + OperationContext* opCtx2 = clientAndCtx2.second.get(); + WriteUnitOfWork wuow(opCtx2); + StatusWith<RecordId> res = + rs->insertRecord(opCtx2, str.c_str(), str.size() + 1, Timestamp()); + ASSERT_OK(opCtx2->recoveryUnit()->setTimestamp(ts2)); + ASSERT_OK(res); + + // While holding open a transaction with opCtx2, perform an insert at ts3 with opCtx1. This + // creates a "hole". + { + WriteUnitOfWork wuow(opCtx1); + StatusWith<RecordId> res = rs->insertRecord(opCtx1, str.c_str(), str.size() + 1, ts3); + ASSERT_OK(res); + wuow.commit(); + rid3 = res.getValue(); + snapshotManager->setLastApplied(ts3); + } + + // Read without a timestamp, and we should see the first and third records. + opCtx1->recoveryUnit()->abandonSnapshot(); + opCtx1->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset); + ASSERT_TRUE(rs->findRecord(opCtx1, rid1, &unused)); + ASSERT_FALSE(rs->findRecord(opCtx1, rid2, &unused)); + ASSERT_TRUE(rs->findRecord(opCtx1, rid3, &unused)); + + // Now read at kNoOverlap. Since the transaction at ts2 has not committed, all_durable is + // held back to ts1. LastApplied has advanced to ts3, but because kNoOverlap is the minimum, + // we should only see one record. + opCtx1->recoveryUnit()->abandonSnapshot(); + opCtx1->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoOverlap); + ASSERT_TRUE(rs->findRecord(opCtx1, rid1, &unused)); + ASSERT_FALSE(rs->findRecord(opCtx1, rid2, &unused)); + ASSERT_FALSE(rs->findRecord(opCtx1, rid3, &unused)); + + wuow.commit(); + rid2 = res.getValue(); + } + + // Now that the hole has been closed, kNoOverlap should see all 3 records. + opCtx1->recoveryUnit()->abandonSnapshot(); + opCtx1->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoOverlap); + ASSERT_TRUE(rs->findRecord(opCtx1, rid1, &unused)); + ASSERT_TRUE(rs->findRecord(opCtx1, rid2, &unused)); + ASSERT_TRUE(rs->findRecord(opCtx1, rid3, &unused)); +} + TEST_F(WiredTigerRecoveryUnitTestFixture, CreateAndCheckForCachePressure) { int time = 1; diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.cpp index 74988508a4c..efc2d8adfe5 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.cpp @@ -48,17 +48,17 @@ void WiredTigerSnapshotManager::setCommittedSnapshot(const Timestamp& timestamp) _committedSnapshot = timestamp; } -void WiredTigerSnapshotManager::setLocalSnapshot(const Timestamp& timestamp) { - stdx::lock_guard<Latch> lock(_localSnapshotMutex); +void WiredTigerSnapshotManager::setLastApplied(const Timestamp& timestamp) { + stdx::lock_guard<Latch> lock(_lastAppliedMutex); if (timestamp.isNull()) - _localSnapshot = boost::none; + _lastApplied = boost::none; else - _localSnapshot = timestamp; + _lastApplied = timestamp; } -boost::optional<Timestamp> WiredTigerSnapshotManager::getLocalSnapshot() { - stdx::lock_guard<Latch> lock(_localSnapshotMutex); - return _localSnapshot; +boost::optional<Timestamp> WiredTigerSnapshotManager::getLastApplied() { + stdx::lock_guard<Latch> lock(_lastAppliedMutex); + return _lastApplied; } void WiredTigerSnapshotManager::dropAllSnapshots() { @@ -93,23 +93,4 @@ Timestamp WiredTigerSnapshotManager::beginTransactionOnCommittedSnapshot( return *_committedSnapshot; } -Timestamp WiredTigerSnapshotManager::beginTransactionOnLocalSnapshot( - WT_SESSION* session, - PrepareConflictBehavior prepareConflictBehavior, - RoundUpPreparedTimestamps roundUpPreparedTimestamps) const { - WiredTigerBeginTxnBlock txnOpen(session, prepareConflictBehavior, roundUpPreparedTimestamps); - - stdx::lock_guard<Latch> lock(_localSnapshotMutex); - invariant(_localSnapshot); - LOGV2_DEBUG(22427, - 3, - "begin_transaction on local snapshot {localSnapshot_get}", - "localSnapshot_get"_attr = _localSnapshot.get().toString()); - auto status = txnOpen.setReadSnapshot(_localSnapshot.get()); - fassert(50775, status); - - txnOpen.done(); - return *_localSnapshot; -} - } // namespace mongo diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.h b/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.h index 1726a7d4c2b..b285c694e70 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.h @@ -51,8 +51,8 @@ public: WiredTigerSnapshotManager() = default; void setCommittedSnapshot(const Timestamp& timestamp) final; - void setLocalSnapshot(const Timestamp& timestamp) final; - boost::optional<Timestamp> getLocalSnapshot() final; + void setLastApplied(const Timestamp& timestamp) final; + boost::optional<Timestamp> getLastApplied() final; void dropAllSnapshots() final; // @@ -70,16 +70,6 @@ public: RoundUpPreparedTimestamps roundUpPreparedTimestamps) const; /** - * Starts a transaction on the last stable local timestamp, set by setLocalSnapshot. - * - * Throws if no local snapshot has been set. - */ - Timestamp beginTransactionOnLocalSnapshot( - WT_SESSION* session, - PrepareConflictBehavior prepareConflictBehavior, - RoundUpPreparedTimestamps roundUpPreparedTimestamps) const; - - /** * Returns lowest SnapshotName that could possibly be used by a future call to * beginTransactionOnCommittedSnapshot, or boost::none if there is currently no committed * snapshot. @@ -95,9 +85,9 @@ private: MONGO_MAKE_LATCH("WiredTigerSnapshotManager::_committedSnapshotMutex"); boost::optional<Timestamp> _committedSnapshot; - // Snapshot to use for reads at a local stable timestamp. - mutable Mutex _localSnapshotMutex = // Guards _localSnapshot. - MONGO_MAKE_LATCH("WiredTigerSnapshotManager::_localSnapshotMutex"); - boost::optional<Timestamp> _localSnapshot; + // Timestamp to use for reads at a the lastApplied timestamp. + mutable Mutex _lastAppliedMutex = // Guards _lastApplied. + MONGO_MAKE_LATCH("WiredTigerSnapshotManager::_lastAppliedMutex"); + boost::optional<Timestamp> _lastApplied; }; } // namespace mongo diff --git a/src/mongo/dbtests/storage_timestamp_tests.cpp b/src/mongo/dbtests/storage_timestamp_tests.cpp index d148f48d416..fca1968b3e4 100644 --- a/src/mongo/dbtests/storage_timestamp_tests.cpp +++ b/src/mongo/dbtests/storage_timestamp_tests.cpp @@ -1525,7 +1525,7 @@ public: // This test does not run a real ReplicationCoordinator, so must advance the snapshot // manager manually. auto storageEngine = cc().getServiceContext()->getStorageEngine(); - storageEngine->getSnapshotManager()->setLocalSnapshot(presentTs); + storageEngine->getSnapshotManager()->setLastApplied(presentTs); const auto beforeTxnTime = _clock->reserveTicks(1); auto beforeTxnTs = beforeTxnTime.asTimestamp(); @@ -3106,7 +3106,7 @@ public: // This test does not run a real ReplicationCoordinator, so must advance the snapshot // manager manually. auto storageEngine = cc().getServiceContext()->getStorageEngine(); - storageEngine->getSnapshotManager()->setLocalSnapshot(presentTs); + storageEngine->getSnapshotManager()->setLastApplied(presentTs); const auto beforeTxnTime = _clock->reserveTicks(1); beforeTxnTs = beforeTxnTime.asTimestamp(); commitEntryTs = beforeTxnTime.addTicks(1).asTimestamp(); |