SERVER-46721 Secondary readers should read at the no-overlap time instead of lastApplied

The no-overlap time, ReadSource::kNoOverlap, is the minimum of replication's lastApplied timestamp and WiredTiger's all_durable time. This time is independent of replication state and ensures queries do not see oplog holes after state transitions from secondary to primary.
author: Louis Williams <louis.williams@mongodb.com> 2020-05-12 13:39:31 -0400
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-05-12 18:03:48 +0000
commit: 25c694f365db0f07a445bd17b6cd5cbf32f5f2f9 (patch)
tree: c90451e347838f428b8cad851531b42c42cce6fa /src
parent: e2602ad053b2120982fbcac8e33e1ad64e6ec30a (diff)
download: mongo-25c694f365db0f07a445bd17b6cd5cbf32f5f2f9.tar.gz
28 files changed, 604 insertions, 299 deletions
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript
index 55595f5999b..9519d1207f1 100644
--- a/src/mongo/db/SConscript
+++ b/src/mongo/db/SConscript
@@ -602,6 +602,7 @@ env.Library(
     LIBDEPS_PRIVATE=[
         '$BUILD_DIR/mongo/idl/server_parameter',
         'catalog/database_holder',
+        'storage/snapshot_helper',
     ],
 )
 
diff --git a/src/mongo/db/catalog_raii_test.cpp b/src/mongo/db/catalog_raii_test.cpp
index 50913896118..733ed190250 100644
--- a/src/mongo/db/catalog_raii_test.cpp
+++ b/src/mongo/db/catalog_raii_test.cpp
@@ -260,8 +260,8 @@ TEST_F(ReadSourceScopeTest, RestoreReadSource) {
         ReadSourceScope scope(opCtx());
         ASSERT_EQ(opCtx()->recoveryUnit()->getTimestampReadSource(), ReadSource::kUnset);
 
-        opCtx()->recoveryUnit()->setTimestampReadSource(ReadSource::kLastApplied);
-        ASSERT_EQ(opCtx()->recoveryUnit()->getTimestampReadSource(), ReadSource::kLastApplied);
+        opCtx()->recoveryUnit()->setTimestampReadSource(ReadSource::kNoOverlap);
+        ASSERT_EQ(opCtx()->recoveryUnit()->getTimestampReadSource(), ReadSource::kNoOverlap);
         ASSERT_EQ(opCtx()->recoveryUnit()->getPointInTimeReadTimestamp(), boost::none);
     }
     ASSERT_EQ(opCtx()->recoveryUnit()->getTimestampReadSource(), ReadSource::kProvided);
diff --git a/src/mongo/db/commands/getmore_cmd.cpp b/src/mongo/db/commands/getmore_cmd.cpp
index f183225cd8c..97e6f6b79ea 100644
--- a/src/mongo/db/commands/getmore_cmd.cpp
+++ b/src/mongo/db/commands/getmore_cmd.cpp
@@ -148,6 +148,7 @@ void applyCursorReadConcern(OperationContext* opCtx, repl::ReadConcernArgs rcArg
         switch (rcArgs.getMajorityReadMechanism()) {
             case repl::ReadConcernArgs::MajorityReadMechanism::kMajoritySnapshot: {
                 // Make sure we read from the majority snapshot.
+                opCtx->recoveryUnit()->abandonSnapshot();
                 opCtx->recoveryUnit()->setTimestampReadSource(
                     RecoveryUnit::ReadSource::kMajorityCommitted);
                 uassertStatusOK(opCtx->recoveryUnit()->obtainMajorityCommittedSnapshot());
@@ -156,6 +157,7 @@ void applyCursorReadConcern(OperationContext* opCtx, repl::ReadConcernArgs rcArg
             case repl::ReadConcernArgs::MajorityReadMechanism::kSpeculative: {
                 // Mark the operation as speculative and select the correct read source.
                 repl::SpeculativeMajorityReadInfo::get(opCtx).setIsSpeculativeRead();
+                opCtx->recoveryUnit()->abandonSnapshot();
                 opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoOverlap);
                 break;
             }
@@ -167,6 +169,7 @@ void applyCursorReadConcern(OperationContext* opCtx, repl::ReadConcernArgs rcArg
         !opCtx->inMultiDocumentTransaction()) {
         auto atClusterTime = rcArgs.getArgsAtClusterTime();
         invariant(atClusterTime && *atClusterTime != LogicalTime::kUninitialized);
+        opCtx->recoveryUnit()->abandonSnapshot();
         opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kProvided,
                                                       atClusterTime->asTimestamp());
     }
diff --git a/src/mongo/db/db_raii.cpp b/src/mongo/db/db_raii.cpp
index 34775dd7d01..53dda3ca490 100644
--- a/src/mongo/db/db_raii.cpp
+++ b/src/mongo/db/db_raii.cpp
@@ -39,6 +39,7 @@
 #include "mongo/db/db_raii_gen.h"
 #include "mongo/db/repl/replication_coordinator.h"
 #include "mongo/db/s/collection_sharding_state.h"
+#include "mongo/db/storage/snapshot_helper.h"
 #include "mongo/logv2/log.h"
 
 namespace mongo {
@@ -112,7 +113,7 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx,
     _autoColl.emplace(opCtx, nsOrUUID, collectionLockMode, viewMode, deadline);
 
     // If the read source is explicitly set to kNoTimestamp, we read the most up to date data and do
-    // not consider reading at last applied (e.g. FTDC needs that).
+    // not consider reading at the no-overlap point (e.g. FTDC needs that).
     if (opCtx->recoveryUnit()->getTimestampReadSource() == RecoveryUnit::ReadSource::kNoTimestamp)
         return;
 
@@ -123,10 +124,6 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx,
     // need to check for pending catalog changes.
     while (auto coll = _autoColl->getCollection()) {
 
-        auto readSource = opCtx->recoveryUnit()->getTimestampReadSource();
-        auto minSnapshot = coll->getMinimumVisibleSnapshot();
-        auto mySnapshot = opCtx->recoveryUnit()->getPointInTimeReadTimestamp();
-
         // TODO(SERVER-47824): Also ban transaction snapshot reads on capped collections.
         uassert(ErrorCodes::SnapshotUnavailable,
                 "Reading from capped collections with readConcern snapshot is not supported "
@@ -135,77 +132,85 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx,
                     readConcernLevel != repl::ReadConcernLevel::kSnapshotReadConcern ||
                     opCtx->inMultiDocumentTransaction());
 
-        // If we are reading at a provided timestamp earlier than the latest catalog changes,
-        // then we must return an error.
-        if (readSource == RecoveryUnit::ReadSource::kProvided && minSnapshot &&
-            (*mySnapshot < *minSnapshot)) {
-            uasserted(ErrorCodes::SnapshotUnavailable,
-                      str::stream()
-                          << "Unable to read from a snapshot due to pending collection catalog "
-                             "changes; please retry the operation. Snapshot timestamp is "
-                          << mySnapshot->toString() << ". Collection minimum is "
-                          << minSnapshot->toString());
-        }
-
         // During batch application on secondaries, there is a potential to read inconsistent states
         // that would normally be protected by the PBWM lock. In order to serve secondary reads
         // during this period, we default to not acquiring the lock (by setting
         // _shouldNotConflictWithSecondaryBatchApplicationBlock). On primaries, we always read at a
         // consistent time, so not taking the PBWM lock is not a problem. On secondaries, we have to
-        // guarantee we read at a consistent state, so we must read at the last applied timestamp,
-        // which is set after each complete batch.
+        // guarantee we read at a consistent state, so we must read at the no-overlap timestamp,
+        // which is a function of the lastApplied timestamp, which is set after each complete batch.
         //
-        // If an attempt to read at the last applied timestamp is unsuccessful because there are
-        // pending catalog changes that occur after the last applied timestamp, we release our locks
+        // If an attempt to read at the no-overlap timestamp is unsuccessful because there are
+        // pending catalog changes that occur after the no-overlap timestamp, we release our locks
         // and try again with the PBWM lock (by unsetting
         // _shouldNotConflictWithSecondaryBatchApplicationBlock).
 
         const NamespaceString nss = coll->ns();
+        auto readSource = opCtx->recoveryUnit()->getTimestampReadSource();
 
-        bool readAtLastAppliedTimestamp =
-            _shouldReadAtLastAppliedTimestamp(opCtx, nss, readConcernLevel);
-
-        if (readAtLastAppliedTimestamp) {
-            opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kLastApplied);
-            readSource = opCtx->recoveryUnit()->getTimestampReadSource();
+        // Once we have our locks, check whether or not we should override the ReadSource that was
+        // set before acquiring locks.
+        if (auto newReadSource = SnapshotHelper::getNewReadSource(opCtx, nss)) {
+            opCtx->recoveryUnit()->setTimestampReadSource(*newReadSource);
+            readSource = *newReadSource;
         }
 
-        // This timestamp could be earlier than the timestamp seen when the transaction is opened
-        // because it is set asynchonously. This is not problematic because holding the collection
-        // lock guarantees no metadata changes will occur in that time.
-        auto lastAppliedTimestamp = readAtLastAppliedTimestamp
-            ? boost::optional<Timestamp>(replCoord->getMyLastAppliedOpTime().getTimestamp())
-            : boost::none;
+        const auto readTimestamp = opCtx->recoveryUnit()->getPointInTimeReadTimestamp();
+        const auto afterClusterTime = repl::ReadConcernArgs::get(opCtx).getArgsAfterClusterTime();
+        if (readTimestamp && afterClusterTime) {
+            // Readers that use afterClusterTime have already waited at a higher level for the
+            // lastApplied time to advance to a specified optime, and they assume the read timestamp
+            // of the operation is at least that waited-for timestamp. For kNoOverlap, which is the
+            // minimum of lastApplied and all_durable, this invariant ensures that afterClusterTime
+            // reads do not choose a read timestamp older than the one requested.
+            invariant(*readTimestamp >= afterClusterTime->asTimestamp(),
+                      str::stream() << "read timestamp " << readTimestamp->toString()
+                                    << "was less than afterClusterTime: "
+                                    << afterClusterTime->asTimestamp().toString());
+        }
 
-        if (!_conflictingCatalogChanges(opCtx, minSnapshot, lastAppliedTimestamp)) {
+        auto minSnapshot = coll->getMinimumVisibleSnapshot();
+        if (!SnapshotHelper::collectionChangesConflictWithRead(minSnapshot, readTimestamp)) {
             return;
         }
 
-        invariant(lastAppliedTimestamp ||
-                  // The kMajorityCommitted and kNoOverlap read sources already read from timestamps
-                  // that are safe with respect to concurrent secondary batch application.
-                  readSource == RecoveryUnit::ReadSource::kMajorityCommitted ||
-                  readSource == RecoveryUnit::ReadSource::kNoOverlap);
+        // If we are reading at a provided timestamp earlier than the latest catalog changes,
+        // then we must return an error.
+        if (readSource == RecoveryUnit::ReadSource::kProvided) {
+            uasserted(ErrorCodes::SnapshotUnavailable,
+                      str::stream()
+                          << "Unable to read from a snapshot due to pending collection catalog "
+                             "changes; please retry the operation. Snapshot timestamp is "
+                          << readTimestamp->toString() << ". Collection minimum is "
+                          << minSnapshot->toString());
+        }
+
+        invariant(
+            // The kMajorityCommitted and kNoOverlap read sources already read from timestamps
+            // that are safe with respect to concurrent secondary batch application, and are
+            // eligible for retrying.
+            readSource == RecoveryUnit::ReadSource::kMajorityCommitted ||
+            readSource == RecoveryUnit::ReadSource::kNoOverlap);
         invariant(readConcernLevel != repl::ReadConcernLevel::kSnapshotReadConcern);
 
         // Yield locks in order to do the blocking call below.
         _autoColl = boost::none;
 
-        // If there are pending catalog changes, we should conflict with any in-progress batches (by
-        // taking the PBWM lock) and choose not to read from the last applied timestamp by unsetting
-        // _shouldNotConflictWithSecondaryBatchApplicationBlock. Index builds on secondaries can
-        // complete at timestamps later than the lastAppliedTimestamp during initial sync. After
-        // initial sync finishes, if we waited instead of retrying, readers would block indefinitely
-        // waiting for the lastAppliedTimestamp to move forward. Instead we force the reader take
-        // the PBWM lock and retry.
-        if (lastAppliedTimestamp) {
+        // If there are pending catalog changes when using a no-overlap read source, we choose to
+        // take the PBWM lock to conflict with any in-progress batches. This prevents us from idly
+        // spinning in this loop trying to get a new read timestamp ahead of the minimum visible
+        // snapshot. This helps us guarantee liveness (i.e. we can eventually get a suitable read
+        // timestamp) but should not be necessary for correctness. After initial sync finishes, if
+        // we waited instead of retrying, readers would block indefinitely waiting for the noOverlap
+        // time to move forward. Instead we force the reader to take the PBWM lock and retry.
+        if (readSource == RecoveryUnit::ReadSource::kNoOverlap) {
+            invariant(readTimestamp);
             LOGV2(20576,
-                  "tried reading at last-applied time: {lastAppliedTimestamp} on ns: {nss_ns}, but "
-                  "future catalog changes are pending at time {minSnapshot}. Trying again without "
-                  "reading at last-applied time.",
-                  "lastAppliedTimestamp"_attr = *lastAppliedTimestamp,
-                  "nss_ns"_attr = nss.ns(),
-                  "minSnapshot"_attr = *minSnapshot);
+                  "Tried reading at no-overlap time, but future catalog changes are pending. "
+                  "Trying again without reading at no-overlap time.",
+                  "noOverlapTimestamp"_attr = *readTimestamp,
+                  "collection"_attr = nss.ns(),
+                  "collectionMinSnapshot"_attr = *minSnapshot);
             // Destructing the block sets _shouldConflictWithSecondaryBatchApplication back to the
             // previous value. If the previous value is false (because there is another
             // shouldNotConflictWithSecondaryBatchApplicationBlock outside of this function), this
@@ -220,28 +225,12 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx,
                 ErrorCodes::SnapshotUnavailable,
                 str::stream() << "Unable to read from a snapshot due to pending collection catalog "
                                  "changes; please retry the operation. Snapshot timestamp is "
-                              << (mySnapshot ? mySnapshot->toString() : "(none)")
-                              << ". Collection minimum is " << minSnapshot->toString(),
+                              << readTimestamp->toString() << ". Collection minimum is "
+                              << minSnapshot->toString(),
                 opCtx->lockState()->shouldConflictWithSecondaryBatchApplication());
 
-            // Cannot change ReadSource while a RecoveryUnit is active, which may result from
-            // calling getPointInTimeReadTimestamp().
-            opCtx->recoveryUnit()->abandonSnapshot();
-            opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset);
-        }
-
-        // If there are pending catalog changes when using a no-overlap read source, we choose to
-        // take the PBWM lock to conflict with any in-progress batches. This prevents us from idly
-        // spinning in this loop trying to get a new read timestamp ahead of the minimum visible
-        // snapshot. This helps us guarantee liveness (i.e. we can eventually get a suitable read
-        // timestamp) but should not be necessary for correctness.
-        if (readSource == RecoveryUnit::ReadSource::kNoOverlap) {
-            invariant(!lastAppliedTimestamp);  // no-overlap read source selects its own timestamp.
-            _shouldNotConflictWithSecondaryBatchApplicationBlock = boost::none;
-            invariant(opCtx->lockState()->shouldConflictWithSecondaryBatchApplication());
-
-            // Abandon our snapshot but don't change our read source, so that we can select a new
-            // read timestamp on the next loop iteration.
+            // Abandon our snapshot. We may select a new read timestamp or ReadSource in the next
+            // loop iteration.
             opCtx->recoveryUnit()->abandonSnapshot();
         }
 
@@ -259,84 +248,6 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx,
     }
 }
 
-bool AutoGetCollectionForRead::_shouldReadAtLastAppliedTimestamp(
-    OperationContext* opCtx,
-    const NamespaceString& nss,
-    repl::ReadConcernLevel readConcernLevel) const {
-
-    // If this block is unset, then the operation did not opt-out of the PBWM lock, implying that it
-    // cannot read at lastApplied. It's important to note that it is possible for this to be set,
-    // but still be holding the PBWM lock, explained below.
-    if (!_shouldNotConflictWithSecondaryBatchApplicationBlock) {
-        return false;
-    }
-
-    // If we are already holding the PBWM lock, do not read at last-applied. This is because once an
-    // operation reads without a timestamp (effectively seeing all writes), it is no longer safe to
-    // start reading at a timestamp, as writes or catalog operations may appear to vanish.
-    // This may occur when multiple collection locks are held concurrently, which is often the case
-    // when DBDirectClient is used.
-    if (opCtx->lockState()->isLockHeldForMode(resourceIdParallelBatchWriterMode, MODE_IS)) {
-        LOGV2_DEBUG(20577, 1, "not reading at last-applied because the PBWM lock is held");
-        return false;
-    }
-
-    // Majority and snapshot readConcern levels should not read from lastApplied; these read
-    // concerns already have a designated timestamp to read from.
-    if (readConcernLevel != repl::ReadConcernLevel::kLocalReadConcern &&
-        readConcernLevel != repl::ReadConcernLevel::kAvailableReadConcern) {
-        return false;
-    }
-
-    // If we are in a replication state (like secondary or primary catch-up) where we are not
-    // accepting writes, we should read at lastApplied. If this node can accept writes, then no
-    // conflicting replication batches are being applied and we can read from the default snapshot.
-    if (repl::ReplicationCoordinator::get(opCtx)->canAcceptWritesForDatabase(opCtx, "admin")) {
-        return false;
-    }
-
-    // Non-replicated collections do not need to read at lastApplied, as those collections are not
-    // written by the replication system.  However, the oplog is special, as it *is* written by the
-    // replication system.
-    if (!nss.isReplicated() && !nss.isOplog()) {
-        return false;
-    }
-
-    return true;
-}
-
-bool AutoGetCollectionForRead::_conflictingCatalogChanges(
-    OperationContext* opCtx,
-    boost::optional<Timestamp> minSnapshot,
-    boost::optional<Timestamp> lastAppliedTimestamp) const {
-    // This is the timestamp of the most recent catalog changes to this collection. If this is
-    // greater than any point in time read timestamps, we should either wait or return an error.
-    if (!minSnapshot) {
-        return false;
-    }
-
-    // If we are reading from the lastAppliedTimestamp and it is up-to-date with any catalog
-    // changes, we can return.
-    if (lastAppliedTimestamp &&
-        (lastAppliedTimestamp->isNull() || *lastAppliedTimestamp >= *minSnapshot)) {
-        return false;
-    }
-
-    // This can be set when readConcern is "snapshot" or "majority".
-    auto mySnapshot = opCtx->recoveryUnit()->getPointInTimeReadTimestamp();
-
-    // If we do not have a point in time to conflict with minSnapshot, return.
-    if (!mySnapshot && !lastAppliedTimestamp) {
-        return false;
-    }
-
-    // Return if there are no conflicting catalog changes with mySnapshot.
-    if (mySnapshot && *mySnapshot >= *minSnapshot) {
-        return false;
-    }
-
-    return true;
-}
 
 AutoGetCollectionForReadCommand::AutoGetCollectionForReadCommand(
     OperationContext* opCtx,
diff --git a/src/mongo/db/db_raii.h b/src/mongo/db/db_raii.h
index edf32cd0010..51208643c6a 100644
--- a/src/mongo/db/db_raii.h
+++ b/src/mongo/db/db_raii.h
@@ -133,19 +133,6 @@ private:
     // This field is optional, because the code to wait for majority committed snapshot needs to
     // release locks in order to block waiting
     boost::optional<AutoGetCollection> _autoColl;
-
-    // Returns true if we should read at the last applied timestamp instead of at "no" timestamp
-    // (i.e. reading with the "latest" snapshot reflecting all writes).  Reading at the last applied
-    // timestamp avoids reading in-flux data actively being written by the replication system.
-    bool _shouldReadAtLastAppliedTimestamp(OperationContext* opCtx,
-                                           const NamespaceString& nss,
-                                           repl::ReadConcernLevel readConcernLevel) const;
-
-    // Returns true if the minSnapshot causes conflicting catalog changes for either the provided
-    // lastAppliedTimestamp or the point-in-time snapshot of the RecoveryUnit on 'opCtx'.
-    bool _conflictingCatalogChanges(OperationContext* opCtx,
-                                    boost::optional<Timestamp> minSnapshot,
-                                    boost::optional<Timestamp> lastAppliedTimestamp) const;
 };
 
 /**
diff --git a/src/mongo/db/db_raii_test.cpp b/src/mongo/db/db_raii_test.cpp
index 4b9a3bac3c4..5258a51a33b 100644
--- a/src/mongo/db/db_raii_test.cpp
+++ b/src/mongo/db/db_raii_test.cpp
@@ -37,6 +37,8 @@
 #include "mongo/db/client.h"
 #include "mongo/db/concurrency/lock_state.h"
 #include "mongo/db/db_raii.h"
+#include "mongo/db/query/internal_plans.h"
+#include "mongo/db/storage/snapshot_manager.h"
 #include "mongo/logv2/log.h"
 #include "mongo/unittest/unittest.h"
 #include "mongo/util/time_support.h"
@@ -199,8 +201,13 @@ TEST_F(DBRAIITestFixture,
     ASSERT_OK(
         storageInterface()->createCollection(client1.second.get(), nss, defaultCollectionOptions));
     ASSERT_OK(replCoord->setFollowerMode(repl::MemberState::RS_SECONDARY));
+
+    // Don't call into the ReplicationCoordinator to update lastApplied because it is only a mock
+    // class and does not update the correct state in the SnapshotManager.
     repl::OpTime opTime(Timestamp(200, 1), 1);
-    replCoord->setMyLastAppliedOpTimeAndWallTime({opTime, Date_t() + Seconds(1)});
+    auto snapshotManager =
+        client1.second.get()->getServiceContext()->getStorageEngine()->getSnapshotManager();
+    snapshotManager->setLastApplied(opTime.getTimestamp());
     Lock::DBLock dbLock1(client1.second.get(), nss.db(), MODE_IX);
     ASSERT(client1.second->lockState()->isDbLockedForMode(nss.db(), MODE_IX));
 
@@ -220,18 +227,119 @@ TEST_F(DBRAIITestFixture,
     // for the collection.  If we now manually set our last applied time to something very early, we
     // will be guaranteed to hit the logic that triggers when the minimum snapshot time is greater
     // than the read-at time, since we default to reading at last-applied when in SECONDARY state.
+
+    // Don't call into the ReplicationCoordinator to update lastApplied because it is only a mock
+    // class and does not update the correct state in the SnapshotManager.
     repl::OpTime opTime(Timestamp(2, 1), 1);
-    replCoord->setMyLastAppliedOpTimeAndWallTime({opTime, Date_t() + Seconds(1)});
+    auto snapshotManager =
+        client1.second.get()->getServiceContext()->getStorageEngine()->getSnapshotManager();
+    snapshotManager->setLastApplied(opTime.getTimestamp());
+
     Lock::DBLock dbLock1(client1.second.get(), nss.db(), MODE_IX);
     ASSERT(client1.second->lockState()->isDbLockedForMode(nss.db(), MODE_IX));
+
     AutoGetCollectionForRead coll(client2.second.get(), NamespaceString("local.system.js"));
+    // Reading from an unreplicated collection does not change the ReadSource to kNoOverlap.
+    ASSERT_EQ(client2.second.get()->recoveryUnit()->getTimestampReadSource(),
+              RecoveryUnit::ReadSource::kUnset);
 
-    // The current code uasserts in this situation, so we confirm that happens here.
+    // Reading from a replicated collection will try to switch to kNoOverlap. Because we are
+    // already reading without a timestamp and we can't reacquire the PBWM lock to continue reading
+    // without a timestamp, we uassert in this situation.
     ASSERT_THROWS_CODE(AutoGetCollectionForRead(client2.second.get(), nss),
                        DBException,
                        ErrorCodes::SnapshotUnavailable);
 }
 
+TEST_F(DBRAIITestFixture, AutoGetCollectionForReadLastAppliedConflict) {
+    // This test simulates a situation where AutoGetCollectionForRead can't read at the no-overlap
+    // point (minimum of all_durable and lastApplied) because it is set to a point earlier than the
+    // catalog change. We expect to read without a timestamp and hold the PBWM lock.
+    auto replCoord = repl::ReplicationCoordinator::get(client1.second.get());
+    CollectionOptions defaultCollectionOptions;
+    ASSERT_OK(
+        storageInterface()->createCollection(client1.second.get(), nss, defaultCollectionOptions));
+    ASSERT_OK(replCoord->setFollowerMode(repl::MemberState::RS_SECONDARY));
+
+    // Note that when the collection was created, above, the system chooses a minimum snapshot time
+    // for the collection.  If we now manually set our last applied time to something very early, we
+    // will be guaranteed to hit the logic that triggers when the minimum snapshot time is greater
+    // than the read-at time, since we default to reading at kNoOverlap when in SECONDARY state.
+
+    // Don't call into the ReplicationCoordinator to update lastApplied because it is only a mock
+    // class and does not update the correct state in the SnapshotManager.
+    repl::OpTime opTime(Timestamp(2, 1), 1);
+    auto snapshotManager =
+        client1.second.get()->getServiceContext()->getStorageEngine()->getSnapshotManager();
+    snapshotManager->setLastApplied(opTime.getTimestamp());
+    AutoGetCollectionForRead coll(client1.second.get(), nss);
+
+    // We can't read from kNoOverlap in this scenario because there is a catalog conflict. Resort
+    // to taking the PBWM lock and reading without a timestamp.
+    ASSERT_EQ(client1.second.get()->recoveryUnit()->getTimestampReadSource(),
+              RecoveryUnit::ReadSource::kUnset);
+    ASSERT_TRUE(client1.second.get()->lockState()->isLockHeldForMode(
+        resourceIdParallelBatchWriterMode, MODE_IS));
+}
+
+TEST_F(DBRAIITestFixture, AutoGetCollectionForReadLastAppliedUnavailable) {
+    // This test simulates a situation where AutoGetCollectionForRead reads at the no-overlap
+    // point (minimum of all_durable and lastApplied) even though lastApplied is not available.
+    auto replCoord = repl::ReplicationCoordinator::get(client1.second.get());
+    CollectionOptions defaultCollectionOptions;
+    ASSERT_OK(
+        storageInterface()->createCollection(client1.second.get(), nss, defaultCollectionOptions));
+    ASSERT_OK(replCoord->setFollowerMode(repl::MemberState::RS_SECONDARY));
+
+    // Note that when the collection was created, above, the system chooses a minimum snapshot time
+    // for the collection. Since last-applied isn't available, we default to all_durable, which is
+    // available, and is greater than the collection minimum snapshot.
+    auto snapshotManager =
+        client1.second.get()->getServiceContext()->getStorageEngine()->getSnapshotManager();
+    ASSERT_FALSE(snapshotManager->getLastApplied());
+    AutoGetCollectionForRead coll(client1.second.get(), nss);
+
+    // Even though lastApplied isn't available, the ReadSource is set to kNoOverlap, which reads
+    // at the all_durable time.
+    ASSERT_EQ(client1.second.get()->recoveryUnit()->getTimestampReadSource(),
+              RecoveryUnit::ReadSource::kNoOverlap);
+    ASSERT_TRUE(client1.second.get()->recoveryUnit()->getPointInTimeReadTimestamp());
+    ASSERT_FALSE(client1.second.get()->lockState()->isLockHeldForMode(
+        resourceIdParallelBatchWriterMode, MODE_IS));
+}
+
+TEST_F(DBRAIITestFixture, AutoGetCollectionForReadUsesNoOverlapOnSecondary) {
+    auto opCtx = client1.second.get();
+    ASSERT_OK(storageInterface()->createCollection(opCtx, nss, {}));
+    ASSERT_OK(
+        repl::ReplicationCoordinator::get(opCtx)->setFollowerMode(repl::MemberState::RS_SECONDARY));
+    AutoGetCollectionForRead autoColl(opCtx, nss);
+    auto exec = InternalPlanner::collectionScan(opCtx,
+                                                nss.ns(),
+                                                autoColl.getCollection(),
+                                                PlanExecutor::YIELD_MANUAL,
+                                                InternalPlanner::FORWARD);
+
+    // The collection scan should use the default ReadSource on a secondary.
+    ASSERT_EQ(RecoveryUnit::ReadSource::kNoOverlap,
+              opCtx->recoveryUnit()->getTimestampReadSource());
+
+    // While yielding the collection scan, simulate stepping-up to a primary.
+    exec->saveState();
+    Locker::LockSnapshot lockSnapshot;
+    ASSERT_TRUE(opCtx->lockState()->saveLockStateAndUnlock(&lockSnapshot));
+    ASSERT_OK(
+        repl::ReplicationCoordinator::get(opCtx)->setFollowerMode(repl::MemberState::RS_PRIMARY));
+
+    // After restoring, the collection scan should now be reading with kNoOverlap, the default on
+    // secondaries.
+    opCtx->lockState()->restoreLockState(opCtx, lockSnapshot);
+    exec->restoreState();
+    ASSERT_EQ(RecoveryUnit::ReadSource::kNoOverlap,
+              opCtx->recoveryUnit()->getTimestampReadSource());
+    BSONObj obj;
+    ASSERT_EQUALS(PlanExecutor::IS_EOF, exec->getNext(&obj, nullptr));
+}
 
 }  // namespace
 }  // namespace mongo
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp
index 839ca031062..16e2a483ca7 100644
--- a/src/mongo/db/index_builds_coordinator.cpp
+++ b/src/mongo/db/index_builds_coordinator.cpp
@@ -2037,6 +2037,9 @@ void IndexBuildsCoordinator::_runIndexBuildInner(OperationContext* opCtx,
 void IndexBuildsCoordinator::_buildIndex(OperationContext* opCtx,
                                          std::shared_ptr<ReplIndexBuildState> replState,
                                          const IndexBuildOptions& indexBuildOptions) {
+    // Read without a timestamp. When we commit, we block writes which guarantees all writes are
+    // visible.
+    opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
     _scanCollectionAndInsertKeysIntoSorter(opCtx, replState);
     _insertKeysFromSideTablesWithoutBlockingWrites(opCtx, replState);
     _signalPrimaryForCommitReadiness(opCtx, replState);
diff --git a/src/mongo/db/repl/oplog_batcher.cpp b/src/mongo/db/repl/oplog_batcher.cpp
index 27653ab21dc..aba27772547 100644
--- a/src/mongo/db/repl/oplog_batcher.cpp
+++ b/src/mongo/db/repl/oplog_batcher.cpp
@@ -363,8 +363,8 @@ std::size_t getBatchLimitOplogBytes(OperationContext* opCtx, StorageInterface* s
     // We can't change the timestamp source within a write unit of work.
     invariant(!opCtx->lockState()->inAWriteUnitOfWork());
     // We're only reading oplog metadata, so the timestamp is not important.  If we read with the
-    // default (which is kLastApplied on secondaries), we may end up with a reader that is at
-    // kLastApplied.  If we then roll back, then when we reconstruct prepared transactions during
+    // default (which is lastApplied on secondaries), we may end up with a reader that is at
+    // lastApplied.  If we then roll back, then when we reconstruct prepared transactions during
     // rollback recovery we will be preparing transactions before the read timestamp, which triggers
     // an assertion in WiredTiger.
     ReadSourceScope readSourceScope(opCtx, RecoveryUnit::ReadSource::kNoTimestamp);
diff --git a/src/mongo/db/repl/replication_coordinator_external_state.h b/src/mongo/db/repl/replication_coordinator_external_state.h
index e5ca3cfe12c..29cc6dd9c5b 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state.h
@@ -260,11 +260,11 @@ public:
     virtual void updateCommittedSnapshot(const OpTime& newCommitPoint) = 0;
 
     /**
-     * Updates the local snapshot to a consistent point for secondary reads.
+     * Updates the lastApplied snapshot to a consistent point for secondary reads.
      *
-     * It is illegal to call with a optime that does not name an existing snapshot.
+     * It is illegal to call with a non-existent optime.
      */
-    virtual void updateLocalSnapshot(const OpTime& optime) = 0;
+    virtual void updateLastAppliedSnapshot(const OpTime& optime) = 0;
 
     /**
      * Returns whether or not the SnapshotThread is active.
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
index 9fa64da2a2e..ce80ab0acac 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
@@ -356,7 +356,7 @@ void ReplicationCoordinatorExternalStateImpl::clearAppliedThroughIfCleanShutdown
     }
 
     // Ensure that all writes are visible before reading. If we failed mid-batch, it would be
-    // possible to read from a kLastApplied ReadSource where not all writes to the minValid document
+    // possible to read from a kNoOverlap ReadSource where not all writes to the minValid document
     // are visible, generating a writeConflict that would not resolve.
     opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
 
@@ -972,10 +972,10 @@ void ReplicationCoordinatorExternalStateImpl::updateCommittedSnapshot(
     notifyOplogMetadataWaiters(newCommitPoint);
 }
 
-void ReplicationCoordinatorExternalStateImpl::updateLocalSnapshot(const OpTime& optime) {
+void ReplicationCoordinatorExternalStateImpl::updateLastAppliedSnapshot(const OpTime& optime) {
     auto manager = _service->getStorageEngine()->getSnapshotManager();
     if (manager) {
-        manager->setLocalSnapshot(optime.getTimestamp());
+        manager->setLastApplied(optime.getTimestamp());
     }
 }
 
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.h b/src/mongo/db/repl/replication_coordinator_external_state_impl.h
index d1d12e285b3..f38aee76a39 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.h
@@ -104,7 +104,7 @@ public:
     virtual bool tooStale();
     void dropAllSnapshots() final;
     void updateCommittedSnapshot(const OpTime& newCommitPoint) final;
-    void updateLocalSnapshot(const OpTime& optime) final;
+    void updateLastAppliedSnapshot(const OpTime& optime) final;
     virtual bool snapshotsEnabled() const;
     virtual void notifyOplogMetadataWaiters(const OpTime& committedOpTime);
     boost::optional<OpTime> getEarliestDropPendingOpTime() const final;
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
index 84986f783c8..4c9ddbaf7f1 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.cpp
@@ -250,7 +250,7 @@ void ReplicationCoordinatorExternalStateMock::dropAllSnapshots() {}
 void ReplicationCoordinatorExternalStateMock::updateCommittedSnapshot(
     const OpTime& newCommitPoint) {}
 
-void ReplicationCoordinatorExternalStateMock::updateLocalSnapshot(const OpTime& optime) {}
+void ReplicationCoordinatorExternalStateMock::updateLastAppliedSnapshot(const OpTime& optime) {}
 
 bool ReplicationCoordinatorExternalStateMock::snapshotsEnabled() const {
     return _areSnapshotsEnabled;
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_mock.h b/src/mongo/db/repl/replication_coordinator_external_state_mock.h
index 0ef9ad2e893..fd867df8ac7 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_mock.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state_mock.h
@@ -93,7 +93,7 @@ public:
     virtual bool tooStale();
     virtual void dropAllSnapshots();
     virtual void updateCommittedSnapshot(const OpTime& newCommitPoint);
-    virtual void updateLocalSnapshot(const OpTime& optime);
+    virtual void updateLastAppliedSnapshot(const OpTime& optime);
     virtual bool snapshotsEnabled() const;
     virtual void notifyOplogMetadataWaiters(const OpTime& committedOpTime);
     boost::optional<OpTime> getEarliestDropPendingOpTime() const final;
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index ebde2673416..bcae984ce91 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -1389,6 +1389,14 @@ void ReplicationCoordinatorImpl::_setMyLastAppliedOpTimeAndWallTime(
     // No need to wake up replication waiters because there should not be any replication waiters
     // waiting on our own lastApplied.
 
+    // Update the storage engine's lastApplied snapshot before updating the stable timestamp on the
+    // storage engine. New transactions reading from the lastApplied snapshot should start before
+    // the oldest timestamp is advanced to avoid races. Additionally, update this snapshot before
+    // signaling optime waiters. This avoids a race that would allow optime waiters to open
+    // transactions on stale lastApplied values because they do not hold or reacquire the
+    // replication coordinator mutex when signaled.
+    _externalState->updateLastAppliedSnapshot(opTime);
+
     // Signal anyone waiting on optime changes.
     _opTimeWaiterList.setValueIf_inlock(
         [opTime](const OpTime& waitOpTime, const SharedWaiterHandle& waiter) {
@@ -1396,11 +1404,6 @@ void ReplicationCoordinatorImpl::_setMyLastAppliedOpTimeAndWallTime(
         },
         opTime);
 
-    // Update the local snapshot before updating the stable timestamp on the storage engine. New
-    // transactions reading from the local snapshot should start before the oldest timestamp is
-    // advanced to avoid races.
-    _externalState->updateLocalSnapshot(opTime);
-
     // Notify the oplog waiters after updating the local snapshot.
     signalOplogWaiters();
 
diff --git a/src/mongo/db/repl/replication_recovery.cpp b/src/mongo/db/repl/replication_recovery.cpp
index 82c406dc480..fc680d1a67a 100644
--- a/src/mongo/db/repl/replication_recovery.cpp
+++ b/src/mongo/db/repl/replication_recovery.cpp
@@ -130,6 +130,7 @@ public:
           _oplogApplicationEndPoint(oplogApplicationEndPoint) {}
 
     void startup(OperationContext* opCtx) final {
+        opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
         _client = std::make_unique<DBDirectClient>(opCtx);
         BSONObj predicate = _oplogApplicationEndPoint
             ? BSON("$gte" << _oplogApplicationStartPoint << "$lte" << *_oplogApplicationEndPoint)
diff --git a/src/mongo/db/repl/transaction_oplog_application.cpp b/src/mongo/db/repl/transaction_oplog_application.cpp
index b9f75c07d1a..8608926919c 100644
--- a/src/mongo/db/repl/transaction_oplog_application.cpp
+++ b/src/mongo/db/repl/transaction_oplog_application.cpp
@@ -512,8 +512,8 @@ void reconstructPreparedTransactions(OperationContext* opCtx, repl::OplogApplica
     }
     // Read the transactions table and the oplog collection without a timestamp.
     // The below DBDirectClient read uses AutoGetCollectionForRead which could implicitly change the
-    // read source to kLastApplied. So we need to explicitly set the read source to kNoTimestamp to
-    // force reads in this scope to be untimestamped.
+    // read source. So we need to explicitly set the read source to kNoTimestamp to force reads in
+    // this scope to be untimestamped.
     ReadSourceScope readSourceScope(opCtx, RecoveryUnit::ReadSource::kNoTimestamp);
 
     DBDirectClient client(opCtx);
diff --git a/src/mongo/db/storage/SConscript b/src/mongo/db/storage/SConscript
index ccd5d048e1d..e32b8ba329a 100644
--- a/src/mongo/db/storage/SConscript
+++ b/src/mongo/db/storage/SConscript
@@ -30,6 +30,23 @@ env.Library(
     )
 
 env.Library(
+    target='snapshot_helper',
+    source=[
+        'snapshot_helper.cpp',
+        ],
+    LIBDEPS=[
+        '$BUILD_DIR/mongo/base',
+        '$BUILD_DIR/mongo/db/namespace_string',
+        ],
+    LIBDEPS_PRIVATE=[
+        '$BUILD_DIR/mongo/db/concurrency/lock_manager',
+        '$BUILD_DIR/mongo/db/repl/read_concern_args',
+        '$BUILD_DIR/mongo/db/repl/repl_coordinator_interface',
+        'recovery_unit_base',
+        ],
+    )
+
+env.Library(
     target='duplicate_key_error_info',
     source=[
         'duplicate_key_error_info.cpp',
diff --git a/src/mongo/db/storage/kv/kv_engine_timestamps_test.cpp b/src/mongo/db/storage/kv/kv_engine_timestamps_test.cpp
index aeef71561ae..baecec3c8af 100644
--- a/src/mongo/db/storage/kv/kv_engine_timestamps_test.cpp
+++ b/src/mongo/db/storage/kv/kv_engine_timestamps_test.cpp
@@ -150,9 +150,9 @@ public:
         return itCountOn(op);
     }
 
-    int itCountLocal() {
+    int itCountLastApplied() {
         auto op = makeOperation();
-        op->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kLastApplied);
+        op->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoOverlap);
         return itCountOn(op);
     }
 
@@ -176,14 +176,14 @@ public:
         return std::string(record->data.data());
     }
 
-    boost::optional<Record> readRecordLocal(RecordId id) {
+    boost::optional<Record> readRecordLastApplied(RecordId id) {
         auto op = makeOperation();
-        op->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kLastApplied);
+        op->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoOverlap);
         return readRecordOn(op, id);
     }
 
-    std::string readStringLocal(RecordId id) {
-        auto record = readRecordLocal(id);
+    std::string readStringLastApplied(RecordId id) {
+        auto record = readRecordLastApplied(id);
         ASSERT(record);
         return std::string(record->data.data());
     }
@@ -360,7 +360,7 @@ TEST_F(SnapshotManagerTests, UpdateAndDelete) {
     ASSERT(!readRecordCommitted(id));
 }
 
-TEST_F(SnapshotManagerTests, InsertAndReadOnLocalSnapshot) {
+TEST_F(SnapshotManagerTests, InsertAndReadOnLastAppliedSnapshot) {
     if (!snapshotManager)
         return;  // This test is only for engines that DO support SnapshotManagers.
 
@@ -369,7 +369,7 @@ TEST_F(SnapshotManagerTests, InsertAndReadOnLocalSnapshot) {
     auto id = insertRecordAndCommit();
     auto afterInsert = fetchAndIncrementTimestamp();
 
-    // Not reading on the last local timestamp returns the most recent data.
+    // Not reading on the last applied timestamp returns the most recent data.
     auto op = makeOperation();
     auto ru = op->recoveryUnit();
     ru->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset);
@@ -379,18 +379,18 @@ TEST_F(SnapshotManagerTests, InsertAndReadOnLocalSnapshot) {
     deleteRecordAndCommit(id);
     auto afterDelete = fetchAndIncrementTimestamp();
 
-    // Reading at the local snapshot timestamps returns data in order.
-    snapshotManager->setLocalSnapshot(beforeInsert);
-    ASSERT_EQ(itCountLocal(), 0);
-    ASSERT(!readRecordLocal(id));
+    // Reading at the last applied snapshot timestamps returns data in order.
+    snapshotManager->setLastApplied(beforeInsert);
+    ASSERT_EQ(itCountLastApplied(), 0);
+    ASSERT(!readRecordLastApplied(id));
 
-    snapshotManager->setLocalSnapshot(afterInsert);
-    ASSERT_EQ(itCountLocal(), 1);
-    ASSERT(readRecordLocal(id));
+    snapshotManager->setLastApplied(afterInsert);
+    ASSERT_EQ(itCountLastApplied(), 1);
+    ASSERT(readRecordLastApplied(id));
 
-    snapshotManager->setLocalSnapshot(afterDelete);
-    ASSERT_EQ(itCountLocal(), 0);
-    ASSERT(!readRecordLocal(id));
+    snapshotManager->setLastApplied(afterDelete);
+    ASSERT_EQ(itCountLastApplied(), 0);
+    ASSERT(!readRecordLastApplied(id));
 }
 
 TEST_F(SnapshotManagerTests, UpdateAndDeleteOnLocalSnapshot) {
@@ -416,20 +416,20 @@ TEST_F(SnapshotManagerTests, UpdateAndDeleteOnLocalSnapshot) {
     deleteRecordAndCommit(id);
     auto afterDelete = fetchAndIncrementTimestamp();
 
-    snapshotManager->setLocalSnapshot(beforeInsert);
-    ASSERT_EQ(itCountLocal(), 0);
-    ASSERT(!readRecordLocal(id));
+    snapshotManager->setLastApplied(beforeInsert);
+    ASSERT_EQ(itCountLastApplied(), 0);
+    ASSERT(!readRecordLastApplied(id));
 
-    snapshotManager->setLocalSnapshot(afterInsert);
-    ASSERT_EQ(itCountLocal(), 1);
-    ASSERT_EQ(readStringLocal(id), "Aardvark");
+    snapshotManager->setLastApplied(afterInsert);
+    ASSERT_EQ(itCountLastApplied(), 1);
+    ASSERT_EQ(readStringLastApplied(id), "Aardvark");
 
-    snapshotManager->setLocalSnapshot(afterUpdate);
-    ASSERT_EQ(itCountLocal(), 1);
-    ASSERT_EQ(readStringLocal(id), "Blue spotted stingray");
+    snapshotManager->setLastApplied(afterUpdate);
+    ASSERT_EQ(itCountLastApplied(), 1);
+    ASSERT_EQ(readStringLastApplied(id), "Blue spotted stingray");
 
-    snapshotManager->setLocalSnapshot(afterDelete);
-    ASSERT_EQ(itCountLocal(), 0);
-    ASSERT(!readRecordLocal(id));
+    snapshotManager->setLastApplied(afterDelete);
+    ASSERT_EQ(itCountLastApplied(), 0);
+    ASSERT(!readRecordLastApplied(id));
 }
 }  // namespace mongo
diff --git a/src/mongo/db/storage/recovery_unit.h b/src/mongo/db/storage/recovery_unit.h
index f8c70309ae2..eeddfafd624 100644
--- a/src/mongo/db/storage/recovery_unit.h
+++ b/src/mongo/db/storage/recovery_unit.h
@@ -253,7 +253,6 @@ public:
      *  - when using ReadSource::kNoOverlap, the timestamp chosen by the storage engine.
      *  - when using ReadSource::kAllDurableSnapshot, the timestamp chosen using the storage
      * engine's all_durable timestamp.
-     *  - when using ReadSource::kLastApplied, the timestamp chosen using the storage engine's last
      * applied timestamp. Can return boost::none if no timestamp has been established.
      *  - when using ReadSource::kMajorityCommitted, the majority committed timestamp chosen by the
      * storage engine after a transaction has been opened or after a call to
@@ -399,11 +398,6 @@ public:
          */
         kNoOverlap,
         /**
-         * Read from the last applied timestamp. New transactions start at the most up-to-date
-         * timestamp.
-         */
-        kLastApplied,
-        /**
          * Read from the all_durable timestamp. New transactions will always read from the same
          * timestamp and never advance.
          */
@@ -414,6 +408,24 @@ public:
         kProvided
     };
 
+    static std::string toString(ReadSource rs) {  // Maps 'rs' to its enumerator name.
+        switch (rs) {
+            case ReadSource::kUnset:
+                return "kUnset";
+            case ReadSource::kNoTimestamp:
+                return "kNoTimestamp";
+            case ReadSource::kMajorityCommitted:
+                return "kMajorityCommitted";
+            case ReadSource::kNoOverlap:
+                return "kNoOverlap";
+            case ReadSource::kAllDurableSnapshot:
+                return "kAllDurableSnapshot";
+            case ReadSource::kProvided:
+                return "kProvided";
+        }
+        MONGO_UNREACHABLE;  // Reached only if 'rs' matches none of the cases above.
+    }
+
     /**
      * Sets which timestamp to use for read transactions. If 'provided' is supplied, only kProvided
      * is an acceptable input.
diff --git a/src/mongo/db/storage/snapshot_helper.cpp b/src/mongo/db/storage/snapshot_helper.cpp
new file mode 100644
index 00000000000..777868f7830
--- /dev/null
+++ b/src/mongo/db/storage/snapshot_helper.cpp
@@ -0,0 +1,151 @@
+/**
+ *    Copyright (C) 2020-present MongoDB, Inc.
+ *
+ *    This program is free software: you can redistribute it and/or modify
+ *    it under the terms of the Server Side Public License, version 1,
+ *    as published by MongoDB, Inc.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    Server Side Public License for more details.
+ *
+ *    You should have received a copy of the Server Side Public License
+ *    along with this program. If not, see
+ *    <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ *    As a special exception, the copyright holders give permission to link the
+ *    code of portions of this program with the OpenSSL library under certain
+ *    conditions as described in each individual source file and distribute
+ *    linked combinations including the program with the OpenSSL library. You
+ *    must comply with the Server Side Public License in all respects for
+ *    all of the code used other than as permitted herein. If you modify file(s)
+ *    with this exception, you may extend this exception to your version of the
+ *    file(s), but you are not obligated to do so. If you do not wish to do so,
+ *    delete this exception statement from your version. If you delete this
+ *    exception statement from all source files in the program, then also delete
+ *    it in the license file.
+ */
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kStorage
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/storage/snapshot_helper.h"
+
+#include "mongo/db/repl/read_concern_args.h"
+#include "mongo/db/repl/replication_coordinator.h"
+#include "mongo/logv2/log.h"
+
+namespace mongo {
+namespace SnapshotHelper {
+bool canSwitchReadSource(OperationContext* opCtx) {
+    // Returns true only when the operation's read concern level is "local" or "available".
+    // Most readConcerns have behavior controlled at higher levels. Local and available are the only
+    // ReadConcerns that should consider changing, since they read without a timestamp by default.
+    const auto readConcernLevel = repl::ReadConcernArgs::get(opCtx).getLevel();
+    if (readConcernLevel == repl::ReadConcernLevel::kLocalReadConcern ||
+        readConcernLevel == repl::ReadConcernLevel::kAvailableReadConcern) {
+        return true;
+    }
+
+    return false;
+}
+
+bool shouldReadAtNoOverlap(OperationContext* opCtx,
+                           const NamespaceString& nss,
+                           std::string* reason) {
+    // Decides whether a read on 'nss' should use kNoOverlap; on false, '*reason' says why not.
+    // If this is true, then the operation opted-in to the PBWM lock, implying that it cannot read
+    // at no-overlap. It's important to note that it is possible for this to be false, but still be
+    // holding the PBWM lock, explained below.
+    if (opCtx->lockState()->shouldConflictWithSecondaryBatchApplication()) {
+        *reason = "conflicts with batch application";
+        return false;
+    }
+
+    // If we are already holding the PBWM lock, do not read at no-overlap. Snapshots acquired by an
+    // operation after a yield/restore must see all writes in the pre-yield snapshot. Once a
+    // snapshot is reading without a timestamp, we choose to continue acquiring snapshots without a
+    // timestamp. This is done in lieu of determining a timestamp far enough in the future that's
+    // guaranteed to observe all previous writes. This may occur when multiple collection locks are
+    // held concurrently, which is often the case when DBDirectClient is used.
+    if (opCtx->lockState()->isLockHeldForMode(resourceIdParallelBatchWriterMode, MODE_IS)) {
+        *reason = "PBWM lock is held";
+        LOGV2_DEBUG(20577, 1, "not reading at no-overlap because the PBWM lock is held");
+        return false;
+    }
+
+    // If we are in a replication state (like secondary or primary catch-up) where we are not
+    // accepting writes, we should read at no-overlap. If this node can accept writes, then no
+    // conflicting replication batches are being applied and we can read from the default snapshot.
+    if (repl::ReplicationCoordinator::get(opCtx)->canAcceptWritesForDatabase(opCtx, "admin")) {
+        *reason = "primary";
+        return false;
+    }
+
+    // Non-replicated collections do not need to read at no-overlap, as those collections are not
+    // written by the replication system.  However, the oplog is special, as it *is* written by the
+    // replication system.
+    if (!nss.isReplicated() && !nss.isOplog()) {
+        *reason = "unreplicated collection";
+        return false;
+    }
+
+    return true;
+}
+boost::optional<RecoveryUnit::ReadSource> getNewReadSource(OperationContext* opCtx,
+                                                           const NamespaceString& nss) {
+    const bool canSwitch = canSwitchReadSource(opCtx);
+    if (!canSwitch) {
+        return boost::none;
+    }
+
+    const auto existing = opCtx->recoveryUnit()->getTimestampReadSource();
+    std::string reason;
+    const bool readAtNoOverlap = shouldReadAtNoOverlap(opCtx, nss, &reason);
+    if (existing == RecoveryUnit::ReadSource::kUnset) {
+        // Shifting from reading without a timestamp to reading with a timestamp can be dangerous
+        // because writes will appear to vanish. This case is intended for new reads on secondaries
+        // and query yield recovery after state transitions from primary to secondary.
+        if (readAtNoOverlap) {
+            LOGV2_DEBUG(4452901, 2, "Changing ReadSource to kNoOverlap", logAttrs(nss));
+            return RecoveryUnit::ReadSource::kNoOverlap;
+        }
+    } else if (existing == RecoveryUnit::ReadSource::kNoOverlap) {
+        // For some reason, we can no longer read at kNoOverlap.
+        // An operation that yields a timestamped snapshot must restore a snapshot with at least as
+        // large of a timestamp, or with proper consideration of rollback scenarios, no timestamp.
+        // Given readers do not survive rollbacks, it's okay to go from reading with a timestamp to
+        // reading without one. More writes will become visible.
+        if (!readAtNoOverlap) {
+            LOGV2_DEBUG(
+                4452902, 2, "Changing ReadSource to kUnset", logAttrs(nss), "reason"_attr = reason);
+            // This shift to kUnset assumes that callers will not make future attempts to manipulate
+            // their ReadSources after performing reads at an un-timestamped snapshot. The only
+            // exception is callers of this function that may need to change from kUnset to
+            // kNoOverlap in the event of a catalog conflict or query yield.
+            return RecoveryUnit::ReadSource::kUnset;
+        }
+    }
+    return boost::none;
+}
+
+bool collectionChangesConflictWithRead(boost::optional<Timestamp> collectionMin,
+                                       boost::optional<Timestamp> readTimestamp) {
+    // This is the timestamp of the most recent catalog changes to this collection. If this is
+    // greater than any point in time read timestamps, we should either wait or return an error.
+    if (!collectionMin) {
+        return false;
+    }
+
+    // If we do not have a point in time to conflict with collectionMin, there is no conflict.
+    if (!readTimestamp || readTimestamp->isNull()) {
+        return false;
+    }
+
+    // A conflict exists only if the latest catalog change is newer than the readTimestamp.
+    return *collectionMin > readTimestamp;
+}
+}  // namespace SnapshotHelper
+}  // namespace mongo
+\ No newline at end of file
diff --git a/src/mongo/db/storage/snapshot_helper.h b/src/mongo/db/storage/snapshot_helper.h
new file mode 100644
index 00000000000..fa8fdd85f24
--- /dev/null
+++ b/src/mongo/db/storage/snapshot_helper.h
@@ -0,0 +1,43 @@
+/**
+ *    Copyright (C) 2020-present MongoDB, Inc.
+ *
+ *    This program is free software: you can redistribute it and/or modify
+ *    it under the terms of the Server Side Public License, version 1,
+ *    as published by MongoDB, Inc.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    Server Side Public License for more details.
+ *
+ *    You should have received a copy of the Server Side Public License
+ *    along with this program. If not, see
+ *    <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ *    As a special exception, the copyright holders give permission to link the
+ *    code of portions of this program with the OpenSSL library under certain
+ *    conditions as described in each individual source file and distribute
+ *    linked combinations including the program with the OpenSSL library. You
+ *    must comply with the Server Side Public License in all respects for
+ *    all of the code used other than as permitted herein. If you modify file(s)
+ *    with this exception, you may extend this exception to your version of the
+ *    file(s), but you are not obligated to do so. If you do not wish to do so,
+ *    delete this exception statement from your version. If you delete this
+ *    exception statement from all source files in the program, then also delete
+ *    it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/operation_context.h"
+
+namespace mongo {
+namespace SnapshotHelper {
+// Returns a ReadSource if we should change our current ReadSource. Returns boost::none otherwise.
+boost::optional<RecoveryUnit::ReadSource> getNewReadSource(OperationContext* opCtx,
+                                                           const NamespaceString& nss);
+
+bool collectionChangesConflictWithRead(boost::optional<Timestamp> collectionMin,
+                                       boost::optional<Timestamp> readTimestamp);
+}  // namespace SnapshotHelper
+}  // namespace mongo
diff --git a/src/mongo/db/storage/snapshot_manager.h b/src/mongo/db/storage/snapshot_manager.h
index d41df1c5013..7839ef179ba 100644
--- a/src/mongo/db/storage/snapshot_manager.h
+++ b/src/mongo/db/storage/snapshot_manager.h
@@ -59,14 +59,14 @@ public:
     virtual void setCommittedSnapshot(const Timestamp& timestamp) = 0;
 
     /**
-     *  Sets the snapshot for the last stable timestamp for reading on secondaries.
+     *  Sets the lastApplied timestamp.
      */
-    virtual void setLocalSnapshot(const Timestamp& timestamp) = 0;
+    virtual void setLastApplied(const Timestamp& timestamp) = 0;
 
     /**
-     * Returns the local snapshot timestamp.
+     * Returns the lastApplied timestamp.
      */
-    virtual boost::optional<Timestamp> getLocalSnapshot() = 0;
+    virtual boost::optional<Timestamp> getLastApplied() = 0;
 
     /**
      * Drops all snapshots and clears the "committed" snapshot.
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
index 661b0e009c1..352699a9de8 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
@@ -668,6 +668,12 @@ WiredTigerKVEngine::WiredTigerKVEngine(const std::string& canonicalName,
             setInitialDataTimestamp(_recoveryTimestamp);
             setOldestTimestamp(_recoveryTimestamp, false);
             setStableTimestamp(_recoveryTimestamp, false);
+
+            _sessionCache->snapshotManager().setLastApplied(_recoveryTimestamp);
+            {
+                stdx::lock_guard<Latch> lk(_highestDurableTimestampMutex);
+                _highestSeenDurableTimestamp = _recoveryTimestamp.asULL();
+            }
         }
     }
 
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp
index 8cc45e0201e..f9d9751d464 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp
@@ -453,7 +453,6 @@ boost::optional<Timestamp> WiredTigerRecoveryUnit::getPointInTimeReadTimestamp()
         // The following ReadSources can only establish a read timestamp when a transaction is
         // opened.
         case ReadSource::kNoOverlap:
-        case ReadSource::kLastApplied:
         case ReadSource::kAllDurableSnapshot:
             break;
     }
@@ -462,14 +461,14 @@ boost::optional<Timestamp> WiredTigerRecoveryUnit::getPointInTimeReadTimestamp()
     getSession();
 
     switch (_timestampReadSource) {
-        case ReadSource::kLastApplied:
-            // The lastApplied timestamp is not always available, so it is not possible to invariant
-            // that it exists as other ReadSources do.
+        case ReadSource::kNoOverlap:
+            // The lastApplied and allDurable timestamps are not always available if the system has
+            // not accepted writes, so it is not possible to invariant that it exists as other
+            // ReadSources do.
             if (!_readAtTimestamp.isNull()) {
                 return _readAtTimestamp;
             }
             return boost::none;
-        case ReadSource::kNoOverlap:
         case ReadSource::kAllDurableSnapshot:
             invariant(!_readAtTimestamp.isNull());
             return _readAtTimestamp;
@@ -515,17 +514,6 @@ void WiredTigerRecoveryUnit::_txnOpen() {
                     session, _prepareConflictBehavior, _roundUpPreparedTimestamps);
             break;
         }
-        case ReadSource::kLastApplied: {
-            if (_sessionCache->snapshotManager().getLocalSnapshot()) {
-                _readAtTimestamp = _sessionCache->snapshotManager().beginTransactionOnLocalSnapshot(
-                    session, _prepareConflictBehavior, _roundUpPreparedTimestamps);
-            } else {
-                WiredTigerBeginTxnBlock(
-                    session, _prepareConflictBehavior, _roundUpPreparedTimestamps)
-                    .done();
-            }
-            break;
-        }
         case ReadSource::kNoOverlap: {
             _readAtTimestamp = _beginTransactionAtNoOverlapTimestamp(session);
             break;
@@ -555,8 +543,9 @@ void WiredTigerRecoveryUnit::_txnOpen() {
 
     LOGV2_DEBUG(22414,
                 3,
-                "WT begin_transaction for snapshot id {getSnapshotId_toNumber}",
-                "getSnapshotId_toNumber"_attr = getSnapshotId().toNumber());
+                "WT begin_transaction",
+                "snapshotId"_attr = getSnapshotId().toNumber(),
+                "readSource"_attr = toString(_timestampReadSource));
 }
 
 Timestamp WiredTigerRecoveryUnit::_beginTransactionAtAllDurableTimestamp(WT_SESSION* session) {
@@ -578,8 +567,8 @@ Timestamp WiredTigerRecoveryUnit::_beginTransactionAtAllDurableTimestamp(WT_SESS
 
 Timestamp WiredTigerRecoveryUnit::_beginTransactionAtNoOverlapTimestamp(WT_SESSION* session) {
 
-    auto lastApplied = _sessionCache->snapshotManager().getLocalSnapshot();
-    Timestamp allDurable = Timestamp(_oplogManager->fetchAllDurableValue(session->connection));
+    auto lastApplied = _sessionCache->snapshotManager().getLastApplied();
+    Timestamp allDurable = Timestamp(_sessionCache->getKVEngine()->getAllDurableTimestamp());
 
     // When using timestamps for reads and writes, it's important that readers and writers don't
     // overlap with the timestamps they use. In other words, at any point in the system there should
@@ -607,17 +596,34 @@ Timestamp WiredTigerRecoveryUnit::_beginTransactionAtNoOverlapTimestamp(WT_SESSI
     // should read afterward.
     Timestamp readTimestamp = (lastApplied) ? std::min(*lastApplied, allDurable) : allDurable;
 
+    if (readTimestamp.isNull()) {
+        // When there is not an all_durable or lastApplied timestamp available, read without a
+        // timestamp. Do not round up the read timestamp to the oldest timestamp.
+
+        // There is a race that allows new transactions to start between the time we check for a
+        // read timestamp and start our transaction, which can temporarily violate the contract of
+        // kNoOverlap. That is, writes will be visible that occur after the all_durable time. This
+        // is only possible for readers that start immediately after an initial sync that did not
+        // replicate any oplog entries. Future transactions will start reading at a timestamp once
+        // timestamped writes have been made.
+        WiredTigerBeginTxnBlock txnOpen(
+            session, _prepareConflictBehavior, _roundUpPreparedTimestamps);
+        LOGV2_DEBUG(4452900, 1, "no read timestamp available for kNoOverlap");
+        txnOpen.done();
+        return readTimestamp;
+    }
+
     WiredTigerBeginTxnBlock txnOpen(session,
                                     _prepareConflictBehavior,
                                     _roundUpPreparedTimestamps,
                                     RoundUpReadTimestamp::kRound);
     auto status = txnOpen.setReadSnapshot(readTimestamp);
     fassert(51066, status);
+    txnOpen.done();
 
-    // We might have rounded to oldest between calling getAllDurable and setReadSnapshot. We need
-    // to get the actual read timestamp we used.
+    // We might have rounded to oldest between calling getAllDurable and setReadSnapshot. We
+    // need to get the actual read timestamp we used.
     readTimestamp = _getTransactionReadTimestamp(session);
-    txnOpen.done();
     return readTimestamp;
 }
 
@@ -769,15 +775,14 @@ void WiredTigerRecoveryUnit::setTimestampReadSource(ReadSource readSource,
                                                     boost::optional<Timestamp> provided) {
     LOGV2_DEBUG(22416,
                 3,
-                "setting timestamp read source: {static_cast_int_readSource}, provided timestamp: "
-                "{provided_provided_none}",
-                "static_cast_int_readSource"_attr = static_cast<int>(readSource),
-                "provided_provided_none"_attr = ((provided) ? provided->toString() : "none"));
+                "setting timestamp read source",
+                "readSource"_attr = toString(readSource),
+                "provided"_attr = ((provided) ? provided->toString() : "none"));
 
     invariant(!_isActive() || _timestampReadSource == readSource,
               str::stream() << "Current state: " << toString(_getState())
                             << ". Invalid internal state while setting timestamp read source: "
-                            << static_cast<int>(readSource) << ", provided timestamp: "
+                            << toString(readSource) << ", provided timestamp: "
                             << (provided ? provided->toString() : "none"));
     invariant(!provided == (readSource != ReadSource::kProvided));
     invariant(!(provided && provided->isNull()));
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp
index 314a524063e..1f440d319b7 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp
@@ -163,11 +163,14 @@ public:
         clientAndCtx2 = makeClientAndOpCtx(harnessHelper.get(), "reader");
         ru1 = checked_cast<WiredTigerRecoveryUnit*>(clientAndCtx1.second->recoveryUnit());
         ru2 = checked_cast<WiredTigerRecoveryUnit*>(clientAndCtx2.second->recoveryUnit());
+        snapshotManager = dynamic_cast<WiredTigerSnapshotManager*>(
+            harnessHelper->getEngine()->getSnapshotManager());
     }
 
     std::unique_ptr<WiredTigerRecoveryUnitHarnessHelper> harnessHelper;
     ClientAndCtx clientAndCtx1, clientAndCtx2;
     WiredTigerRecoveryUnit *ru1, *ru2;
+    WiredTigerSnapshotManager* snapshotManager;
 
 private:
     const char* wt_uri = "table:prepare_transaction";
@@ -180,6 +183,86 @@ TEST_F(WiredTigerRecoveryUnitTestFixture, SetReadSource) {
     ASSERT_EQ(Timestamp(1, 1), ru1->getPointInTimeReadTimestamp());
 }
 
+// Verifies that kNoOverlap reads at min(lastApplied, all_durable), hiding writes behind a hole.
+TEST_F(WiredTigerRecoveryUnitTestFixture, NoOverlapReadSource) {
+    OperationContext* opCtx1 = clientAndCtx1.second.get();
+    std::unique_ptr<RecordStore> rs(harnessHelper->createRecordStore(opCtx1, "a.b"));
+
+    const std::string str = str::stream() << "test";
+    const Timestamp ts1{1, 1};
+    const Timestamp ts2{1, 2};
+    const Timestamp ts3{1, 3};
+
+    RecordId rid1, rid2, rid3;
+    {
+        WriteUnitOfWork wuow(opCtx1);
+        StatusWith<RecordId> res = rs->insertRecord(opCtx1, str.c_str(), str.size() + 1, ts1);
+        ASSERT_OK(res);
+        wuow.commit();
+        rid1 = res.getValue();
+        snapshotManager->setLastApplied(ts1);
+    }
+
+    // Read without a timestamp. The write should be visible.
+    ASSERT_EQ(opCtx1->recoveryUnit()->getTimestampReadSource(), RecoveryUnit::ReadSource::kUnset);
+    RecordData unused;
+    ASSERT_TRUE(rs->findRecord(opCtx1, rid1, &unused));
+
+    // Read with kNoOverlap. The write should be visible.
+    opCtx1->recoveryUnit()->abandonSnapshot();
+    opCtx1->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoOverlap);
+    ASSERT_TRUE(rs->findRecord(opCtx1, rid1, &unused));
+
+    {
+        // Start, but do not commit a transaction with opCtx2. This sets a timestamp at ts2, which
+        // creates a hole. kNoOverlap, which is a function of all_durable, will only be able to read
+        // at the time immediately before.
+        OperationContext* opCtx2 = clientAndCtx2.second.get();
+        WriteUnitOfWork wuow(opCtx2);
+        StatusWith<RecordId> res =
+            rs->insertRecord(opCtx2, str.c_str(), str.size() + 1, Timestamp());
+        ASSERT_OK(opCtx2->recoveryUnit()->setTimestamp(ts2));
+        ASSERT_OK(res);
+        rid2 = res.getValue();  // Capture now so the checks below probe a real RecordId.
+
+        // While holding open a transaction with opCtx2, perform an insert at ts3 with opCtx1. This
+        // creates a "hole".
+        {
+            WriteUnitOfWork wuow(opCtx1);
+            StatusWith<RecordId> res = rs->insertRecord(opCtx1, str.c_str(), str.size() + 1, ts3);
+            ASSERT_OK(res);
+            wuow.commit();
+            rid3 = res.getValue();
+            snapshotManager->setLastApplied(ts3);
+        }
+
+        // Read without a timestamp, and we should see the first and third records.
+        opCtx1->recoveryUnit()->abandonSnapshot();
+        opCtx1->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset);
+        ASSERT_TRUE(rs->findRecord(opCtx1, rid1, &unused));
+        ASSERT_FALSE(rs->findRecord(opCtx1, rid2, &unused));
+        ASSERT_TRUE(rs->findRecord(opCtx1, rid3, &unused));
+
+        // Now read at kNoOverlap. Since the transaction at ts2 has not committed, all_durable is
+        // held back to ts1. LastApplied has advanced to ts3, but because kNoOverlap is the minimum,
+        // we should only see one record.
+        opCtx1->recoveryUnit()->abandonSnapshot();
+        opCtx1->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoOverlap);
+        ASSERT_TRUE(rs->findRecord(opCtx1, rid1, &unused));
+        ASSERT_FALSE(rs->findRecord(opCtx1, rid2, &unused));
+        ASSERT_FALSE(rs->findRecord(opCtx1, rid3, &unused));
+
+        wuow.commit();
+    }
+
+    // Now that the hole has been closed, kNoOverlap should see all 3 records.
+    opCtx1->recoveryUnit()->abandonSnapshot();
+    opCtx1->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoOverlap);
+    ASSERT_TRUE(rs->findRecord(opCtx1, rid1, &unused));
+    ASSERT_TRUE(rs->findRecord(opCtx1, rid2, &unused));
+    ASSERT_TRUE(rs->findRecord(opCtx1, rid3, &unused));
+}
+
 TEST_F(WiredTigerRecoveryUnitTestFixture, CreateAndCheckForCachePressure) {
     int time = 1;
 
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.cpp
index 74988508a4c..efc2d8adfe5 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.cpp
@@ -48,17 +48,17 @@ void WiredTigerSnapshotManager::setCommittedSnapshot(const Timestamp& timestamp)
     _committedSnapshot = timestamp;
 }
 
-void WiredTigerSnapshotManager::setLocalSnapshot(const Timestamp& timestamp) {
-    stdx::lock_guard<Latch> lock(_localSnapshotMutex);
+void WiredTigerSnapshotManager::setLastApplied(const Timestamp& timestamp) {
+    stdx::lock_guard<Latch> lock(_lastAppliedMutex);
     if (timestamp.isNull())
-        _localSnapshot = boost::none;
+        _lastApplied = boost::none;
     else
-        _localSnapshot = timestamp;
+        _lastApplied = timestamp;
 }
 
-boost::optional<Timestamp> WiredTigerSnapshotManager::getLocalSnapshot() {
-    stdx::lock_guard<Latch> lock(_localSnapshotMutex);
-    return _localSnapshot;
+boost::optional<Timestamp> WiredTigerSnapshotManager::getLastApplied() {
+    stdx::lock_guard<Latch> lock(_lastAppliedMutex);
+    return _lastApplied;
 }
 
 void WiredTigerSnapshotManager::dropAllSnapshots() {
@@ -93,23 +93,4 @@ Timestamp WiredTigerSnapshotManager::beginTransactionOnCommittedSnapshot(
     return *_committedSnapshot;
 }
 
-Timestamp WiredTigerSnapshotManager::beginTransactionOnLocalSnapshot(
-    WT_SESSION* session,
-    PrepareConflictBehavior prepareConflictBehavior,
-    RoundUpPreparedTimestamps roundUpPreparedTimestamps) const {
-    WiredTigerBeginTxnBlock txnOpen(session, prepareConflictBehavior, roundUpPreparedTimestamps);
-
-    stdx::lock_guard<Latch> lock(_localSnapshotMutex);
-    invariant(_localSnapshot);
-    LOGV2_DEBUG(22427,
-                3,
-                "begin_transaction on local snapshot {localSnapshot_get}",
-                "localSnapshot_get"_attr = _localSnapshot.get().toString());
-    auto status = txnOpen.setReadSnapshot(_localSnapshot.get());
-    fassert(50775, status);
-
-    txnOpen.done();
-    return *_localSnapshot;
-}
-
 }  // namespace mongo
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.h b/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.h
index 1726a7d4c2b..b285c694e70 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.h
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_snapshot_manager.h
@@ -51,8 +51,8 @@ public:
     WiredTigerSnapshotManager() = default;
 
     void setCommittedSnapshot(const Timestamp& timestamp) final;
-    void setLocalSnapshot(const Timestamp& timestamp) final;
-    boost::optional<Timestamp> getLocalSnapshot() final;
+    void setLastApplied(const Timestamp& timestamp) final;
+    boost::optional<Timestamp> getLastApplied() final;
     void dropAllSnapshots() final;
 
     //
@@ -70,16 +70,6 @@ public:
         RoundUpPreparedTimestamps roundUpPreparedTimestamps) const;
 
     /**
-     * Starts a transaction on the last stable local timestamp, set by setLocalSnapshot.
-     *
-     * Throws if no local snapshot has been set.
-     */
-    Timestamp beginTransactionOnLocalSnapshot(
-        WT_SESSION* session,
-        PrepareConflictBehavior prepareConflictBehavior,
-        RoundUpPreparedTimestamps roundUpPreparedTimestamps) const;
-
-    /**
      * Returns lowest SnapshotName that could possibly be used by a future call to
      * beginTransactionOnCommittedSnapshot, or boost::none if there is currently no committed
      * snapshot.
@@ -95,9 +85,9 @@ private:
         MONGO_MAKE_LATCH("WiredTigerSnapshotManager::_committedSnapshotMutex");
     boost::optional<Timestamp> _committedSnapshot;
 
-    // Snapshot to use for reads at a local stable timestamp.
-    mutable Mutex _localSnapshotMutex =  // Guards _localSnapshot.
-        MONGO_MAKE_LATCH("WiredTigerSnapshotManager::_localSnapshotMutex");
-    boost::optional<Timestamp> _localSnapshot;
+    // Timestamp to use for reads at the lastApplied timestamp.
+    mutable Mutex _lastAppliedMutex =  // Guards _lastApplied.
+        MONGO_MAKE_LATCH("WiredTigerSnapshotManager::_lastAppliedMutex");
+    boost::optional<Timestamp> _lastApplied;
 };
 }  // namespace mongo
diff --git a/src/mongo/dbtests/storage_timestamp_tests.cpp b/src/mongo/dbtests/storage_timestamp_tests.cpp
index d148f48d416..fca1968b3e4 100644
--- a/src/mongo/dbtests/storage_timestamp_tests.cpp
+++ b/src/mongo/dbtests/storage_timestamp_tests.cpp
@@ -1525,7 +1525,7 @@ public:
         // This test does not run a real ReplicationCoordinator, so must advance the snapshot
         // manager manually.
         auto storageEngine = cc().getServiceContext()->getStorageEngine();
-        storageEngine->getSnapshotManager()->setLocalSnapshot(presentTs);
+        storageEngine->getSnapshotManager()->setLastApplied(presentTs);
 
         const auto beforeTxnTime = _clock->reserveTicks(1);
         auto beforeTxnTs = beforeTxnTime.asTimestamp();
@@ -3106,7 +3106,7 @@ public:
         // This test does not run a real ReplicationCoordinator, so must advance the snapshot
         // manager manually.
         auto storageEngine = cc().getServiceContext()->getStorageEngine();
-        storageEngine->getSnapshotManager()->setLocalSnapshot(presentTs);
+        storageEngine->getSnapshotManager()->setLastApplied(presentTs);
         const auto beforeTxnTime = _clock->reserveTicks(1);
         beforeTxnTs = beforeTxnTime.asTimestamp();
         commitEntryTs = beforeTxnTime.addTicks(1).asTimestamp();
author	Louis Williams <louis.williams@mongodb.com>	2020-05-12 13:39:31 -0400
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2020-05-12 18:03:48 +0000
commit	25c694f365db0f07a445bd17b6cd5cbf32f5f2f9 (patch)
tree	c90451e347838f428b8cad851531b42c42cce6fa /src
parent	e2602ad053b2120982fbcac8e33e1ad64e6ec30a (diff)
download	mongo-25c694f365db0f07a445bd17b6cd5cbf32f5f2f9.tar.gz