From 11c68393df88a6f1ea4855e6ac15e54ca9f9d976 Mon Sep 17 00:00:00 2001
From: Louis Williams
Date: Thu, 10 Sep 2020 12:30:21 -0400
Subject: SERVER-48452 Internal readers should default to reading without a timestamp

Removes ReadSource::kUnset in favor of kNoTimestamp as the default

Makes the following behavioral changes to AutoGetCollectionForRead:
* Removes special early-return handling for kNoTimestamp
* Only user or DBDirectClient operations are eligible to read at kLastApplied.
* Operations only read at kLastApplied when in the SECONDARY state, nothing else.

This means most internal operations that use DBDirectClient do not need to use
a ReadSourceScope to ensure they read at kNoTimestamp.
---
 src/mongo/db/catalog_raii.h | 2 +-
 src/mongo/db/catalog_raii_test.cpp | 6 +-
 src/mongo/db/db_raii.cpp | 35 +++++++--
 src/mongo/db/db_raii_test.cpp | 82 ++++++++++++++++++++--
 src/mongo/db/free_mon/free_mon_storage.cpp | 4 ++
 src/mongo/db/ftdc/collector.cpp | 5 +-
 src/mongo/db/index_build_entry_helpers.cpp | 3 +-
 src/mongo/db/index_builds_coordinator.cpp | 11 +--
 src/mongo/db/namespace_string.cpp | 12 ++++
 src/mongo/db/namespace_string.h | 5 ++
 src/mongo/db/pipeline/document_source_writer.h | 2 +-
 src/mongo/db/repl/bgsync.cpp | 10 +--
 src/mongo/db/repl/collection_bulk_loader_impl.cpp | 2 +-
 src/mongo/db/repl/oplog_applier_impl.cpp | 5 +-
 src/mongo/db/repl/oplog_batcher.cpp | 16 +----
 ...replication_coordinator_external_state_impl.cpp | 3 +-
 src/mongo/db/repl/replication_recovery.cpp | 4 +-
 .../db/repl/transaction_oplog_application.cpp | 14 ++--
 .../ephemeral_for_test_kv_engine_test.cpp | 4 +-
 .../ephemeral_for_test_recovery_unit.cpp | 1 -
 .../ephemeral_for_test_recovery_unit.h | 2 +-
 .../db/storage/kv/kv_engine_timestamps_test.cpp | 4 +-
 src/mongo/db/storage/recovery_unit.h | 10 +--
 src/mongo/db/storage/snapshot_helper.cpp | 80 ++++++++++++++-------
 src/mongo/db/storage/snapshot_helper.h | 4 ++
 .../wiredtiger/wiredtiger_recovery_unit.cpp | 4 --
 .../storage/wiredtiger/wiredtiger_recovery_unit.h | 2 +-
 .../wiredtiger/wiredtiger_recovery_unit_test.cpp | 5 +-
 src/mongo/db/transaction_participant.cpp | 5 +-
 src/mongo/dbtests/querytests.cpp | 2 +-
 src/mongo/dbtests/storage_timestamp_tests.cpp | 12 ++--
 31 files changed, 247 insertions(+), 109 deletions(-)

diff --git a/src/mongo/db/catalog_raii.h b/src/mongo/db/catalog_raii.h
index 47444538dd5..367b87e933b 100644
--- a/src/mongo/db/catalog_raii.h
+++ b/src/mongo/db/catalog_raii.h
@@ -291,7 +291,7 @@ private:
 class ReadSourceScope {
 public:
     ReadSourceScope(OperationContext* opCtx,
-                    RecoveryUnit::ReadSource readSource = RecoveryUnit::ReadSource::kUnset,
+                    RecoveryUnit::ReadSource readSource,
                     boost::optional provided = boost::none);

     ~ReadSourceScope();
diff --git a/src/mongo/db/catalog_raii_test.cpp b/src/mongo/db/catalog_raii_test.cpp
index cc222301ca0..e767d1f30ca 100644
--- a/src/mongo/db/catalog_raii_test.cpp
+++ b/src/mongo/db/catalog_raii_test.cpp
@@ -230,7 +230,7 @@ public:
     }

 private:
-    ReadSource _source = ReadSource::kUnset;
+    ReadSource _source = ReadSource::kNoTimestamp;
     boost::optional _timestamp;
 };

@@ -257,8 +257,8 @@ TEST_F(ReadSourceScopeTest, RestoreReadSource) {
     ASSERT_EQ(opCtx()->recoveryUnit()->getTimestampReadSource(), ReadSource::kProvided);
     ASSERT_EQ(opCtx()->recoveryUnit()->getPointInTimeReadTimestamp(), Timestamp(1, 2));
     {
-        ReadSourceScope scope(opCtx());
-        ASSERT_EQ(opCtx()->recoveryUnit()->getTimestampReadSource(), ReadSource::kUnset);
+        ReadSourceScope scope(opCtx(), ReadSource::kNoTimestamp);
+
ASSERT_EQ(opCtx()->recoveryUnit()->getTimestampReadSource(), ReadSource::kNoTimestamp); opCtx()->recoveryUnit()->setTimestampReadSource(ReadSource::kNoOverlap); ASSERT_EQ(opCtx()->recoveryUnit()->getTimestampReadSource(), ReadSource::kNoOverlap); diff --git a/src/mongo/db/db_raii.cpp b/src/mongo/db/db_raii.cpp index a8329f4641d..22a9181f157 100644 --- a/src/mongo/db/db_raii.cpp +++ b/src/mongo/db/db_raii.cpp @@ -90,6 +90,10 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx, const NamespaceStringOrUUID& nsOrUUID, AutoGetCollectionViewMode viewMode, Date_t deadline) { + // The caller was expecting to conflict with batch application before entering this function. + // i.e. the caller does not currently have a ShouldNotConflict... block in scope. + bool callerWasConflicting = opCtx->lockState()->shouldConflictWithSecondaryBatchApplication(); + // Don't take the ParallelBatchWriterMode lock when the server parameter is set and our // storage engine supports snapshot reads. if (gAllowSecondaryReadsDuringBatchApplication.load() && @@ -100,11 +104,6 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx, const auto collectionLockMode = getLockModeForQuery(opCtx, nsOrUUID.nss()); _autoColl.emplace(opCtx, nsOrUUID, collectionLockMode, viewMode, deadline); - // If the read source is explicitly set to kNoTimestamp, we read the most up to date data and do - // not consider changing our ReadSource (e.g. FTDC needs that). - if (opCtx->recoveryUnit()->getTimestampReadSource() == RecoveryUnit::ReadSource::kNoTimestamp) - return; - repl::ReplicationCoordinator* const replCoord = repl::ReplicationCoordinator::get(opCtx); const auto readConcernLevel = repl::ReadConcernArgs::get(opCtx).getLevel(); @@ -154,6 +153,32 @@ AutoGetCollectionForRead::AutoGetCollectionForRead(OperationContext* opCtx, << afterClusterTime->asTimestamp().toString()); } + // This assertion protects operations from reading inconsistent data on secondaries when + // using the default ReadSource of kNoTimestamp. + + // Reading at lastApplied on secondaries is the safest behavior and is enabled for all user + // and DBDirectClient reads using 'local' and 'available' readConcerns. If an internal + // operation wishes to read without a timestamp during a batch, a ShouldNotConflict can + // suppress this fatal assertion with the following considerations: + // * The operation is not reading replicated data in a replication state where batch + // application is active OR + // * Reading inconsistent, out-of-order data is either inconsequential or required by + // the operation. + + // If the caller entered this function expecting to conflict with batch application + // (i.e. no ShouldNotConflict block in scope), but they are reading without a timestamp and + // not holding the PBWM lock, then there is a possibility that this reader may + // unintentionally see inconsistent data during a batch. Certain namespaces are applied + // serially in oplog application, and therefore can be safely read without taking the PBWM + // lock or reading at a timestamp. 
+ if (readSource == RecoveryUnit::ReadSource::kNoTimestamp && callerWasConflicting && + !nss.mustBeAppliedInOwnOplogBatch() && + SnapshotHelper::shouldReadAtLastApplied(opCtx, nss)) { + LOGV2_FATAL(4728700, + "Reading from replicated collection without read timestamp or PBWM lock", + "collection"_attr = nss); + } + auto minSnapshot = coll->getMinimumVisibleSnapshot(); if (!SnapshotHelper::collectionChangesConflictWithRead(minSnapshot, readTimestamp)) { return; diff --git a/src/mongo/db/db_raii_test.cpp b/src/mongo/db/db_raii_test.cpp index b101ce91961..eba322c5581 100644 --- a/src/mongo/db/db_raii_test.cpp +++ b/src/mongo/db/db_raii_test.cpp @@ -42,6 +42,7 @@ #include "mongo/db/query/internal_plans.h" #include "mongo/db/storage/snapshot_manager.h" #include "mongo/logv2/log.h" +#include "mongo/unittest/death_test.h" #include "mongo/unittest/unittest.h" #include "mongo/util/time_support.h" @@ -219,6 +220,8 @@ TEST_F(DBRAIITestFixture, Lock::DBLock dbLock1(client1.second.get(), nss.db(), MODE_IX); ASSERT(client1.second->lockState()->isDbLockedForMode(nss.db(), MODE_IX)); + // Simulate using a DBDirectClient to test this behavior for user reads. + client2.first->setInDirectClient(true); AutoGetCollectionForRead coll(client2.second.get(), nss); } @@ -239,6 +242,8 @@ TEST_F(DBRAIITestFixture, Lock::DBLock dbLock1(client1.second.get(), nss.db(), MODE_IX); ASSERT(client1.second->lockState()->isDbLockedForMode(nss.db(), MODE_IX)); + // Simulate using a DBDirectClient to test this behavior for user reads. + client2.first->setInDirectClient(true); AutoGetCollectionForRead coll(client2.second.get(), nss); } @@ -266,10 +271,12 @@ TEST_F(DBRAIITestFixture, Lock::DBLock dbLock1(client1.second.get(), nss.db(), MODE_IX); ASSERT(client1.second->lockState()->isDbLockedForMode(nss.db(), MODE_IX)); + // Simulate using a DBDirectClient to test this behavior for user reads. + client2.first->setInDirectClient(true); AutoGetCollectionForRead coll(client2.second.get(), NamespaceString("local.system.js")); // Reading from an unreplicated collection does not change the ReadSource to kLastApplied. ASSERT_EQ(client2.second.get()->recoveryUnit()->getTimestampReadSource(), - RecoveryUnit::ReadSource::kUnset); + RecoveryUnit::ReadSource::kNoTimestamp); // Reading from a replicated collection will try to switch to kLastApplied. Because we are // already reading without a timestamp and we can't reacquire the PBWM lock to continue reading @@ -300,12 +307,15 @@ TEST_F(DBRAIITestFixture, AutoGetCollectionForReadLastAppliedConflict) { auto snapshotManager = client1.second.get()->getServiceContext()->getStorageEngine()->getSnapshotManager(); snapshotManager->setLastApplied(opTime.getTimestamp()); + + // Simulate using a DBDirectClient to test this behavior for user reads. + client1.first->setInDirectClient(true); AutoGetCollectionForRead coll(client1.second.get(), nss); // We can't read from kLastApplied in this scenario because there is a catalog conflict. Resort // to taking the PBWM lock and reading without a timestamp. 
ASSERT_EQ(client1.second.get()->recoveryUnit()->getTimestampReadSource(), - RecoveryUnit::ReadSource::kUnset); + RecoveryUnit::ReadSource::kNoTimestamp); ASSERT_TRUE(client1.second.get()->lockState()->isLockHeldForMode( resourceIdParallelBatchWriterMode, MODE_IS)); } @@ -325,6 +335,9 @@ TEST_F(DBRAIITestFixture, AutoGetCollectionForReadLastAppliedUnavailable) { auto snapshotManager = client1.second.get()->getServiceContext()->getStorageEngine()->getSnapshotManager(); ASSERT_FALSE(snapshotManager->getLastApplied()); + + // Simulate using a DBDirectClient to test this behavior for user reads. + client1.first->setInDirectClient(true); AutoGetCollectionForRead coll(client1.second.get(), nss); ASSERT_EQ(client1.second.get()->recoveryUnit()->getTimestampReadSource(), @@ -334,6 +347,33 @@ TEST_F(DBRAIITestFixture, AutoGetCollectionForReadLastAppliedUnavailable) { resourceIdParallelBatchWriterMode, MODE_IS)); } +TEST_F(DBRAIITestFixture, AutoGetCollectionForReadOplogOnSecondary) { + // This test simulates a situation where AutoGetCollectionForRead reads at lastApplied on a + // secondary. + auto replCoord = repl::ReplicationCoordinator::get(client1.second.get()); + ASSERT_OK(replCoord->setFollowerMode(repl::MemberState::RS_SECONDARY)); + + // Ensure the default ReadSource is used. + ASSERT_EQ(client1.second.get()->recoveryUnit()->getTimestampReadSource(), + RecoveryUnit::ReadSource::kNoTimestamp); + + // Don't call into the ReplicationCoordinator to update lastApplied because it is only a mock + // class and does not update the correct state in the SnapshotManager. + repl::OpTime opTime(Timestamp(2, 1), 1); + auto snapshotManager = + client1.second.get()->getServiceContext()->getStorageEngine()->getSnapshotManager(); + snapshotManager->setLastApplied(opTime.getTimestamp()); + + // Simulate using a DBDirectClient to test this behavior for user reads. + client1.first->setInDirectClient(true); + AutoGetCollectionForRead coll(client1.second.get(), NamespaceString::kRsOplogNamespace); + + ASSERT_EQ(client1.second.get()->recoveryUnit()->getTimestampReadSource(), + RecoveryUnit::ReadSource::kLastApplied); + ASSERT_FALSE(client1.second.get()->lockState()->isLockHeldForMode( + resourceIdParallelBatchWriterMode, MODE_IS)); +} + TEST_F(DBRAIITestFixture, AutoGetCollectionForReadUsesLastAppliedOnSecondary) { auto opCtx = client1.second.get(); @@ -342,11 +382,15 @@ TEST_F(DBRAIITestFixture, AutoGetCollectionForReadUsesLastAppliedOnSecondary) { CollectionOptions options; options.capped = true; ASSERT_OK(storageInterface()->createCollection(opCtx, nss, options)); + + // Simulate using a DBDirectClient to test this behavior for user reads. + opCtx->getClient()->setInDirectClient(true); AutoGetCollectionForRead autoColl(opCtx, nss); auto exec = makeTailableQueryPlan(opCtx, autoColl.getCollection()); // The collection scan should use the default ReadSource on a primary. - ASSERT_EQ(RecoveryUnit::ReadSource::kUnset, opCtx->recoveryUnit()->getTimestampReadSource()); + ASSERT_EQ(RecoveryUnit::ReadSource::kNoTimestamp, + opCtx->recoveryUnit()->getTimestampReadSource()); // When the tailable query recovers from its yield, it should discover that the node is // secondary and change its read source. 
@@ -373,6 +417,9 @@ TEST_F(DBRAIITestFixture, AutoGetCollectionForReadChangedReadSourceAfterStepUp) ASSERT_OK(storageInterface()->createCollection(opCtx, nss, options)); ASSERT_OK( repl::ReplicationCoordinator::get(opCtx)->setFollowerMode(repl::MemberState::RS_SECONDARY)); + + // Simulate using a DBDirectClient to test this behavior for user reads. + opCtx->getClient()->setInDirectClient(true); AutoGetCollectionForRead autoColl(opCtx, nss); auto exec = makeTailableQueryPlan(opCtx, autoColl.getCollection()); @@ -390,9 +437,36 @@ TEST_F(DBRAIITestFixture, AutoGetCollectionForReadChangedReadSourceAfterStepUp) // After restoring, the collection scan should now be reading with kUnset, the default on // primaries. - ASSERT_EQ(RecoveryUnit::ReadSource::kUnset, opCtx->recoveryUnit()->getTimestampReadSource()); + ASSERT_EQ(RecoveryUnit::ReadSource::kNoTimestamp, + opCtx->recoveryUnit()->getTimestampReadSource()); ASSERT_EQUALS(PlanExecutor::IS_EOF, exec->getNext(&unused, nullptr)); } +DEATH_TEST_F(DBRAIITestFixture, AutoGetCollectionForReadUnsafe, "Fatal assertion") { + auto opCtx = client1.second.get(); + ASSERT_OK(storageInterface()->createCollection(opCtx, nss, {})); + + ASSERT_OK( + repl::ReplicationCoordinator::get(opCtx)->setFollowerMode(repl::MemberState::RS_SECONDARY)); + + // Non-user read on a replicated collection should fail because we are reading on a secondary + // without a timestamp. + AutoGetCollectionForRead autoColl(opCtx, nss); +} + +TEST_F(DBRAIITestFixture, AutoGetCollectionForReadSafe) { + auto opCtx = client1.second.get(); + ASSERT_OK(storageInterface()->createCollection(opCtx, nss, {})); + + ASSERT_OK( + repl::ReplicationCoordinator::get(opCtx)->setFollowerMode(repl::MemberState::RS_SECONDARY)); + + // Non-user read on a replicated collection should not fail because of the ShouldNotConflict + // block. + ShouldNotConflictWithSecondaryBatchApplicationBlock noConflict(opCtx->lockState()); + + AutoGetCollectionForRead autoColl(opCtx, nss); +} + } // namespace } // namespace mongo diff --git a/src/mongo/db/free_mon/free_mon_storage.cpp b/src/mongo/db/free_mon/free_mon_storage.cpp index 7c25c6a671c..89be39295e1 100644 --- a/src/mongo/db/free_mon/free_mon_storage.cpp +++ b/src/mongo/db/free_mon/free_mon_storage.cpp @@ -57,6 +57,10 @@ boost::optional FreeMonStorage::read(OperationContext* opCt auto storageInterface = repl::StorageInterface::get(opCtx); + // Ensure we read without a timestamp. + invariant(RecoveryUnit::ReadSource::kNoTimestamp == + opCtx->recoveryUnit()->getTimestampReadSource()); + AutoGetCollectionForRead autoRead(opCtx, NamespaceString::kServerConfigurationNamespace); auto swObj = storageInterface->findById( diff --git a/src/mongo/db/ftdc/collector.cpp b/src/mongo/db/ftdc/collector.cpp index 37dd68b136e..11ba9d4d3a4 100644 --- a/src/mongo/db/ftdc/collector.cpp +++ b/src/mongo/db/ftdc/collector.cpp @@ -70,8 +70,9 @@ std::tuple FTDCCollectorCollection::collect(Client* client) { ShouldNotConflictWithSecondaryBatchApplicationBlock shouldNotConflictBlock(opCtx->lockState()); opCtx->lockState()->skipAcquireTicket(); - // Explicitly start future read transactions without a timestamp. - opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); + // Ensure future transactions read without a timestamp. 
+ invariant(RecoveryUnit::ReadSource::kNoTimestamp == + opCtx->recoveryUnit()->getTimestampReadSource()); for (auto& collector : _collectors) { BSONObjBuilder subObjBuilder(builder.subobjStart(collector->name())); diff --git a/src/mongo/db/index_build_entry_helpers.cpp b/src/mongo/db/index_build_entry_helpers.cpp index da3f43b29e2..fc689873f6e 100644 --- a/src/mongo/db/index_build_entry_helpers.cpp +++ b/src/mongo/db/index_build_entry_helpers.cpp @@ -254,7 +254,8 @@ Status removeIndexBuildEntry(OperationContext* opCtx, UUID indexBuildUUID) { StatusWith getIndexBuildEntry(OperationContext* opCtx, UUID indexBuildUUID) { // Read the most up to date data. - ReadSourceScope readSourceScope(opCtx, RecoveryUnit::ReadSource::kNoTimestamp); + invariant(RecoveryUnit::ReadSource::kNoTimestamp == + opCtx->recoveryUnit()->getTimestampReadSource()); AutoGetCollectionForRead autoCollection(opCtx, NamespaceString::kIndexBuildEntryNamespace); const Collection* collection = autoCollection.getCollection(); diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp index c8caafc318f..d27dd0848db 100644 --- a/src/mongo/db/index_builds_coordinator.cpp +++ b/src/mongo/db/index_builds_coordinator.cpp @@ -2553,7 +2553,8 @@ void IndexBuildsCoordinator::_buildIndex(OperationContext* opCtx, // Read without a timestamp. When we commit, we block writes which guarantees all writes are // visible. - opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); + invariant(RecoveryUnit::ReadSource::kNoTimestamp == + opCtx->recoveryUnit()->getTimestampReadSource()); // The collection scan might read with a kMajorityCommitted read source, but will restore // kNoTimestamp afterwards. _scanCollectionAndInsertSortedKeysIntoIndex(opCtx, replState); @@ -2655,7 +2656,7 @@ void IndexBuildsCoordinator::_insertKeysFromSideTablesWithoutBlockingWrites( uassertStatusOK(_indexBuildsManager.drainBackgroundWrites( opCtx, replState->buildUUID, - RecoveryUnit::ReadSource::kUnset, + RecoveryUnit::ReadSource::kNoTimestamp, IndexBuildInterceptor::DrainYieldPolicy::kYield)); } @@ -2681,7 +2682,7 @@ void IndexBuildsCoordinator::_insertKeysFromSideTablesBlockingWrites( uassertStatusOK(_indexBuildsManager.drainBackgroundWrites( opCtx, replState->buildUUID, - RecoveryUnit::ReadSource::kUnset, + RecoveryUnit::ReadSource::kNoTimestamp, IndexBuildInterceptor::DrainYieldPolicy::kNoYield)); } @@ -2769,7 +2770,7 @@ IndexBuildsCoordinator::CommitResult IndexBuildsCoordinator::_insertKeysFromSide uassertStatusOK(_indexBuildsManager.drainBackgroundWrites( opCtx, replState->buildUUID, - RecoveryUnit::ReadSource::kUnset, + RecoveryUnit::ReadSource::kNoTimestamp, IndexBuildInterceptor::DrainYieldPolicy::kNoYield)); try { @@ -2916,7 +2917,7 @@ StatusWith> IndexBuildsCoordinator::_runIndexReb uassertStatusOK(_indexBuildsManager.drainBackgroundWrites( opCtx, replState->buildUUID, - RecoveryUnit::ReadSource::kUnset, + RecoveryUnit::ReadSource::kNoTimestamp, IndexBuildInterceptor::DrainYieldPolicy::kNoYield)); uassertStatusOK( diff --git a/src/mongo/db/namespace_string.cpp b/src/mongo/db/namespace_string.cpp index 9471aca909c..bee7df5ca40 100644 --- a/src/mongo/db/namespace_string.cpp +++ b/src/mongo/db/namespace_string.cpp @@ -144,6 +144,18 @@ bool NamespaceString::isLegalClientSystemNS() const { return false; } +/** + * Oplog entries on 'system.views' should also be processed one at a time. 
View catalog immediately + * reflects changes for each oplog entry so we can see inconsistent view catalog if multiple oplog + * entries on 'system.views' are being applied out of the original order. + * + * Process updates to 'admin.system.version' individually as well so the secondary's FCV when + * processing each operation matches the primary's when committing that operation. + */ +bool NamespaceString::mustBeAppliedInOwnOplogBatch() const { + return isSystemDotViews() || isServerConfigurationCollection() || isPrivilegeCollection(); +} + NamespaceString NamespaceString::makeListCollectionsNSS(StringData dbName) { NamespaceString nss(dbName, listCollectionsCursorCol); dassert(nss.isValid()); diff --git a/src/mongo/db/namespace_string.h b/src/mongo/db/namespace_string.h index a43406f8bd4..e5de9877c84 100644 --- a/src/mongo/db/namespace_string.h +++ b/src/mongo/db/namespace_string.h @@ -337,6 +337,11 @@ public: */ bool isDropPendingNamespace() const; + /** + * Returns true if operations on this namespace must be applied in their own oplog batch. + */ + bool mustBeAppliedInOwnOplogBatch() const; + /** * Returns the drop-pending namespace name for this namespace, provided the given optime. * diff --git a/src/mongo/db/pipeline/document_source_writer.h b/src/mongo/db/pipeline/document_source_writer.h index 9c175890ecf..b91c49a90db 100644 --- a/src/mongo/db/pipeline/document_source_writer.h +++ b/src/mongo/db/pipeline/document_source_writer.h @@ -65,7 +65,7 @@ public: } repl::ReadConcernArgs::get(_opCtx) = repl::ReadConcernArgs(); - _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::kUnset); + _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); } ~DocumentSourceWriteBlock() { diff --git a/src/mongo/db/repl/bgsync.cpp b/src/mongo/db/repl/bgsync.cpp index 77daa595256..a9c8b47e61a 100644 --- a/src/mongo/db/repl/bgsync.cpp +++ b/src/mongo/db/repl/bgsync.cpp @@ -700,8 +700,9 @@ void BackgroundSync::_runRollback(OperationContext* opCtx, ShouldNotConflictWithSecondaryBatchApplicationBlock noConflict(opCtx->lockState()); - // Explicitly start future read transactions without a timestamp. - opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); + // Ensure future transactions read without a timestamp. + invariant(RecoveryUnit::ReadSource::kNoTimestamp == + opCtx->recoveryUnit()->getTimestampReadSource()); // Rollback is a synchronous operation that uses the task executor and may not be // executed inside the fetcher callback. @@ -878,8 +879,9 @@ void BackgroundSync::start(OperationContext* opCtx) { OpTime lastAppliedOpTime; ShouldNotConflictWithSecondaryBatchApplicationBlock noConflict(opCtx->lockState()); - // Explicitly start future read transactions without a timestamp. - opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); + // Ensure future transactions read without a timestamp. 
+ invariant(RecoveryUnit::ReadSource::kNoTimestamp == + opCtx->recoveryUnit()->getTimestampReadSource()); do { lastAppliedOpTime = _readLastAppliedOpTime(opCtx); diff --git a/src/mongo/db/repl/collection_bulk_loader_impl.cpp b/src/mongo/db/repl/collection_bulk_loader_impl.cpp index eab00297cdd..23fce736413 100644 --- a/src/mongo/db/repl/collection_bulk_loader_impl.cpp +++ b/src/mongo/db/repl/collection_bulk_loader_impl.cpp @@ -278,7 +278,7 @@ Status CollectionBulkLoaderImpl::commit() { status = _idIndexBlock->drainBackgroundWrites( _opCtx.get(), - RecoveryUnit::ReadSource::kUnset, + RecoveryUnit::ReadSource::kNoTimestamp, _nss.isSystemDotViews() ? IndexBuildInterceptor::DrainYieldPolicy::kNoYield : IndexBuildInterceptor::DrainYieldPolicy::kYield); if (!status.isOK()) { diff --git a/src/mongo/db/repl/oplog_applier_impl.cpp b/src/mongo/db/repl/oplog_applier_impl.cpp index 24ff5ad96d6..f769fd14c6d 100644 --- a/src/mongo/db/repl/oplog_applier_impl.cpp +++ b/src/mongo/db/repl/oplog_applier_impl.cpp @@ -779,8 +779,9 @@ Status OplogApplierImpl::applyOplogBatchPerWorker(OperationContext* opCtx, // destroyed by unstash in its destructor. Thus we set the flag explicitly. opCtx->lockState()->setShouldConflictWithSecondaryBatchApplication(false); - // Explicitly start future read transactions without a timestamp. - opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); + // Ensure future transactions read without a timestamp. + invariant(RecoveryUnit::ReadSource::kNoTimestamp == + opCtx->recoveryUnit()->getTimestampReadSource()); // When querying indexes, we return the record matching the key if it exists, or an adjacent // document. This means that it is possible for us to hit a prepare conflict if we query for an diff --git a/src/mongo/db/repl/oplog_batcher.cpp b/src/mongo/db/repl/oplog_batcher.cpp index 99f7077519d..efd257d26d8 100644 --- a/src/mongo/db/repl/oplog_batcher.cpp +++ b/src/mongo/db/repl/oplog_batcher.cpp @@ -121,13 +121,6 @@ bool isUnpreparedCommit(const OplogEntry& entry) { * the final oplog entry in the transaction is processed individually, since the operations are not * actually run until the commit operation is reached. * - * Oplog entries on 'system.views' should also be processed one at a time. View catalog immediately - * reflects changes for each oplog entry so we can see inconsistent view catalog if multiple oplog - * entries on 'system.views' are being applied out of the original order. - * - * Process updates to 'admin.system.version' individually as well so the secondary's FCV when - * processing each operation matches the primary's when committing that operation. - * * The ends of large transactions (> 16MB) should also be processed immediately on its own in order * to avoid scenarios where parts of the transaction is batched with other operations not in the * transaction. @@ -143,8 +136,7 @@ bool OplogBatcher::mustProcessIndividually(const OplogEntry& entry) { } const auto nss = entry.getNss(); - return nss.isSystemDotViews() || nss.isServerConfigurationCollection() || - nss.isPrivilegeCollection(); + return nss.mustBeAppliedInOwnOplogBatch(); } std::size_t OplogBatcher::getOpCount(const OplogEntry& entry) { @@ -355,12 +347,6 @@ std::size_t getBatchLimitOplogEntries() { std::size_t getBatchLimitOplogBytes(OperationContext* opCtx, StorageInterface* storageInterface) { // We can't change the timestamp source within a write unit of work. 
invariant(!opCtx->lockState()->inAWriteUnitOfWork()); - // We're only reading oplog metadata, so the timestamp is not important. If we read with the - // default (which is lastApplied on secondaries), we may end up with a reader that is at - // lastApplied. If we then roll back, then when we reconstruct prepared transactions during - // rollback recovery we will be preparing transactions before the read timestamp, which triggers - // an assertion in WiredTiger. - ReadSourceScope readSourceScope(opCtx, RecoveryUnit::ReadSource::kNoTimestamp); auto oplogMaxSizeResult = storageInterface->getOplogMaxSize(opCtx); auto oplogMaxSize = fassert(40301, oplogMaxSizeResult); return std::min(oplogMaxSize / 10, std::size_t(replBatchLimitBytes.load())); diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp index 3cfe7be562b..3f03ceb9d28 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp @@ -357,7 +357,8 @@ void ReplicationCoordinatorExternalStateImpl::clearAppliedThroughIfCleanShutdown // Ensure that all writes are visible before reading. If we failed mid-batch, it would be // possible to read from a kNoOverlap ReadSource where not all writes to the minValid document // are visible, generating a writeConflict that would not resolve. - opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); + invariant(RecoveryUnit::ReadSource::kNoTimestamp == + opCtx->recoveryUnit()->getTimestampReadSource()); auto loadLastOpTimeAndWallTimeResult = loadLastOpTimeAndWallTime(opCtx); if (_replicationProcess->getConsistencyMarkers()->getOplogTruncateAfterPoint(opCtx).isNull() && diff --git a/src/mongo/db/repl/replication_recovery.cpp b/src/mongo/db/repl/replication_recovery.cpp index bba59beb626..c0c242421f9 100644 --- a/src/mongo/db/repl/replication_recovery.cpp +++ b/src/mongo/db/repl/replication_recovery.cpp @@ -131,7 +131,9 @@ public: _oplogApplicationEndPoint(oplogApplicationEndPoint) {} void startup(OperationContext* opCtx) final { - opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); + invariant(opCtx->recoveryUnit()->getTimestampReadSource() == + RecoveryUnit::ReadSource::kNoTimestamp); + _client = std::make_unique(opCtx); BSONObj predicate = _oplogApplicationEndPoint ? BSON("$gte" << _oplogApplicationStartPoint << "$lte" << *_oplogApplicationEndPoint) diff --git a/src/mongo/db/repl/transaction_oplog_application.cpp b/src/mongo/db/repl/transaction_oplog_application.cpp index 0c7a1f0727b..67fb840de64 100644 --- a/src/mongo/db/repl/transaction_oplog_application.cpp +++ b/src/mongo/db/repl/transaction_oplog_application.cpp @@ -262,8 +262,9 @@ std::pair, bool> _readTransactionOperationsFromOplogChai const std::vector& cachedOps, const bool checkForCommands) noexcept { bool isTransactionWithCommand = false; - // Traverse the oplog chain with its own snapshot and read timestamp. - ReadSourceScope readSourceScope(opCtx); + // Ensure future transactions read without a timestamp. + invariant(RecoveryUnit::ReadSource::kNoTimestamp == + opCtx->recoveryUnit()->getTimestampReadSource()); std::vector ops; @@ -538,11 +539,10 @@ void reconstructPreparedTransactions(OperationContext* opCtx, repl::OplogApplica LOGV2(21848, "Hit skipReconstructPreparedTransactions failpoint"); return; } - // Read the transactions table and the oplog collection without a timestamp. 
- // The below DBDirectClient read uses AutoGetCollectionForRead which could implicitly change the - // read source. So we need to explicitly set the read source to kNoTimestamp to force reads in - // this scope to be untimestamped. - ReadSourceScope readSourceScope(opCtx, RecoveryUnit::ReadSource::kNoTimestamp); + + // Ensure future transactions read without a timestamp. + invariant(RecoveryUnit::ReadSource::kNoTimestamp == + opCtx->recoveryUnit()->getTimestampReadSource()); DBDirectClient client(opCtx); const auto cursor = client.query(NamespaceString::kSessionTransactionsTableNamespace, diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_kv_engine_test.cpp b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_kv_engine_test.cpp index fcf49f74442..e249daed751 100644 --- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_kv_engine_test.cpp +++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_kv_engine_test.cpp @@ -359,7 +359,7 @@ TEST_F(EphemeralForTestKVEngineTest, ReadOlderSnapshotsSimple) { ASSERT(!rs->findRecord(&opCtx, loc2, &rd)); opCtx.recoveryUnit()->abandonSnapshot(); - opCtx.recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset); + opCtx.recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); ASSERT(rs->findRecord(&opCtx, loc1, &rd)); ASSERT(rs->findRecord(&opCtx, loc2, &rd)); } @@ -452,7 +452,7 @@ TEST_F(EphemeralForTestKVEngineTest, SetReadTimestampBehindOldestTimestamp) { ASSERT_THROWS_CODE(rs->findRecord(&opCtx, loc2, &rd), DBException, ErrorCodes::SnapshotTooOld); opCtx.recoveryUnit()->abandonSnapshot(); - opCtx.recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset); + opCtx.recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); ASSERT(rs->findRecord(&opCtx, loc1, &rd)); ASSERT(rs->findRecord(&opCtx, loc2, &rd)); } diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.cpp b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.cpp index 5b2e77e6292..44d73995482 100644 --- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.cpp +++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.cpp @@ -119,7 +119,6 @@ bool RecoveryUnit::forkIfNeeded() { boost::optional readFrom = boost::none; switch (_timestampReadSource) { - case ReadSource::kUnset: case ReadSource::kNoTimestamp: case ReadSource::kMajorityCommitted: case ReadSource::kNoOverlap: diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.h b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.h index 0e0afbb1a13..c31d0d54d86 100644 --- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.h +++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_recovery_unit.h @@ -131,7 +131,7 @@ private: Timestamp _commitTimestamp = Timestamp::min(); // Specifies which external source to use when setting read timestamps on transactions. 
- ReadSource _timestampReadSource = ReadSource::kUnset; + ReadSource _timestampReadSource = ReadSource::kNoTimestamp; boost::optional _readAtTimestamp = boost::none; }; diff --git a/src/mongo/db/storage/kv/kv_engine_timestamps_test.cpp b/src/mongo/db/storage/kv/kv_engine_timestamps_test.cpp index 57bf3bf714d..1e928738d57 100644 --- a/src/mongo/db/storage/kv/kv_engine_timestamps_test.cpp +++ b/src/mongo/db/storage/kv/kv_engine_timestamps_test.cpp @@ -372,7 +372,7 @@ TEST_F(SnapshotManagerTests, InsertAndReadOnLastAppliedSnapshot) { // Not reading on the last applied timestamp returns the most recent data. auto op = makeOperation(); auto ru = op->recoveryUnit(); - ru->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset); + ru->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); ASSERT_EQ(itCountOn(op), 1); ASSERT(readRecordOn(op, id)); @@ -408,7 +408,7 @@ TEST_F(SnapshotManagerTests, UpdateAndDeleteOnLocalSnapshot) { // Not reading on the last local timestamp returns the most recent data. auto op = makeOperation(); auto ru = op->recoveryUnit(); - ru->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset); + ru->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); ASSERT_EQ(itCountOn(op), 1); auto record = readRecordOn(op, id); ASSERT_EQ(std::string(record->data.data()), "Blue spotted stingray"); diff --git a/src/mongo/db/storage/recovery_unit.h b/src/mongo/db/storage/recovery_unit.h index 5c8be96b528..2057f8854b3 100644 --- a/src/mongo/db/storage/recovery_unit.h +++ b/src/mongo/db/storage/recovery_unit.h @@ -392,11 +392,7 @@ public: */ enum ReadSource { /** - * Do not read from a timestamp. This is the default. - */ - kUnset, - /** - * Read without a timestamp explicitly. + * Read without a timestamp. This is the default. */ kNoTimestamp, /** @@ -424,8 +420,6 @@ public: static std::string toString(ReadSource rs) { switch (rs) { - case ReadSource::kUnset: - return "kUnset"; case ReadSource::kNoTimestamp: return "kNoTimestamp"; case ReadSource::kMajorityCommitted: @@ -455,7 +449,7 @@ public: boost::optional provided = boost::none) {} virtual ReadSource getTimestampReadSource() const { - return ReadSource::kUnset; + return ReadSource::kNoTimestamp; }; /** diff --git a/src/mongo/db/storage/snapshot_helper.cpp b/src/mongo/db/storage/snapshot_helper.cpp index 5acbcd3a513..84af208d391 100644 --- a/src/mongo/db/storage/snapshot_helper.cpp +++ b/src/mongo/db/storage/snapshot_helper.cpp @@ -38,29 +38,37 @@ #include "mongo/logv2/log.h" namespace mongo { -namespace SnapshotHelper { -bool canSwitchReadSource(OperationContext* opCtx) { - - // Most readConcerns have behavior controlled at higher levels. Local and available are the only - // ReadConcerns that should consider changing, since they read without a timestamp by default. +namespace { +bool canReadAtLastApplied(OperationContext* opCtx) { + // Local and available are the only ReadConcern levels that allow their ReadSource to be + // overridden to read at lastApplied. They read without a timestamp by default, but this check + // allows user secondary reads from conflicting with oplog batch application by reading at a + // consistent point in time. + // Internal operations use DBDirectClient as a loopback to perform local operations, and they + // expect the same level of consistency guarantees as any user operation. For that reason, + // DBDirectClient should be able to change the owning operation's ReadSource in order to serve + // consistent data. 
const auto readConcernLevel = repl::ReadConcernArgs::get(opCtx).getLevel(); - if (readConcernLevel == repl::ReadConcernLevel::kLocalReadConcern || - readConcernLevel == repl::ReadConcernLevel::kAvailableReadConcern) { + if ((opCtx->getClient()->isFromUserConnection() || opCtx->getClient()->isInDirectClient()) && + (readConcernLevel == repl::ReadConcernLevel::kLocalReadConcern || + readConcernLevel == repl::ReadConcernLevel::kAvailableReadConcern)) { return true; } - return false; } +} // namespace +namespace SnapshotHelper { bool shouldReadAtLastApplied(OperationContext* opCtx, const NamespaceString& nss, std::string* reason) { - // If this is true, then the operation opted-in to the PBWM lock, implying that it cannot change // its ReadSource. It's important to note that it is possible for this to be false, but still be // holding the PBWM lock, explained below. if (opCtx->lockState()->shouldConflictWithSecondaryBatchApplication()) { - *reason = "conflicts with batch application"; + if (reason) { + *reason = "conflicts with batch application"; + } return false; } @@ -71,16 +79,32 @@ bool shouldReadAtLastApplied(OperationContext* opCtx, // guaranteed to observe all previous writes. This may occur when multiple collection locks are // held concurrently, which is often the case when DBDirectClient is used. if (opCtx->lockState()->isLockHeldForMode(resourceIdParallelBatchWriterMode, MODE_IS)) { - *reason = "PBWM lock is held"; + if (reason) { + *reason = "PBWM lock is held"; + } LOGV2_DEBUG(20577, 1, "not reading at lastApplied because the PBWM lock is held"); return false; } - // If we are in a replication state (like secondary or primary catch-up) where we are not - // accepting writes, we should read at lastApplied. If this node can accept writes, then no - // conflicting replication batches are being applied and we can read from the default snapshot. + // If this node can accept writes (i.e. primary), then no conflicting replication batches are + // being applied and we can read from the default snapshot. If we are in a replication state + // (like secondary or primary catch-up) where we are not accepting writes, we should read at + // lastApplied. if (repl::ReplicationCoordinator::get(opCtx)->canAcceptWritesForDatabase(opCtx, "admin")) { - *reason = "primary"; + if (reason) { + *reason = "primary"; + } + return false; + } + + // If we are not secondary, then we should not attempt to read at lastApplied because it may not + // be available or valid. Any operations reading outside of the primary or secondary states must + // be internal. We give these operations the benefit of the doubt rather than attempting to read + // at a lastApplied timestamp that is not valid. + if (!repl::ReplicationCoordinator::get(opCtx)->isInPrimaryOrSecondaryState(opCtx)) { + if (reason) { + *reason = "not primary or secondary"; + } return false; } @@ -88,7 +112,9 @@ bool shouldReadAtLastApplied(OperationContext* opCtx, // written by the replication system. However, the oplog is special, as it *is* written by the // replication system. 
if (!nss.isReplicated() && !nss.isOplog()) { - *reason = "unreplicated collection"; + if (reason) { + *reason = "unreplicated collection"; + } return false; } @@ -96,15 +122,14 @@ bool shouldReadAtLastApplied(OperationContext* opCtx, } boost::optional getNewReadSource(OperationContext* opCtx, const NamespaceString& nss) { - const bool canSwitch = canSwitchReadSource(opCtx); - if (!canSwitch) { + if (!canReadAtLastApplied(opCtx)) { return boost::none; } const auto existing = opCtx->recoveryUnit()->getTimestampReadSource(); std::string reason; const bool readAtLastApplied = shouldReadAtLastApplied(opCtx, nss, &reason); - if (existing == RecoveryUnit::ReadSource::kUnset) { + if (existing == RecoveryUnit::ReadSource::kNoTimestamp) { // Shifting from reading without a timestamp to reading with a timestamp can be dangerous // because writes will appear to vanish. This case is intended for new reads on secondaries // and query yield recovery after state transitions from primary to secondary. @@ -122,13 +147,16 @@ boost::optional getNewReadSource(OperationContext* opC // Given readers do not survive rollbacks, it's okay to go from reading with a timestamp to // reading without one. More writes will become visible. if (!readAtLastApplied) { - LOGV2_DEBUG( - 4452902, 2, "Changing ReadSource to kUnset", logAttrs(nss), "reason"_attr = reason); - // This shift to kUnset assumes that callers will not make future attempts to manipulate - // their ReadSources after performing reads at an un-timetamped snapshot. The only - // exception is callers of this function that may need to change from kUnset to - // kLastApplied in the event of a catalog conflict or query yield. - return RecoveryUnit::ReadSource::kUnset; + LOGV2_DEBUG(4452902, + 2, + "Changing ReadSource to kNoTimestamp", + logAttrs(nss), + "reason"_attr = reason); + // This shift to kNoTimestamp assumes that callers will not make future attempts to + // manipulate their ReadSources after performing reads at an un-timetamped snapshot. The + // only exception is callers of this function that may need to change from kNoTimestamp + // to kLastApplied in the event of a catalog conflict or query yield. + return RecoveryUnit::ReadSource::kNoTimestamp; } } return boost::none; diff --git a/src/mongo/db/storage/snapshot_helper.h b/src/mongo/db/storage/snapshot_helper.h index fa8fdd85f24..c24dfd16d8c 100644 --- a/src/mongo/db/storage/snapshot_helper.h +++ b/src/mongo/db/storage/snapshot_helper.h @@ -37,6 +37,10 @@ namespace SnapshotHelper { boost::optional getNewReadSource(OperationContext* opCtx, const NamespaceString& nss); +bool shouldReadAtLastApplied(OperationContext* opCtx, + const NamespaceString& nss, + std::string* reason = nullptr); + bool collectionChangesConflictWithRead(boost::optional collectionMin, boost::optional readTimestamp); } // namespace SnapshotHelper diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp index 1167fd673f3..b3cc4c6dde7 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp @@ -445,7 +445,6 @@ boost::optional WiredTigerRecoveryUnit::getPointInTimeReadTimestamp() // transaction to establish a read timestamp, but only for ReadSources that are expected to have // read timestamps. 
switch (_timestampReadSource) { - case ReadSource::kUnset: case ReadSource::kNoTimestamp: return boost::none; case ReadSource::kMajorityCommitted: @@ -484,7 +483,6 @@ boost::optional WiredTigerRecoveryUnit::getPointInTimeReadTimestamp() return _readAtTimestamp; // The follow ReadSources returned values in the first switch block. - case ReadSource::kUnset: case ReadSource::kNoTimestamp: case ReadSource::kMajorityCommitted: case ReadSource::kProvided: @@ -507,7 +505,6 @@ void WiredTigerRecoveryUnit::_txnOpen() { WT_SESSION* session = _session->getSession(); switch (_timestampReadSource) { - case ReadSource::kUnset: case ReadSource::kNoTimestamp: { if (_isOplogReader) { _oplogVisibleTs = static_cast(_oplogManager->getOplogReadTimestamp()); @@ -827,7 +824,6 @@ void WiredTigerRecoveryUnit::setTimestampReadSource(ReadSource readSource, "setting timestamp read source", "readSource"_attr = toString(readSource), "provided"_attr = ((provided) ? provided->toString() : "none")); - invariant(!_isActive() || _timestampReadSource == readSource, str::stream() << "Current state: " << toString(_getState()) << ". Invalid internal state while setting timestamp read source: " diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h index 312a46f5c09..0d557fc6329 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h @@ -250,7 +250,7 @@ private: bool _isTimestamped = false; // Specifies which external source to use when setting read timestamps on transactions. - ReadSource _timestampReadSource = ReadSource::kUnset; + ReadSource _timestampReadSource = ReadSource::kNoTimestamp; // Commits are assumed ordered. Unordered commits are assumed to always need to reserve a // new optime, and thus always call oplogDiskLocRegister() on the record store. diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp index b50d4b79889..740672e7a2c 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit_test.cpp @@ -203,7 +203,8 @@ TEST_F(WiredTigerRecoveryUnitTestFixture, NoOverlapReadSource) { } // Read without a timestamp. The write should be visible. - ASSERT_EQ(opCtx1->recoveryUnit()->getTimestampReadSource(), RecoveryUnit::ReadSource::kUnset); + ASSERT_EQ(opCtx1->recoveryUnit()->getTimestampReadSource(), + RecoveryUnit::ReadSource::kNoTimestamp); RecordData unused; ASSERT_TRUE(rs->findRecord(opCtx1, rid1, &unused)); @@ -237,7 +238,7 @@ TEST_F(WiredTigerRecoveryUnitTestFixture, NoOverlapReadSource) { // Read without a timestamp, and we should see the first and third records. 
opCtx1->recoveryUnit()->abandonSnapshot(); - opCtx1->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset); + opCtx1->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); ASSERT_TRUE(rs->findRecord(opCtx1, rid1, &unused)); ASSERT_FALSE(rs->findRecord(opCtx1, rid2, &unused)); ASSERT_TRUE(rs->findRecord(opCtx1, rid3, &unused)); diff --git a/src/mongo/db/transaction_participant.cpp b/src/mongo/db/transaction_participant.cpp index 742bfd087b4..de5874ae3a6 100644 --- a/src/mongo/db/transaction_participant.cpp +++ b/src/mongo/db/transaction_participant.cpp @@ -124,8 +124,9 @@ struct ActiveTransactionHistory { ActiveTransactionHistory fetchActiveTransactionHistory(OperationContext* opCtx, const LogicalSessionId& lsid) { - // Restore the current timestamp read source after fetching transaction history. - ReadSourceScope readSourceScope(opCtx); + // Restore the current timestamp read source after fetching transaction history using + // DBDirectClient, which may change our ReadSource. + ReadSourceScope readSourceScope(opCtx, RecoveryUnit::ReadSource::kNoTimestamp); ActiveTransactionHistory result; diff --git a/src/mongo/dbtests/querytests.cpp b/src/mongo/dbtests/querytests.cpp index 84d533e2069..022dfb970ed 100644 --- a/src/mongo/dbtests/querytests.cpp +++ b/src/mongo/dbtests/querytests.cpp @@ -117,7 +117,7 @@ protected: uassertStatusOK(indexer.insertAllDocumentsInCollection(&_opCtx, _collection)); uassertStatusOK( indexer.drainBackgroundWrites(&_opCtx, - RecoveryUnit::ReadSource::kUnset, + RecoveryUnit::ReadSource::kNoTimestamp, IndexBuildInterceptor::DrainYieldPolicy::kNoYield)); uassertStatusOK(indexer.checkConstraints(&_opCtx)); { diff --git a/src/mongo/dbtests/storage_timestamp_tests.cpp b/src/mongo/dbtests/storage_timestamp_tests.cpp index 750c8ac447d..d270e0467d7 100644 --- a/src/mongo/dbtests/storage_timestamp_tests.cpp +++ b/src/mongo/dbtests/storage_timestamp_tests.cpp @@ -103,7 +103,7 @@ public: OneOffRead(OperationContext* opCtx, const Timestamp& ts) : _opCtx(opCtx) { _opCtx->recoveryUnit()->abandonSnapshot(); if (ts.isNull()) { - _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset); + _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); } else { _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kProvided, ts); } @@ -111,7 +111,7 @@ public: ~OneOffRead() { _opCtx->recoveryUnit()->abandonSnapshot(); - _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset); + _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); } private: @@ -234,7 +234,7 @@ public: */ void reset(NamespaceString nss) const { ::mongo::writeConflictRetry(_opCtx, "deleteAll", nss.ns(), [&] { - _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kUnset); + _opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp); AutoGetCollection collRaii(_opCtx, nss, LockMode::MODE_X); if (collRaii) { @@ -2057,7 +2057,7 @@ public: firstInsert.asTimestamp()); ASSERT_OK(indexer.drainBackgroundWrites(_opCtx, - RecoveryUnit::ReadSource::kUnset, + RecoveryUnit::ReadSource::kNoTimestamp, IndexBuildInterceptor::DrainYieldPolicy::kNoYield)); auto indexCatalog = autoColl.getCollection()->getIndexCatalog(); @@ -2100,7 +2100,7 @@ public: setReplCoordAppliedOpTime(repl::OpTime(afterSecondInsert.asTimestamp(), presentTerm)); ASSERT_OK(indexer.drainBackgroundWrites(_opCtx, - RecoveryUnit::ReadSource::kUnset, + 
RecoveryUnit::ReadSource::kNoTimestamp, IndexBuildInterceptor::DrainYieldPolicy::kNoYield)); { @@ -2843,7 +2843,7 @@ public: ASSERT_FALSE(buildingIndex->indexBuildInterceptor()->areAllWritesApplied(_opCtx)); ASSERT_OK(indexer.drainBackgroundWrites(_opCtx, - RecoveryUnit::ReadSource::kUnset, + RecoveryUnit::ReadSource::kNoTimestamp, IndexBuildInterceptor::DrainYieldPolicy::kNoYield)); -- cgit v1.2.1
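
For readers skimming the patch, the net effect of the snapshot_helper.cpp and db_raii.cpp
changes can be summarized in one place. The following is a minimal standalone sketch, not
MongoDB source: the OpState struct, its field names, and the main() driver are invented
stand-ins for OperationContext, Client, and ReplicationCoordinator state, and the PBWM
locking path plus the new fatal assertion in AutoGetCollectionForRead are only described
in comments.

// read_source_sketch.cpp -- illustrative only, not MongoDB source.
#include <iostream>

enum class ReadSource { kNoTimestamp, kLastApplied };

// Invented stand-in for the operation state consulted by the patched helpers.
struct OpState {
    bool isUserOrDirectClient = false;         // user connection or DBDirectClient loopback
    bool localOrAvailableReadConcern = true;   // readConcern level 'local' or 'available'
    bool conflictsWithBatchApplication = true; // no ShouldNotConflict... block in scope
    bool holdsPBWMLock = false;                // PBWM resource already held in MODE_IS
    bool canAcceptWrites = false;              // i.e. the node is primary
    bool inPrimaryOrSecondaryState = true;     // steady-state member, not startup/rollback
    bool nssIsReplicated = true;               // target namespace is replicated
};

// Models canReadAtLastApplied(): only user or DBDirectClient operations using 'local' or
// 'available' read concern may have their ReadSource changed.
bool canReadAtLastApplied(const OpState& op) {
    return op.isUserOrDirectClient && op.localOrAvailableReadConcern;
}

// Models shouldReadAtLastApplied(): lastApplied is only appropriate on a node that is not
// accepting writes, is in a steady replication state, targets a replicated namespace, and
// has neither opted into nor already taken the PBWM lock.
bool shouldReadAtLastApplied(const OpState& op) {
    if (op.conflictsWithBatchApplication) return false; // "conflicts with batch application"
    if (op.holdsPBWMLock) return false;                 // "PBWM lock is held"
    if (op.canAcceptWrites) return false;               // "primary"
    if (!op.inPrimaryOrSecondaryState) return false;    // "not primary or secondary"
    if (!op.nssIsReplicated) return false;              // "unreplicated collection"
    return true;
}

// Models getNewReadSource(): switch to kLastApplied only when both checks pass; otherwise
// stay on the new kNoTimestamp default.
ReadSource newReadSource(const OpState& op) {
    return (canReadAtLastApplied(op) && shouldReadAtLastApplied(op)) ? ReadSource::kLastApplied
                                                                     : ReadSource::kNoTimestamp;
}

int main() {
    // A user or DBDirectClient read on a secondary; AutoGetCollectionForRead has already
    // released the operation from conflicting with batch application.
    OpState userOnSecondary;
    userOnSecondary.isUserOrDirectClient = true;
    userOnSecondary.conflictsWithBatchApplication = false;
    std::cout << "user read on secondary     -> "
              << (newReadSource(userOnSecondary) == ReadSource::kLastApplied ? "kLastApplied"
                                                                             : "kNoTimestamp")
              << '\n';

    // An internal reader keeps the kNoTimestamp default. Per the patch, if it reads a
    // replicated collection on a secondary it must hold the PBWM lock, scope a
    // ShouldNotConflictWithSecondaryBatchApplicationBlock, or read a namespace applied in
    // its own oplog batch; otherwise AutoGetCollectionForRead fails with LOGV2_FATAL(4728700).
    OpState internalOnSecondary;  // defaults: not a user op, still conflicts with batches
    std::cout << "internal read on secondary -> "
              << (newReadSource(internalOnSecondary) == ReadSource::kLastApplied
                      ? "kLastApplied"
                      : "kNoTimestamp")
              << '\n';
    return 0;
}

Under these rules, only user and DBDirectClient reads with 'local' or 'available' read
concern are ever moved to kLastApplied, and only while the node is in the SECONDARY state;
everything else keeps the kNoTimestamp default introduced by this patch.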