diff options
author | Gregory Wlodarek <gregory.wlodarek@mongodb.com> | 2019-07-26 12:25:26 -0400 |
---|---|---|
committer | Gregory Wlodarek <gregory.wlodarek@mongodb.com> | 2019-07-26 12:34:32 -0400 |
commit | 25d5f6a0b01f261e633587013e4ab8116ea2930a (patch) | |
tree | b9659a708b228b7027a859ef8ed7b89b7b4f73b9 /src/mongo | |
parent | 5f7471631d238fb3269fa0f2c3689ac62e93b61f (diff) | |
download | mongo-25d5f6a0b01f261e633587013e4ab8116ea2930a.tar.gz |
SERVER-41861 Replace 'all_committed' with 'all_durable'
Diffstat (limited to 'src/mongo')
29 files changed, 124 insertions, 116 deletions
diff --git a/src/mongo/db/catalog/catalog_control_test.cpp b/src/mongo/db/catalog/catalog_control_test.cpp index 59afeb15a25..827afdaed4b 100644 --- a/src/mongo/db/catalog/catalog_control_test.cpp +++ b/src/mongo/db/catalog/catalog_control_test.cpp @@ -81,7 +81,7 @@ public: return false; } void clearDropPendingState() final {} - Timestamp getAllCommittedTimestamp() const final { + Timestamp getAllDurableTimestamp() const final { return {}; } Timestamp getOldestOpenReadTimestamp() const final { diff --git a/src/mongo/db/commands/dbhash.cpp b/src/mongo/db/commands/dbhash.cpp index 473b323f6b5..5153192234d 100644 --- a/src/mongo/db/commands/dbhash.cpp +++ b/src/mongo/db/commands/dbhash.cpp @@ -166,16 +166,16 @@ public: // down. This isn't an actual concern because the testing infrastructure won't use the // $_internalReadAtClusterTime option in any test suite where clean shutdown is expected // to occur concurrently with tests running. - auto allCommittedTime = storageEngine->getAllCommittedTimestamp(); - invariant(!allCommittedTime.isNull()); + auto allDurableTime = storageEngine->getAllDurableTimestamp(); + invariant(!allDurableTime.isNull()); uassert(ErrorCodes::InvalidOptions, str::stream() << "$_internalReadAtClusterTime value must not be greater" - " than the all-committed timestamp. Requested clusterTime: " + " than the all_durable timestamp. Requested clusterTime: " << targetClusterTime.toString() - << "; all-committed timestamp: " - << allCommittedTime.toString(), - allCommittedTime >= targetClusterTime); + << "; all_durable timestamp: " + << allDurableTime.toString(), + allDurableTime >= targetClusterTime); // The $_internalReadAtClusterTime option causes any storage-layer cursors created // during plan execution to read from a consistent snapshot of data at the supplied diff --git a/src/mongo/db/commands/find_cmd.cpp b/src/mongo/db/commands/find_cmd.cpp index 43413cda5f1..52250811dbd 100644 --- a/src/mongo/db/commands/find_cmd.cpp +++ b/src/mongo/db/commands/find_cmd.cpp @@ -338,17 +338,17 @@ public: // shutting down. This isn't an actual concern because the testing infrastructure // won't use the $_internalReadAtClusterTime option in any test suite where clean // shutdown is expected to occur concurrently with tests running. - auto allCommittedTime = storageEngine->getAllCommittedTimestamp(); - invariant(!allCommittedTime.isNull()); + auto allDurableTime = storageEngine->getAllDurableTimestamp(); + invariant(!allDurableTime.isNull()); uassert(ErrorCodes::InvalidOptions, str::stream() << "$_internalReadAtClusterTime value must not be greater" - " than the all-committed timestamp. Requested" + " than the all_durable timestamp. Requested" " clusterTime: " << targetClusterTime->toString() - << "; all-committed timestamp: " - << allCommittedTime.toString(), - allCommittedTime >= targetClusterTime); + << "; all_durable timestamp: " + << allDurableTime.toString(), + allDurableTime >= targetClusterTime); // The $_internalReadAtClusterTime option causes any storage-layer cursors created // during plan execution to read from a consistent snapshot of data at the supplied diff --git a/src/mongo/db/repl/initial_syncer.cpp b/src/mongo/db/repl/initial_syncer.cpp index 6ecbbed4437..1b8bd562bc4 100644 --- a/src/mongo/db/repl/initial_syncer.cpp +++ b/src/mongo/db/repl/initial_syncer.cpp @@ -515,7 +515,7 @@ void InitialSyncer::_startInitialSyncAttemptCallback( auto storageEngine = getGlobalServiceContext()->getStorageEngine(); if (storageEngine) { // Set the oldestTimestamp to one because WiredTiger does not allow us to set it to zero - // since that would also set the all committed point to zero. We specifically don't set + // since that would also set the all_durable point to zero. We specifically don't set // the stable timestamp here because that will trigger taking a first stable checkpoint even // though the initialDataTimestamp is still set to kAllowUnstableCheckpointsSentinel. storageEngine->setOldestTimestamp(kTimestampOne); @@ -803,7 +803,7 @@ void InitialSyncer::_lastOplogEntryFetcherCallbackForBeginApplyingTimestamp( auto filterBob = BSONObjBuilder(queryBob.subobjStart("filter")); filterBob.append("_id", FeatureCompatibilityVersionParser::kParameterName); filterBob.done(); - // As part of reading the FCV, we ensure the source node "all committed" timestamp has advanced + // As part of reading the FCV, we ensure the source node's all_durable timestamp has advanced // to at least the timestamp of the last optime that we found in the lastOplogEntryFetcher. // When document locking is used, there could be oplog "holes" which would result in // inconsistent initial sync data if we didn't do this. diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index 99bacd646cb..0e33068b3cf 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -3462,26 +3462,26 @@ boost::optional<OpTimeAndWallTime> ReplicationCoordinatorImpl::_chooseStableOpTi if (_readWriteAbility->canAcceptNonLocalWrites(lk) && _storage->supportsDocLocking(_service)) { // If the storage engine supports document level locking, then it is possible for oplog // writes to commit out of order. In that case, we don't want to set the stable timestamp - // ahead of the all committed timestamp. This is not a problem for oplog application - // because we only set lastApplied between batches when the all committed timestamp cannot - // be behind. During oplog application the all committed timestamp can jump around since + // ahead of the all_durable timestamp. This is not a problem for oplog application + // because we only set lastApplied between batches when the all_durable timestamp cannot + // be behind. During oplog application the all_durable timestamp can jump around since // we first write oplog entries to the oplog and then go back and apply them. // // We must construct an upper bound for the stable optime candidates such that the upper // bound is at most 'maximumStableOpTime' and any candidate with a timestamp higher than the - // all committed is greater than the upper bound. If the timestamp of 'maximumStableOpTime' - // is <= the all committed, then we use 'maximumStableOpTime'. Otherwise, we construct an - // optime using the all committed and the term of 'maximumStableOpTime'. We must argue that - // there are no stable optime candidates with a timestamp greater than the all committed and + // all_durable is greater than the upper bound. If the timestamp of 'maximumStableOpTime' + // is <= the all_durable, then we use 'maximumStableOpTime'. Otherwise, we construct an + // optime using the all_durable and the term of 'maximumStableOpTime'. We must argue that + // there are no stable optime candidates with a timestamp greater than the all_durable and // a term less than that of 'maximumStableOpTime'. Suppose there were. The - // 'maximumStableOpTime' is either the commit point or the lastApplied, so the all committed + // 'maximumStableOpTime' is either the commit point or the lastApplied, so the all_durable // can only be behind 'maximumStableOpTime' on a primary. If there is a candidate with a - // higher timestamp than the all committed but a lower term than 'maximumStableOpTime', then - // the all committed corresponds to a write in an earlier term than the current one. But + // higher timestamp than the all_durable but a lower term than 'maximumStableOpTime', then + // the all_durable corresponds to a write in an earlier term than the current one. But // this is not possible on a primary, since on step-up, the primary storage commits a 'new // primary' oplog entry in the new term before accepting any new writes, so the all - // committed must be in the current term. - maximumStableTimestamp = std::min(_storage->getAllCommittedTimestamp(_service), + // durable must be in the current term. + maximumStableTimestamp = std::min(_storage->getAllDurableTimestamp(_service), maximumStableOpTime.opTime.getTimestamp()); } diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp index 4efee18a386..f355d04bba0 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp @@ -4260,7 +4260,7 @@ TEST_F(StableOpTimeTest, SetMyLastAppliedSetsStableOpTimeForStorage) { ASSERT_EQUALS(Timestamp::min(), getStorageInterface()->getStableTimestamp()); ASSERT_OK(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY)); - getStorageInterface()->allCommittedTimestamp = Timestamp(1, 1); + getStorageInterface()->allDurableTimestamp = Timestamp(1, 1); replCoordSetMyLastAppliedOpTime(OpTimeWithTermOne(1, 1), Date_t() + Seconds(100)); replCoordSetMyLastDurableOpTime(OpTimeWithTermOne(1, 1), Date_t() + Seconds(100)); simulateSuccessfulV1Election(); @@ -4274,7 +4274,7 @@ TEST_F(StableOpTimeTest, SetMyLastAppliedSetsStableOpTimeForStorage) { stableTimestamp = getStorageInterface()->getStableTimestamp(); ASSERT_EQUALS(Timestamp(1, 1), getStorageInterface()->getStableTimestamp()); - getStorageInterface()->allCommittedTimestamp = Timestamp(3, 1); + getStorageInterface()->allDurableTimestamp = Timestamp(3, 1); // Check that the stable timestamp is updated for the storage engine when we set the applied // optime. @@ -4329,7 +4329,7 @@ TEST_F(StableOpTimeTest, SetMyLastAppliedSetsStableOpTimeForStorageDisableMajori // Check that the stable timestamp is updated for the storage engine when we set the applied // optime, even though the last committed optime is unset. - getStorageInterface()->allCommittedTimestamp = Timestamp(1, 1); + getStorageInterface()->allDurableTimestamp = Timestamp(1, 1); replCoordSetMyLastAppliedOpTime(OpTime({1, 1}, 1), Date_t() + Seconds(100)); ASSERT_EQUALS(Timestamp(1, 1), getStorageInterface()->getStableTimestamp()); } @@ -4366,7 +4366,7 @@ TEST_F(StableOpTimeTest, AdvanceCommitPointSetsStableOpTimeForStorage) { long long term = 2; getStorageInterface()->supportsDocLockingBool = true; - getStorageInterface()->allCommittedTimestamp = Timestamp(2, 1); + getStorageInterface()->allDurableTimestamp = Timestamp(2, 1); // Add three stable optime candidates. replCoordSetMyLastAppliedOpTime(OpTime({2, 1}, term), Date_t() + Seconds(1)); @@ -4387,7 +4387,7 @@ TEST_F(StableOpTimeTest, AdvanceCommitPointSetsStableOpTimeForStorage) { stableTimestamp = getStorageInterface()->getStableTimestamp(); ASSERT_EQUALS(Timestamp(2, 1), stableTimestamp); - getStorageInterface()->allCommittedTimestamp = Timestamp(4, 4); + getStorageInterface()->allDurableTimestamp = Timestamp(4, 4); // Check that the stable timestamp is updated when we advance the commit point. replCoordAdvanceCommitPoint(OpTime({3, 2}, term), Date_t() + Seconds(3), false); diff --git a/src/mongo/db/repl/storage_interface.h b/src/mongo/db/repl/storage_interface.h index a1cdd60ed92..cb178adf519 100644 --- a/src/mongo/db/repl/storage_interface.h +++ b/src/mongo/db/repl/storage_interface.h @@ -398,12 +398,16 @@ public: bool primaryOnly = false) = 0; /** - * Returns the all committed timestamp. All transactions with timestamps earlier than the - * all committed timestamp are committed. Only storage engines that support document level - * locking must provide an implementation. Other storage engines may provide a no-op - * implementation. + * Returns the all_durable timestamp. All transactions with timestamps earlier than the + * all_durable timestamp are committed. Only storage engines that support document level locking + * must provide an implementation. Other storage engines may provide a no-op implementation. + * + * The all_durable timestamp only includes non-prepared transactions that have been given a + * commit_timestamp and prepared transactions that have been given a durable_timestamp. + * Previously, the deprecated all_committed timestamp would also include prepared transactions + * that were prepared but not committed which could make the stable timestamp briefly jump back. */ - virtual Timestamp getAllCommittedTimestamp(ServiceContext* serviceCtx) const = 0; + virtual Timestamp getAllDurableTimestamp(ServiceContext* serviceCtx) const = 0; /** * Returns the oldest read timestamp in use by an open transaction. Storage engines that support diff --git a/src/mongo/db/repl/storage_interface_impl.cpp b/src/mongo/db/repl/storage_interface_impl.cpp index 7cfa35ba7af..03d8bc66e55 100644 --- a/src/mongo/db/repl/storage_interface_impl.cpp +++ b/src/mongo/db/repl/storage_interface_impl.cpp @@ -1214,8 +1214,8 @@ bool StorageInterfaceImpl::supportsDocLocking(ServiceContext* serviceCtx) const return serviceCtx->getStorageEngine()->supportsDocLocking(); } -Timestamp StorageInterfaceImpl::getAllCommittedTimestamp(ServiceContext* serviceCtx) const { - return serviceCtx->getStorageEngine()->getAllCommittedTimestamp(); +Timestamp StorageInterfaceImpl::getAllDurableTimestamp(ServiceContext* serviceCtx) const { + return serviceCtx->getStorageEngine()->getAllDurableTimestamp(); } Timestamp StorageInterfaceImpl::getOldestOpenReadTimestamp(ServiceContext* serviceCtx) const { diff --git a/src/mongo/db/repl/storage_interface_impl.h b/src/mongo/db/repl/storage_interface_impl.h index 733d11bd5f8..85d7c9186c3 100644 --- a/src/mongo/db/repl/storage_interface_impl.h +++ b/src/mongo/db/repl/storage_interface_impl.h @@ -172,7 +172,7 @@ public: bool supportsDocLocking(ServiceContext* serviceCtx) const override; - Timestamp getAllCommittedTimestamp(ServiceContext* serviceCtx) const override; + Timestamp getAllDurableTimestamp(ServiceContext* serviceCtx) const override; Timestamp getOldestOpenReadTimestamp(ServiceContext* serviceCtx) const override; diff --git a/src/mongo/db/repl/storage_interface_mock.cpp b/src/mongo/db/repl/storage_interface_mock.cpp index d171032c575..77936b4453d 100644 --- a/src/mongo/db/repl/storage_interface_mock.cpp +++ b/src/mongo/db/repl/storage_interface_mock.cpp @@ -90,8 +90,8 @@ Timestamp StorageInterfaceMock::getInitialDataTimestamp() const { return _initialDataTimestamp; } -Timestamp StorageInterfaceMock::getAllCommittedTimestamp(ServiceContext* serviceCtx) const { - return allCommittedTimestamp; +Timestamp StorageInterfaceMock::getAllDurableTimestamp(ServiceContext* serviceCtx) const { + return allDurableTimestamp; } Timestamp StorageInterfaceMock::getOldestOpenReadTimestamp(ServiceContext* serviceCtx) const { diff --git a/src/mongo/db/repl/storage_interface_mock.h b/src/mongo/db/repl/storage_interface_mock.h index ebf54b3cc5d..f4ac2aa763c 100644 --- a/src/mongo/db/repl/storage_interface_mock.h +++ b/src/mongo/db/repl/storage_interface_mock.h @@ -310,7 +310,7 @@ public: return boost::none; } - Timestamp getAllCommittedTimestamp(ServiceContext* serviceCtx) const override; + Timestamp getAllDurableTimestamp(ServiceContext* serviceCtx) const override; Timestamp getOldestOpenReadTimestamp(ServiceContext* serviceCtx) const override; @@ -403,7 +403,7 @@ public: }; bool supportsDocLockingBool = false; - Timestamp allCommittedTimestamp = Timestamp::min(); + Timestamp allDurableTimestamp = Timestamp::min(); Timestamp oldestOpenReadTimestamp = Timestamp::min(); private: diff --git a/src/mongo/db/storage/biggie/biggie_kv_engine.h b/src/mongo/db/storage/biggie/biggie_kv_engine.h index fcb76c638d1..97c836b523a 100644 --- a/src/mongo/db/storage/biggie/biggie_kv_engine.h +++ b/src/mongo/db/storage/biggie/biggie_kv_engine.h @@ -135,7 +135,7 @@ public: void setJournalListener(mongo::JournalListener* jl) final {} - virtual Timestamp getAllCommittedTimestamp() const override { + virtual Timestamp getAllDurableTimestamp() const override { RecordId id = _visibilityManager->getAllCommittedRecord(); return Timestamp(id.repr()); } diff --git a/src/mongo/db/storage/devnull/devnull_kv_engine.h b/src/mongo/db/storage/devnull/devnull_kv_engine.h index 5f52ed44b7d..d0645b897b2 100644 --- a/src/mongo/db/storage/devnull/devnull_kv_engine.h +++ b/src/mongo/db/storage/devnull/devnull_kv_engine.h @@ -121,7 +121,7 @@ public: void setJournalListener(JournalListener* jl) final {} - virtual Timestamp getAllCommittedTimestamp() const override { + virtual Timestamp getAllDurableTimestamp() const override { return Timestamp(); } diff --git a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_engine.h b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_engine.h index 3eabcae6654..3a76d268efb 100644 --- a/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_engine.h +++ b/src/mongo/db/storage/ephemeral_for_test/ephemeral_for_test_engine.h @@ -112,7 +112,7 @@ public: _journalListener = jl; } - virtual Timestamp getAllCommittedTimestamp() const override { + virtual Timestamp getAllDurableTimestamp() const override { MONGO_UNREACHABLE; } diff --git a/src/mongo/db/storage/kv/kv_drop_pending_ident_reaper_test.cpp b/src/mongo/db/storage/kv/kv_drop_pending_ident_reaper_test.cpp index 7356d68e685..f58c2abb0b4 100644 --- a/src/mongo/db/storage/kv/kv_drop_pending_ident_reaper_test.cpp +++ b/src/mongo/db/storage/kv/kv_drop_pending_ident_reaper_test.cpp @@ -117,7 +117,7 @@ public: } void cleanShutdown() override {} void setJournalListener(JournalListener* jl) override {} - Timestamp getAllCommittedTimestamp() const override { + Timestamp getAllDurableTimestamp() const override { return {}; } Timestamp getOldestOpenReadTimestamp() const override { diff --git a/src/mongo/db/storage/kv/kv_engine.h b/src/mongo/db/storage/kv/kv_engine.h index 0503c6f5e6f..d8b5772cd3b 100644 --- a/src/mongo/db/storage/kv/kv_engine.h +++ b/src/mongo/db/storage/kv/kv_engine.h @@ -373,9 +373,9 @@ public: } /** - * See `StorageEngine::getAllCommittedTimestamp` + * See `StorageEngine::getAllDurableTimestamp` */ - virtual Timestamp getAllCommittedTimestamp() const = 0; + virtual Timestamp getAllDurableTimestamp() const = 0; /** * See `StorageEngine::getOldestOpenReadTimestamp` diff --git a/src/mongo/db/storage/kv/kv_engine_test_harness.cpp b/src/mongo/db/storage/kv/kv_engine_test_harness.cpp index e39bae73f0c..17ad60721d7 100644 --- a/src/mongo/db/storage/kv/kv_engine_test_harness.cpp +++ b/src/mongo/db/storage/kv/kv_engine_test_harness.cpp @@ -229,7 +229,7 @@ TEST(KVEngineTestHarness, TemporaryRecordStoreSimple) { } } -TEST(KVEngineTestHarness, AllCommittedTimestamp) { +TEST(KVEngineTestHarness, AllDurableTimestamp) { std::unique_ptr<KVHarnessHelper> helper(KVHarnessHelper::create()); KVEngine* engine = helper->getEngine(); if (!engine->supportsDocLocking()) @@ -258,17 +258,17 @@ TEST(KVEngineTestHarness, AllCommittedTimestamp) { auto t12Doc = BSON("ts" << t12); auto t21Doc = BSON("ts" << t21); - Timestamp allCommitted = engine->getAllCommittedTimestamp(); + Timestamp allDurable = engine->getAllDurableTimestamp(); MyOperationContext opCtx1(engine); WriteUnitOfWork uow1(&opCtx1); ASSERT_EQ(invariant(rs->insertRecord( &opCtx1, t11Doc.objdata(), t11Doc.objsize(), Timestamp::min())), RecordId(1, 1)); - Timestamp lastAllCommitted = allCommitted; - allCommitted = engine->getAllCommittedTimestamp(); - ASSERT_GTE(allCommitted, lastAllCommitted); - ASSERT_LT(allCommitted, t11); + Timestamp lastAllDurable = allDurable; + allDurable = engine->getAllDurableTimestamp(); + ASSERT_GTE(allDurable, lastAllDurable); + ASSERT_LT(allDurable, t11); MyOperationContext opCtx2(engine); WriteUnitOfWork uow2(&opCtx2); @@ -277,26 +277,26 @@ TEST(KVEngineTestHarness, AllCommittedTimestamp) { RecordId(2, 1)); uow2.commit(); - lastAllCommitted = allCommitted; - allCommitted = engine->getAllCommittedTimestamp(); - ASSERT_GTE(allCommitted, lastAllCommitted); - ASSERT_LT(allCommitted, t11); + lastAllDurable = allDurable; + allDurable = engine->getAllDurableTimestamp(); + ASSERT_GTE(allDurable, lastAllDurable); + ASSERT_LT(allDurable, t11); ASSERT_EQ(invariant(rs->insertRecord( &opCtx1, t12Doc.objdata(), t12Doc.objsize(), Timestamp::min())), RecordId(1, 2)); - lastAllCommitted = allCommitted; - allCommitted = engine->getAllCommittedTimestamp(); - ASSERT_GTE(allCommitted, lastAllCommitted); - ASSERT_LT(allCommitted, t11); + lastAllDurable = allDurable; + allDurable = engine->getAllDurableTimestamp(); + ASSERT_GTE(allDurable, lastAllDurable); + ASSERT_LT(allDurable, t11); uow1.commit(); - lastAllCommitted = allCommitted; - allCommitted = engine->getAllCommittedTimestamp(); - ASSERT_GTE(allCommitted, lastAllCommitted); - ASSERT_LTE(allCommitted, t21); + lastAllDurable = allDurable; + allDurable = engine->getAllDurableTimestamp(); + ASSERT_GTE(allDurable, lastAllDurable); + ASSERT_LTE(allDurable, t21); } } diff --git a/src/mongo/db/storage/mobile/mobile_kv_engine.h b/src/mongo/db/storage/mobile/mobile_kv_engine.h index a09d4187200..3762ccf0878 100644 --- a/src/mongo/db/storage/mobile/mobile_kv_engine.h +++ b/src/mongo/db/storage/mobile/mobile_kv_engine.h @@ -128,7 +128,7 @@ public: _journalListener = jl; } - virtual Timestamp getAllCommittedTimestamp() const override { + virtual Timestamp getAllDurableTimestamp() const override { MONGO_UNREACHABLE; } diff --git a/src/mongo/db/storage/recovery_unit.h b/src/mongo/db/storage/recovery_unit.h index dcc8a37482f..1aea3e085ef 100644 --- a/src/mongo/db/storage/recovery_unit.h +++ b/src/mongo/db/storage/recovery_unit.h @@ -242,8 +242,8 @@ public: * a point in time. Any point in time returned will reflect one of the following: * - when using ReadSource::kProvided, the timestamp provided. * - when using ReadSource::kNoOverlap, the timestamp chosen by the storage engine. - * - when using ReadSource::kAllCommittedSnapshot, the timestamp chosen using the storage - * engine's all-committed timestamp. + * - when using ReadSource::kAllDurableSnapshot, the timestamp chosen using the storage + * engine's all_durable timestamp. * - when using ReadSource::kLastApplied, the timestamp chosen using the storage engine's last * applied timestamp. Can return boost::none if no timestamp has been established. * - when using ReadSource::kMajorityCommitted, the majority committed timestamp chosen by the @@ -370,10 +370,10 @@ public: */ kLastApplied, /** - * Read from the all-committed timestamp. New transactions will always read from the same + * Read from the all_durable timestamp. New transactions will always read from the same * timestamp and never advance. */ - kAllCommittedSnapshot, + kAllDurableSnapshot, /** * Read from the timestamp provided to setTimestampReadSource. */ diff --git a/src/mongo/db/storage/storage_engine.h b/src/mongo/db/storage/storage_engine.h index 7722aaaa956..1b0bf3e6216 100644 --- a/src/mongo/db/storage/storage_engine.h +++ b/src/mongo/db/storage/storage_engine.h @@ -500,12 +500,16 @@ public: }; /** - * Returns the all committed timestamp. All transactions with timestamps earlier than the - * all committed timestamp are committed. Only storage engines that support document level - * locking must provide an implementation. Other storage engines may provide a no-op - * implementation. + * Returns the all_durable timestamp. All transactions with timestamps earlier than the + * all_durable timestamp are committed. Only storage engines that support document level locking + * must provide an implementation. Other storage engines may provide a no-op implementation. + * + * The all_durable timestamp only includes non-prepared transactions that have been given a + * commit_timestamp and prepared transactions that have been given a durable_timestamp. + * Previously, the deprecated all_committed timestamp would also include prepared transactions + * that were prepared but not committed which could make the stable timestamp briefly jump back. */ - virtual Timestamp getAllCommittedTimestamp() const = 0; + virtual Timestamp getAllDurableTimestamp() const = 0; /** * Returns the oldest read timestamp in use by an open transaction. Storage engines that support diff --git a/src/mongo/db/storage/storage_engine_impl.cpp b/src/mongo/db/storage/storage_engine_impl.cpp index f9498194fc4..57dcb407253 100644 --- a/src/mongo/db/storage/storage_engine_impl.cpp +++ b/src/mongo/db/storage/storage_engine_impl.cpp @@ -805,8 +805,8 @@ void StorageEngineImpl::replicationBatchIsComplete() const { return _engine->replicationBatchIsComplete(); } -Timestamp StorageEngineImpl::getAllCommittedTimestamp() const { - return _engine->getAllCommittedTimestamp(); +Timestamp StorageEngineImpl::getAllDurableTimestamp() const { + return _engine->getAllDurableTimestamp(); } Timestamp StorageEngineImpl::getOldestOpenReadTimestamp() const { diff --git a/src/mongo/db/storage/storage_engine_impl.h b/src/mongo/db/storage/storage_engine_impl.h index db471036501..54182dd4676 100644 --- a/src/mongo/db/storage/storage_engine_impl.h +++ b/src/mongo/db/storage/storage_engine_impl.h @@ -139,7 +139,7 @@ public: virtual boost::optional<Timestamp> getLastStableRecoveryTimestamp() const override; - virtual Timestamp getAllCommittedTimestamp() const override; + virtual Timestamp getAllDurableTimestamp() const override; virtual Timestamp getOldestOpenReadTimestamp() const override; diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp index 422f838ba5f..026e7d59c74 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp @@ -1761,8 +1761,8 @@ StatusWith<Timestamp> WiredTigerKVEngine::recoverToStableTimestamp(OperationCont return {stableTimestamp}; } -Timestamp WiredTigerKVEngine::getAllCommittedTimestamp() const { - return Timestamp(_oplogManager->fetchAllCommittedValue(_conn)); +Timestamp WiredTigerKVEngine::getAllDurableTimestamp() const { + return Timestamp(_oplogManager->fetchAllDurableValue(_conn)); } Timestamp WiredTigerKVEngine::getOldestOpenReadTimestamp() const { diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h index 625cb5a565a..c1502094ec5 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h @@ -230,7 +230,7 @@ public: */ boost::optional<Timestamp> getLastStableRecoveryTimestamp() const override; - Timestamp getAllCommittedTimestamp() const override; + Timestamp getAllDurableTimestamp() const override; Timestamp getOldestOpenReadTimestamp() const override; diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp index 28dd48f261f..e3ec7e9097d 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp @@ -207,7 +207,7 @@ void WiredTigerOplogManager::_oplogJournalThreadLoop(WiredTigerSessionCache* ses _opsWaitingForJournal = false; lk.unlock(); - const uint64_t newTimestamp = fetchAllCommittedValue(sessionCache->conn()); + const uint64_t newTimestamp = fetchAllDurableValue(sessionCache->conn()); // The newTimestamp may actually go backward during secondary batch application, // where we commit data file changes separately from oplog changes, so ignore @@ -249,11 +249,11 @@ void WiredTigerOplogManager::_setOplogReadTimestamp(WithLock, uint64_t newTimest LOG(2) << "Setting new oplogReadTimestamp: " << Timestamp(newTimestamp); } -uint64_t WiredTigerOplogManager::fetchAllCommittedValue(WT_CONNECTION* conn) { - // Fetch the latest all_committed value from the storage engine. This value will be a - // timestamp that has no holes (uncommitted transactions with lower timestamps) behind it. +uint64_t WiredTigerOplogManager::fetchAllDurableValue(WT_CONNECTION* conn) { + // Fetch the latest all_durable value from the storage engine. This value will be a timestamp + // that has no holes (uncommitted transactions with lower timestamps) behind it. char buf[(2 * 8 /*bytes in hex*/) + 1 /*nul terminator*/]; - auto wtstatus = conn->query_timestamp(conn, buf, "get=all_committed"); + auto wtstatus = conn->query_timestamp(conn, buf, "get=all_durable"); if (wtstatus == WT_NOTFOUND) { // Treat this as lowest possible timestamp; we need to see all preexisting data but no new // (timestamped) data. diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.h b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.h index f71ba7e5214..9a82985fc28 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.h @@ -41,7 +41,7 @@ class WiredTigerRecordStore; class WiredTigerSessionCache; -// Manages oplog visibility, by periodically querying WiredTiger's all_committed timestamp value and +// Manages oplog visibility, by periodically querying WiredTiger's all_durable timestamp value and // then using that timestamp for all transactions that read the oplog collection. class WiredTigerOplogManager { WiredTigerOplogManager(const WiredTigerOplogManager&) = delete; @@ -78,9 +78,9 @@ public: void waitForAllEarlierOplogWritesToBeVisible(const WiredTigerRecordStore* oplogRecordStore, OperationContext* opCtx); - // Returns the all committed timestamp. All transactions with timestamps earlier than the - // all committed timestamp are committed. - uint64_t fetchAllCommittedValue(WT_CONNECTION* conn); + // Returns the all_durable timestamp. All transactions with timestamps earlier than the + // all_durable timestamp are committed. + uint64_t fetchAllDurableValue(WT_CONNECTION* conn); private: void _oplogJournalThreadLoop(WiredTigerSessionCache* sessionCache, diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp index 3f26c7b8614..33f1548411c 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.cpp @@ -425,7 +425,7 @@ boost::optional<Timestamp> WiredTigerRecoveryUnit::getPointInTimeReadTimestamp() // opened. case ReadSource::kNoOverlap: case ReadSource::kLastApplied: - case ReadSource::kAllCommittedSnapshot: + case ReadSource::kAllDurableSnapshot: break; } @@ -441,7 +441,7 @@ boost::optional<Timestamp> WiredTigerRecoveryUnit::getPointInTimeReadTimestamp() } return boost::none; case ReadSource::kNoOverlap: - case ReadSource::kAllCommittedSnapshot: + case ReadSource::kAllDurableSnapshot: invariant(!_readAtTimestamp.isNull()); return _readAtTimestamp; @@ -501,9 +501,9 @@ void WiredTigerRecoveryUnit::_txnOpen() { _readAtTimestamp = _beginTransactionAtNoOverlapTimestamp(session); break; } - case ReadSource::kAllCommittedSnapshot: { + case ReadSource::kAllDurableSnapshot: { if (_readAtTimestamp.isNull()) { - _readAtTimestamp = _beginTransactionAtAllCommittedTimestamp(session); + _readAtTimestamp = _beginTransactionAtAllDurableTimestamp(session); break; } // Intentionally continue to the next case to read at the _readAtTimestamp. @@ -527,17 +527,17 @@ void WiredTigerRecoveryUnit::_txnOpen() { LOG(3) << "WT begin_transaction for snapshot id " << _mySnapshotId; } -Timestamp WiredTigerRecoveryUnit::_beginTransactionAtAllCommittedTimestamp(WT_SESSION* session) { +Timestamp WiredTigerRecoveryUnit::_beginTransactionAtAllDurableTimestamp(WT_SESSION* session) { WiredTigerBeginTxnBlock txnOpen(session, _prepareConflictBehavior, _roundUpPreparedTimestamps, RoundUpReadTimestamp::kRound); - Timestamp txnTimestamp = Timestamp(_oplogManager->fetchAllCommittedValue(session->connection)); + Timestamp txnTimestamp = Timestamp(_oplogManager->fetchAllDurableValue(session->connection)); auto status = txnOpen.setReadSnapshot(txnTimestamp); fassert(50948, status); // Since this is not in a critical section, we might have rounded to oldest between - // calling getAllCommitted and setReadSnapshot. We need to get the actual read timestamp we + // calling getAllDurable and setReadSnapshot. We need to get the actual read timestamp we // used. auto readTimestamp = _getTransactionReadTimestamp(session); txnOpen.done(); @@ -547,7 +547,7 @@ Timestamp WiredTigerRecoveryUnit::_beginTransactionAtAllCommittedTimestamp(WT_SE Timestamp WiredTigerRecoveryUnit::_beginTransactionAtNoOverlapTimestamp(WT_SESSION* session) { auto lastApplied = _sessionCache->snapshotManager().getLocalSnapshot(); - Timestamp allCommitted = Timestamp(_oplogManager->fetchAllCommittedValue(session->connection)); + Timestamp allDurable = Timestamp(_oplogManager->fetchAllDurableValue(session->connection)); // When using timestamps for reads and writes, it's important that readers and writers don't // overlap with the timestamps they use. In other words, at any point in the system there should @@ -555,13 +555,13 @@ Timestamp WiredTigerRecoveryUnit::_beginTransactionAtNoOverlapTimestamp(WT_SESSI // at, or earlier than T. This time T is called the no-overlap point. Using the `kNoOverlap` // ReadSource will compute the most recent known time that is safe to read at. - // The no-overlap point is computed as the minimum of the storage engine's all-committed time + // The no-overlap point is computed as the minimum of the storage engine's all_durable time // and replication's last applied time. On primaries, the last applied time is updated as // transactions commit, which is not necessarily in the order they appear in the oplog. Thus - // the all-committed time is an appropriate value to read at. + // the all_durable time is an appropriate value to read at. - // On secondaries, however, the all-committed time, as computed by the storage engine, can - // advance before oplog application completes a batch. This is because the all-committed time + // On secondaries, however, the all_durable time, as computed by the storage engine, can + // advance before oplog application completes a batch. This is because the all_durable time // is only computed correctly if the storage engine is informed of commit timestamps in // increasing order. Because oplog application processes a batch of oplog entries out of order, // the timestamping requirement is not satisfied. Secondaries, however, only update the last @@ -569,11 +569,11 @@ Timestamp WiredTigerRecoveryUnit::_beginTransactionAtNoOverlapTimestamp(WT_SESSI // secondaries. // By taking the minimum of the two values, storage can compute a legal time to read at without - // knowledge of the replication state. The no-overlap point is the minimum of the all-committed + // knowledge of the replication state. The no-overlap point is the minimum of the all_durable // time, which represents the point where no transactions will commit any earlier, and // lastApplied, which represents the highest optime a node has applied, a point no readers // should read afterward. - Timestamp readTimestamp = (lastApplied) ? std::min(*lastApplied, allCommitted) : allCommitted; + Timestamp readTimestamp = (lastApplied) ? std::min(*lastApplied, allDurable) : allDurable; WiredTigerBeginTxnBlock txnOpen(session, _prepareConflictBehavior, @@ -582,7 +582,7 @@ Timestamp WiredTigerRecoveryUnit::_beginTransactionAtNoOverlapTimestamp(WT_SESSI auto status = txnOpen.setReadSnapshot(readTimestamp); fassert(51066, status); - // We might have rounded to oldest between calling getAllCommitted and setReadSnapshot. We need + // We might have rounded to oldest between calling getAllDurable and setReadSnapshot. We need // to get the actual read timestamp we used. readTimestamp = _getTransactionReadTimestamp(session); txnOpen.done(); diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h index 7ed61acf29b..81483c51cd7 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_recovery_unit.h @@ -208,10 +208,10 @@ private: void _txnOpen(); /** - * Starts a transaction at the current all-committed timestamp. + * Starts a transaction at the current all_durable timestamp. * Returns the timestamp the transaction was started at. */ - Timestamp _beginTransactionAtAllCommittedTimestamp(WT_SESSION* session); + Timestamp _beginTransactionAtAllDurableTimestamp(WT_SESSION* session); /** * Starts a transaction at the no-overlap timestamp. Returns the timestamp the transaction diff --git a/src/mongo/db/transaction_participant.cpp b/src/mongo/db/transaction_participant.cpp index 5d9b45eeda0..46d2a49a6c0 100644 --- a/src/mongo/db/transaction_participant.cpp +++ b/src/mongo/db/transaction_participant.cpp @@ -598,10 +598,10 @@ void TransactionParticipant::Participant::_setReadSnapshot(OperationContext* opC o(lk).transactionMetricsObserver.onChooseReadTimestamp(readTimestamp); } else if (readConcernArgs.getOriginalLevel() == repl::ReadConcernLevel::kSnapshotReadConcern) { // For transactions with read concern level specified as 'snapshot', we will use - // 'kAllCommittedSnapshot' which ensures a snapshot with no 'holes'; that is, it is a state + // 'kAllDurableSnapshot' which ensures a snapshot with no 'holes'; that is, it is a state // of the system that could be reconstructed from the oplog. opCtx->recoveryUnit()->setTimestampReadSource( - RecoveryUnit::ReadSource::kAllCommittedSnapshot); + RecoveryUnit::ReadSource::kAllDurableSnapshot); const auto readTimestamp = repl::StorageInterface::get(opCtx)->getPointInTimeReadTimestamp(opCtx); @@ -661,10 +661,10 @@ TransactionParticipant::OplogSlotReserver::~OplogSlotReserver() { _recoveryUnit->abortUnitOfWork(); } - // After releasing the oplog hole, the "all committed timestamp" can advance past - // this oplog hole, if there are no other open holes. Check if we can advance the stable - // timestamp any further since a majority write may be waiting on the stable timestamp to - // advance beyond this oplog hole to acknowledge the write to the user. + // After releasing the oplog hole, the all_durable timestamp can advance past this oplog hole, + // if there are no other open holes. Check if we can advance the stable timestamp any further + // since a majority write may be waiting on the stable timestamp to advance beyond this oplog + // hole to acknowledge the write to the user. auto replCoord = repl::ReplicationCoordinator::get(_opCtx); replCoord->attemptToAdvanceStableTimestamp(); } |