From 89c3502129303b41b8d35bf5d64eb0a242f061da Mon Sep 17 00:00:00 2001 From: Pavi Vetriselvan Date: Wed, 16 Jan 2019 17:21:04 -0500 Subject: SERVER-38302 recalculate stable timestamp after adding finishOpTime on prepared txn commit --- src/mongo/db/repl/replication_coordinator.h | 7 +++++++ src/mongo/db/repl/replication_coordinator_impl.cpp | 13 +++++++++---- src/mongo/db/repl/replication_coordinator_impl.h | 21 +++++++++++++++++++-- .../db/repl/replication_coordinator_impl_test.cpp | 2 +- src/mongo/db/repl/replication_coordinator_mock.cpp | 4 ++++ src/mongo/db/repl/replication_coordinator_mock.h | 2 ++ src/mongo/db/transaction_participant.cpp | 6 ++++++ .../embedded/replication_coordinator_embedded.cpp | 4 ++++ .../embedded/replication_coordinator_embedded.h | 2 ++ 9 files changed, 54 insertions(+), 7 deletions(-) diff --git a/src/mongo/db/repl/replication_coordinator.h b/src/mongo/db/repl/replication_coordinator.h index 780d94e2596..229ee7e5df3 100644 --- a/src/mongo/db/repl/replication_coordinator.h +++ b/src/mongo/db/repl/replication_coordinator.h @@ -813,6 +813,13 @@ public: */ virtual bool setContainsArbiter() const = 0; + /** + * Getter that exposes _recalculateStableOpTime to the Transaction Participant so we can + * recalculate the stable timestamp when needed. + */ + virtual void recalculateStableOpTime() = 0; + + protected: ReplicationCoordinator(); }; diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index 0b11be503fb..719370c8675 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -3223,12 +3223,17 @@ std::set ReplicationCoordinatorImpl::getStableOpTimeCandidates_forTest() return _stableOpTimeCandidates; } -boost::optional ReplicationCoordinatorImpl::getStableOpTime_forTest() { +boost::optional ReplicationCoordinatorImpl::recalculateStableOpTime_forTest() { stdx::unique_lock lk(_mutex); - return _getStableOpTime(lk); + return _recalculateStableOpTime(lk); } -boost::optional ReplicationCoordinatorImpl::_getStableOpTime(WithLock lk) { +void ReplicationCoordinatorImpl::recalculateStableOpTime() { + stdx::unique_lock lk(_mutex); + _recalculateStableOpTime(lk); +} + +boost::optional ReplicationCoordinatorImpl::_recalculateStableOpTime(WithLock lk) { auto commitPoint = _topCoord->getLastCommittedOpTime(); if (_currentCommittedSnapshot) { auto snapshotOpTime = *_currentCommittedSnapshot; @@ -3264,7 +3269,7 @@ boost::optional ReplicationCoordinatorImpl::_getStableOpTime(WithLock lk void ReplicationCoordinatorImpl::_setStableTimestampForStorage(WithLock lk) { // Get the current stable optime. - auto stableOpTime = _getStableOpTime(lk); + auto stableOpTime = _recalculateStableOpTime(lk); // If there is a valid stable optime, set it for the storage engine, and then remove any // old, unneeded stable optime candidates. diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h index 4e81b40b93a..5c8baf6cf9e 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.h +++ b/src/mongo/db/repl/replication_coordinator_impl.h @@ -305,6 +305,23 @@ public: virtual bool setContainsArbiter() const override; + /** + * Getter that exposes _recalculateStableOpTime to the Transaction Participant so we can + * recalculate the stable timestamp when we add a finishOpTime to ServerTransactionsMetrics. + * This is necessary in the case that the commit point advances to include a prepared + * transaction's commit/abort oplog entry before the metrics are updated. + * + * When we advance the commit point, we calculate and set the stable timestamp for storage. + * But, since we did not update ServerTransactionsMetrics in time, the replication + * coordinator will not know to advance the stable optime. If no new operation triggers a + * new calculation, anything waiting for a new committed snapshot will hang. + + * To prevent that, we explicitly recalculate the stable timestamp by calling + * _recalculateStableOpTime when we add a finishOpTime to ServerTransactionsMetrics in the + * Transaction Participant. + */ + virtual void recalculateStableOpTime() override; + // ================== Test support API =================== /** @@ -360,7 +377,7 @@ public: const OpTime& maximumStableOpTime); void cleanupStableOpTimeCandidates_forTest(std::set* candidates, OpTime stableOpTime); std::set getStableOpTimeCandidates_forTest(); - boost::optional getStableOpTime_forTest(); + boost::optional recalculateStableOpTime_forTest(); /** * Non-blocking version of updateTerm. @@ -1026,7 +1043,7 @@ private: * A helper method that returns the current stable optime based on the current commit point and * set of stable optime candidates. */ - boost::optional _getStableOpTime(WithLock lk); + boost::optional _recalculateStableOpTime(WithLock lk); /** * Calculates the 'stable' replication optime given a set of optime candidates and a maximum diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp index 0674d4a88d4..351bf22dbe7 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp @@ -4206,7 +4206,7 @@ TEST_F(StableOpTimeTest, ClearOpTimeCandidatesPastCommonPointAfterRollback) { // Make sure the stable optime candidate set has been cleared of all entries past the common // point. opTimeCandidates = repl->getStableOpTimeCandidates_forTest(); - auto stableOpTime = repl->getStableOpTime_forTest(); + auto stableOpTime = repl->recalculateStableOpTime_forTest(); ASSERT(stableOpTime); expectedOpTimeCandidates = {*stableOpTime}; ASSERT_OPTIME_SET_EQ(expectedOpTimeCandidates, opTimeCandidates); diff --git a/src/mongo/db/repl/replication_coordinator_mock.cpp b/src/mongo/db/repl/replication_coordinator_mock.cpp index 8c48d9b69a3..040acc9c965 100644 --- a/src/mongo/db/repl/replication_coordinator_mock.cpp +++ b/src/mongo/db/repl/replication_coordinator_mock.cpp @@ -494,5 +494,9 @@ bool ReplicationCoordinatorMock::setContainsArbiter() const { return false; } +void ReplicationCoordinatorMock::recalculateStableOpTime() { + return; +} + } // namespace repl } // namespace mongo diff --git a/src/mongo/db/repl/replication_coordinator_mock.h b/src/mongo/db/repl/replication_coordinator_mock.h index 72d2508ce36..ed18860e16c 100644 --- a/src/mongo/db/repl/replication_coordinator_mock.h +++ b/src/mongo/db/repl/replication_coordinator_mock.h @@ -288,6 +288,8 @@ public: virtual bool setContainsArbiter() const override; + virtual void recalculateStableOpTime() override; + private: AtomicWord _snapshotNameGenerator; ServiceContext* const _service; diff --git a/src/mongo/db/transaction_participant.cpp b/src/mongo/db/transaction_participant.cpp index ce70544fc03..fac08e4dfc2 100644 --- a/src/mongo/db/transaction_participant.cpp +++ b/src/mongo/db/transaction_participant.cpp @@ -1160,6 +1160,12 @@ void TransactionParticipant::_finishCommitTransaction(WithLock lk, OperationCont CurOp::get(opCtx)->debug().storageStats); } + // After writing down the commit oplog entry and adding a finishOpTime to + // ServerTransactionsMetrics, recalculate the stable optime to ensure we correctly advance + // the stable timestamp. + auto replCoord = repl::ReplicationCoordinator::get(opCtx); + replCoord->recalculateStableOpTime(); + // We must clear the recovery unit and locker so any post-transaction writes can run without // transactional settings such as a read timestamp. _cleanUpTxnResourceOnOpCtx(lk, opCtx, TerminationCause::kCommitted); diff --git a/src/mongo/embedded/replication_coordinator_embedded.cpp b/src/mongo/embedded/replication_coordinator_embedded.cpp index 6219b0553f8..199bebd9d18 100644 --- a/src/mongo/embedded/replication_coordinator_embedded.cpp +++ b/src/mongo/embedded/replication_coordinator_embedded.cpp @@ -440,5 +440,9 @@ bool ReplicationCoordinatorEmbedded::setContainsArbiter() const { UASSERT_NOT_IMPLEMENTED; } +void ReplicationCoordinatorEmbedded::recalculateStableOpTime() { + UASSERT_NOT_IMPLEMENTED; +} + } // namespace embedded } // namespace mongo diff --git a/src/mongo/embedded/replication_coordinator_embedded.h b/src/mongo/embedded/replication_coordinator_embedded.h index 5c46e002124..9e185f62a08 100644 --- a/src/mongo/embedded/replication_coordinator_embedded.h +++ b/src/mongo/embedded/replication_coordinator_embedded.h @@ -232,6 +232,8 @@ public: bool setContainsArbiter() const override; + void recalculateStableOpTime() override; + private: // Back pointer to the ServiceContext that has started the instance. ServiceContext* const _service; -- cgit v1.2.1