diff options
author | William Schultz <william.schultz@mongodb.com> | 2017-04-03 17:11:00 -0400 |
---|---|---|
committer | William Schultz <william.schultz@mongodb.com> | 2017-04-03 17:12:05 -0400 |
commit | 136b728f6b51ce6155335652ea64640a4ae194c6 (patch) | |
tree | 2cb3cff7aa48da73558b61038e4894c199f45316 /src | |
parent | d8239071cb93ab3e1c900b08065eb58bcfd07dab (diff) | |
download | mongo-136b728f6b51ce6155335652ea64640a4ae194c6.tar.gz |
SERVER-26360 Node should be able to leave RECOVERING after going too stale
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/repl/bgsync.cpp | 24 | ||||
-rw-r--r-- | src/mongo/db/repl/bgsync.h | 42 |
2 files changed, 53 insertions, 13 deletions
diff --git a/src/mongo/db/repl/bgsync.cpp b/src/mongo/db/repl/bgsync.cpp index 0475e2fba9f..1ad89082db1 100644 --- a/src/mongo/db/repl/bgsync.cpp +++ b/src/mongo/db/repl/bgsync.cpp @@ -323,10 +323,20 @@ void BackgroundSync::_produce(OperationContext* opCtx) { return; } + // We only need to mark ourselves as too stale once. + if (_tooStale) { + return; + } + + // Mark yourself as too stale. + _tooStale = true; + error() << "too stale to catch up -- entering maintenance mode"; log() << "Our newest OpTime : " << lastOpTimeFetched; log() << "Earliest OpTime available is " << syncSourceResp.earliestOpTimeSeen; log() << "See http://dochub.mongodb.org/core/resyncingaverystalereplicasetmember"; + + // Activate maintenance mode and transition to RECOVERING. auto status = _replCoord->setMaintenanceMode(true); if (!status.isOK()) { warning() << "Failed to transition into maintenance mode: " << status; @@ -361,6 +371,20 @@ void BackgroundSync::_produce(OperationContext* opCtx) { return; } + // If we find a good sync source after having gone too stale, disable maintenance mode so we can + // transition to SECONDARY. + if (_tooStale) { + + _tooStale = false; + + log() << "No longer too stale. Able to sync from " << _syncSourceHost; + + auto status = _replCoord->setMaintenanceMode(false); + if (!status.isOK()) { + warning() << "Failed to leave maintenance mode: " << status; + } + } + long long lastHashFetched; { stdx::lock_guard<stdx::mutex> lock(_mutex); diff --git a/src/mongo/db/repl/bgsync.h b/src/mongo/db/repl/bgsync.h index 77d9e6618be..fc18a4dd6c3 100644 --- a/src/mongo/db/repl/bgsync.h +++ b/src/mongo/db/repl/bgsync.h @@ -185,31 +185,47 @@ private: // A pointer to the replication coordinator external state. ReplicationCoordinatorExternalState* _replicationCoordinatorExternalState; - // _mutex protects all of the class variables declared below. - // - // Never hold bgsync mutex when trying to acquire the ReplicationCoordinator mutex. - mutable stdx::mutex _mutex; + /** + * All member variables are labeled with one of the following codes indicating the + * synchronization rules for accessing them: + * + * (PR) Completely private to BackgroundSync. Can be read or written to from within the main + * BackgroundSync thread without synchronization. Shouldn't be accessed outside of this + * thread. + * + * (S) Self-synchronizing; access in any way from any context. + * + * (M) Reads and writes guarded by _mutex + * + */ - OpTime _lastOpTimeFetched; + // Protects member data of BackgroundSync. + // Never hold the BackgroundSync mutex when trying to acquire the ReplicationCoordinator mutex. + mutable stdx::mutex _mutex; // (S) - // lastFetchedHash is used to match ops to determine if we need to rollback, when - // a secondary. - long long _lastFetchedHash = 0LL; + OpTime _lastOpTimeFetched; // (M) + + // lastFetchedHash is used to match ops to determine if we need to rollback, when a secondary. + long long _lastFetchedHash = 0LL; // (M) // Thread running producerThread(). - std::unique_ptr<stdx::thread> _producerThread; + std::unique_ptr<stdx::thread> _producerThread; // (M) // Set to true if shutdown() has been called. - bool _inShutdown = false; + bool _inShutdown = false; // (M) + + // Flag that marks whether a node's oplog has no common point with any + // potential sync sources. + bool _tooStale = false; // (PR) - ProducerState _state = ProducerState::Starting; + ProducerState _state = ProducerState::Starting; // (M) - HostAndPort _syncSourceHost; + HostAndPort _syncSourceHost; // (M) // Current sync source resolver validating sync source candidates. // Pointer may be read on any thread that locks _mutex or unlocked on the BGSync thread. It can // only be written to by the BGSync thread while holding _mutex. - std::unique_ptr<SyncSourceResolver> _syncSourceResolver; + std::unique_ptr<SyncSourceResolver> _syncSourceResolver; // (M) // Current oplog fetcher tailing the oplog on the sync source. std::unique_ptr<OplogFetcher> _oplogFetcher; |