summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: William Schultz <william.schultz@mongodb.com> 2017-04-03 17:11:00 -0400
committer: William Schultz <william.schultz@mongodb.com> 2017-04-17 14:03:06 -0400
commit: 31a2a5bcec525c9dc62cfdd06d126673f507c8df (patch)
tree: 0a9549db9b37bb73931d60f14988f2974b31e6f0 /src
parent: 7a6d8274f57cf141fd1d5c754ea1e2a1a6d2f372 (diff)
download: mongo-31a2a5bcec525c9dc62cfdd06d126673f507c8df.tar.gz
SERVER-26360 Node should be able to leave RECOVERING after going too stale
(cherry picked from commit 136b728f6b51ce6155335652ea64640a4ae194c6)
Diffstat (limited to 'src')
-rw-r--r-- src/mongo/db/repl/bgsync.cpp | 24
-rw-r--r-- src/mongo/db/repl/bgsync.h | 42
2 files changed, 53 insertions, 13 deletions
diff --git a/src/mongo/db/repl/bgsync.cpp b/src/mongo/db/repl/bgsync.cpp
index 308e6c7ecce..41781d2fcc9 100644
--- a/src/mongo/db/repl/bgsync.cpp
+++ b/src/mongo/db/repl/bgsync.cpp
@@ -315,10 +315,20 @@ void BackgroundSync::_produce(OperationContext* opCtx) {
return;
}
+ // We only need to mark ourselves as too stale once.
+ if (_tooStale) {
+ return;
+ }
+
+ // Mark yourself as too stale.
+ _tooStale = true;
+
error() << "too stale to catch up -- entering maintenance mode";
log() << "Our newest OpTime : " << lastOpTimeFetched;
log() << "Earliest OpTime available is " << syncSourceResp.earliestOpTimeSeen;
log() << "See http://dochub.mongodb.org/core/resyncingaverystalereplicasetmember";
+
+ // Activate maintenance mode and transition to RECOVERING.
auto status = _replCoord->setMaintenanceMode(true);
if (!status.isOK()) {
warning() << "Failed to transition into maintenance mode: " << status;
@@ -353,6 +363,20 @@ void BackgroundSync::_produce(OperationContext* opCtx) {
return;
}
+ // If we find a good sync source after having gone too stale, disable maintenance mode so we can
+ // transition to SECONDARY.
+ if (_tooStale) {
+
+ _tooStale = false;
+
+ log() << "No longer too stale. Able to sync from " << _syncSourceHost;
+
+ auto status = _replCoord->setMaintenanceMode(false);
+ if (!status.isOK()) {
+ warning() << "Failed to leave maintenance mode: " << status;
+ }
+ }
+
long long lastHashFetched;
{
stdx::lock_guard<stdx::mutex> lock(_mutex);
diff --git a/src/mongo/db/repl/bgsync.h b/src/mongo/db/repl/bgsync.h
index ead036c79d2..6d068967723 100644
--- a/src/mongo/db/repl/bgsync.h
+++ b/src/mongo/db/repl/bgsync.h
@@ -174,31 +174,47 @@ private:
// A pointer to the replication coordinator external state.
ReplicationCoordinatorExternalState* _replicationCoordinatorExternalState;
- // _mutex protects all of the class variables declared below.
- //
- // Never hold bgsync mutex when trying to acquire the ReplicationCoordinator mutex.
- mutable stdx::mutex _mutex;
+ /**
+ * All member variables are labeled with one of the following codes indicating the
+ * synchronization rules for accessing them:
+ *
+ * (PR) Completely private to BackgroundSync. Can be read or written to from within the main
+ * BackgroundSync thread without synchronization. Shouldn't be accessed outside of this
+ * thread.
+ *
+ * (S) Self-synchronizing; access in any way from any context.
+ *
+ * (M) Reads and writes guarded by _mutex
+ *
+ */
- OpTime _lastOpTimeFetched;
+ // Protects member data of BackgroundSync.
+ // Never hold the BackgroundSync mutex when trying to acquire the ReplicationCoordinator mutex.
+ mutable stdx::mutex _mutex; // (S)
- // lastFetchedHash is used to match ops to determine if we need to rollback, when
- // a secondary.
- long long _lastFetchedHash = 0LL;
+ OpTime _lastOpTimeFetched; // (M)
+
+ // lastFetchedHash is used to match ops to determine if we need to rollback, when a secondary.
+ long long _lastFetchedHash = 0LL; // (M)
// Thread running producerThread().
- std::unique_ptr<stdx::thread> _producerThread;
+ std::unique_ptr<stdx::thread> _producerThread; // (M)
// Set to true if shutdown() has been called.
- bool _inShutdown = false;
+ bool _inShutdown = false; // (M)
+
+ // Flag that marks whether a node's oplog has no common point with any
+ // potential sync sources.
+ bool _tooStale = false; // (PR)
- ProducerState _state = ProducerState::Starting;
+ ProducerState _state = ProducerState::Starting; // (M)
- HostAndPort _syncSourceHost;
+ HostAndPort _syncSourceHost; // (M)
// Current sync source resolver validating sync source candidates.
// Pointer may be read on any thread that locks _mutex or unlocked on the BGSync thread. It can
// only be written to by the BGSync thread while holding _mutex.
- std::unique_ptr<SyncSourceResolver> _syncSourceResolver;
+ std::unique_ptr<SyncSourceResolver> _syncSourceResolver; // (M)
// Current oplog fetcher tailing the oplog on the sync source.
std::unique_ptr<OplogFetcher> _oplogFetcher;