summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author    Dianna Hohensee <dianna.hohensee@10gen.com>  2016-09-21 14:12:24 -0400
committer Dianna Hohensee <dianna.hohensee@10gen.com>  2016-09-22 10:12:09 -0400
commit    b09ced0d5e3036bd742c989e1483037fe83458bb (patch)
tree      3ca4983a52a3835d3de20de18dca5f8984f87e72 /src
parent    069ac329a310b89daa411d54bb9a4bd8203abc8f (diff)
download  mongo-b09ced0d5e3036bd742c989e1483037fe83458bb.tar.gz
SERVER-26222 MigrationManager::_abandonActiveMigrationsAndEnableManager should accept and handle kStopping state
Diffstat (limited to 'src')
-rw-r--r--  src/mongo/s/balancer/balancer.cpp           | 16
-rw-r--r--  src/mongo/s/balancer/migration_manager.cpp  | 20
2 files changed, 22 insertions(+), 14 deletions(-)
diff --git a/src/mongo/s/balancer/balancer.cpp b/src/mongo/s/balancer/balancer.cpp
index 6c275b02b3f..40982f8cd03 100644
--- a/src/mongo/s/balancer/balancer.cpp
+++ b/src/mongo/s/balancer/balancer.cpp
@@ -326,17 +326,15 @@ void Balancer::_mainThread() {
break;
}
- if (!_stopRequested()) {
- log() << "CSRS balancer thread is recovering";
+ log() << "CSRS balancer thread is recovering";
- auto balancerConfig = Grid::get(txn.get())->getBalancerConfiguration();
- _migrationManager.finishRecovery(txn.get(),
- balancerConfig->getMaxChunkSizeBytes(),
- balancerConfig->getSecondaryThrottle(),
- balancerConfig->waitForDelete());
+ auto balancerConfig = Grid::get(txn.get())->getBalancerConfiguration();
+ _migrationManager.finishRecovery(txn.get(),
+ balancerConfig->getMaxChunkSizeBytes(),
+ balancerConfig->getSecondaryThrottle(),
+ balancerConfig->waitForDelete());
- log() << "CSRS balancer thread is recovered";
- }
+ log() << "CSRS balancer thread is recovered";
// Main balancer loop
while (!_stopRequested()) {
diff --git a/src/mongo/s/balancer/migration_manager.cpp b/src/mongo/s/balancer/migration_manager.cpp
index 1afda23d426..ff701c17a04 100644
--- a/src/mongo/s/balancer/migration_manager.cpp
+++ b/src/mongo/s/balancer/migration_manager.cpp
@@ -377,8 +377,10 @@ void MigrationManager::finishRecovery(OperationContext* txn,
bool waitForDelete) {
{
stdx::lock_guard<stdx::mutex> lock(_mutex);
- if (_state == State::kStopping)
+ if (_state == State::kStopping) {
+ _migrationRecoveryMap.clear();
return;
+ }
// If recovery was abandoned in startRecovery, then there is no more to do.
if (_state == State::kEnabled) {
@@ -389,6 +391,11 @@ void MigrationManager::finishRecovery(OperationContext* txn,
invariant(_state == State::kRecovering);
}
+ auto scopedGuard = MakeGuard([&] {
+ _migrationRecoveryMap.clear();
+ _abandonActiveMigrationsAndEnableManager(txn);
+ });
+
// Schedule recovered migrations.
vector<ScopedMigrationRequest> scopedMigrationRequests;
vector<shared_ptr<Notification<Status>>> responses;
@@ -400,11 +407,9 @@ void MigrationManager::finishRecovery(OperationContext* txn,
// config primary was active and the dist locks have been held by the balancer
// throughout. Abort migration recovery.
warning() << "Unable to reload chunk metadata for collection '"
- << nssAndMigrateInfos.first << "' during balancer"
- << " recovery. Abandoning recovery."
+ << nssAndMigrateInfos.first
+ << "' during balancer recovery. Abandoning recovery."
<< causedBy(redact(scopedCMStatus.getStatus()));
-
- _abandonActiveMigrationsAndEnableManager(txn);
return;
}
@@ -445,6 +450,7 @@ void MigrationManager::finishRecovery(OperationContext* txn,
}
_migrationRecoveryMap.clear();
+ scopedGuard.Dismiss();
{
stdx::lock_guard<stdx::mutex> lock(_mutex);
@@ -732,6 +738,10 @@ void MigrationManager::_waitForRecovery() {
void MigrationManager::_abandonActiveMigrationsAndEnableManager(OperationContext* txn) {
stdx::unique_lock<stdx::mutex> lock(_mutex);
+ if (_state == State::kStopping) {
+ // The balancer was interrupted. Let the next balancer recover the state.
+ return;
+ }
invariant(_state == State::kRecovering);
auto catalogClient = Grid::get(txn)->catalogClient(txn);