diff options
author | Esha Maharishi <esha.maharishi@mongodb.com> | 2016-12-19 17:11:25 -0500 |
---|---|---|
committer | Esha Maharishi <esha.maharishi@mongodb.com> | 2016-12-21 17:10:36 -0500 |
commit | 6c4794728036f4a5e9169b895e2a9fb6c328812b (patch) | |
tree | 90645083a69b5a229376300d81934f80f10a2d1d | |
parent | 865a2da42dc56ca77f32a07cac5b7130ed21d7d0 (diff) | |
download | mongo-6c4794728036f4a5e9169b895e2a9fb6c328812b.tar.gz |
SERVER-27466 move transitionToPrimaryHangBeforeInitializingConfigDatabase to before taking the global exclusive lock
-rw-r--r-- | jstests/sharding/config_version_rollback.js | 44 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_external_state_impl.cpp | 15 | ||||
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.cpp | 16 |
3 files changed, 43 insertions, 32 deletions
diff --git a/jstests/sharding/config_version_rollback.js b/jstests/sharding/config_version_rollback.js index bb066b8cc80..95dc114fb14 100644 --- a/jstests/sharding/config_version_rollback.js +++ b/jstests/sharding/config_version_rollback.js @@ -41,40 +41,40 @@ configsvr: '', storageEngine: 'wiredTiger', setParameter: { - "failpoint.transitionToPrimaryHangBeforeInitializingConfigDatabase": + "failpoint.transitionToPrimaryHangBeforeTakingGlobalExclusiveLock": "{'mode':'alwaysOn'}" } }); var conf = configRS.getReplSetConfig(); conf.settings = {catchUpTimeoutMillis: 0}; - configRS.initiate(conf); - var secondaries = configRS.getSecondaries(); - var origPriConn = configRS.getPrimary(); + // Ensure conf.members[0] is the only node that can become primary at first, so we know on which + // nodes to wait for transition to SECONDARY. + conf.members[1].priority = 0; + conf.members[2].priority = 0; + configRS.nodes[0].adminCommand({replSetInitiate: conf}); - // Ensure the primary is waiting to write the config.version document before stopping the oplog - // fetcher on the secondaries. - checkLog.contains( - origPriConn, - 'transition to primary - transitionToPrimaryHangBeforeInitializingConfigDatabase fail point enabled.'); + jsTest.log("Waiting for " + nodes[1] + " and " + nodes[2] + " to transition to SECONDARY."); + configRS.waitForState([nodes[1], nodes[2]], ReplSetTest.State.SECONDARY); - jsTest.log("Stopping the OplogFetcher on the secondaries"); - secondaries.forEach(function(node) { + jsTest.log("Stopping the OplogFetcher on all nodes"); + // Now that the secondaries have finished initial sync and are electable, stop replication. + nodes.forEach(function(node) { assert.commandWorked(node.getDB('admin').runCommand( {configureFailPoint: 'stopOplogFetcher', mode: 'alwaysOn'})); }); jsTest.log("Allowing the primary to write the config.version doc"); - // Note: since we didn't know which node would be elected to be the first primary, we had to - // turn this failpoint on for all nodes earlier. Since we do want the all future primaries to - // write the config.version doc immediately, we turn the failpoint off for all nodes now. nodes.forEach(function(node) { assert.commandWorked(node.adminCommand({ - configureFailPoint: "transitionToPrimaryHangBeforeInitializingConfigDatabase", + configureFailPoint: "transitionToPrimaryHangBeforeTakingGlobalExclusiveLock", mode: "off" })); }); + var origPriConn = configRS.getPrimary(); + var secondaries = configRS.getSecondaries(); + jsTest.log("Confirming that the primary has the config.version doc but the secondaries do not"); var origConfigVersionDoc; assert.soon(function() { @@ -86,10 +86,20 @@ assert.eq(null, secondary.getCollection('config.version').findOne()); }); - // Ensure manually deleting the config.version document is not allowed. + jsTest.log("Checking that manually deleting the config.version document is not allowed."); assert.writeErrorWithCode(origPriConn.getCollection('config.version').remove({}), 40302); assert.commandFailedWithCode(origPriConn.getDB('config').runCommand({drop: 'version'}), 40303); + jsTest.log("Making the secondaries electable by giving all nodes non-zero, equal priority."); + var res = configRS.getPrimary().adminCommand({replSetGetConfig: 1}); + assert.commandWorked(res); + conf = res.config; + conf.members[0].priority = 1; + conf.members[1].priority = 1; + conf.members[2].priority = 1; + conf.version++; + configRS.getPrimary().adminCommand({replSetReconfig: conf}); + jsTest.log("Stepping down original primary"); try { origPriConn.adminCommand({replSetStepDown: 60, force: true}); @@ -106,7 +116,7 @@ assert.neq(origConfigVersionDoc.clusterId, newConfigVersionDoc.clusterId); jsTest.log("Re-enabling replication on all nodes"); - secondaries.forEach(function(node) { + nodes.forEach(function(node) { assert.commandWorked( node.getDB('admin').runCommand({configureFailPoint: 'stopOplogFetcher', mode: 'off'})); }); diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp index afee6b33dea..9413d1219af 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp @@ -87,7 +87,6 @@ #include "mongo/util/assert_util.h" #include "mongo/util/concurrency/thread_pool.h" #include "mongo/util/exit.h" -#include "mongo/util/fail_point_service.h" #include "mongo/util/log.h" #include "mongo/util/mongoutils/str.h" #include "mongo/util/net/hostandport.h" @@ -97,8 +96,6 @@ namespace mongo { namespace repl { -MONGO_FP_DECLARE(transitionToPrimaryHangBeforeInitializingConfigDatabase); - namespace { using UniqueLock = stdx::unique_lock<stdx::mutex>; using LockGuard = stdx::lock_guard<stdx::mutex>; @@ -729,18 +726,6 @@ void ReplicationCoordinatorExternalStateImpl::_shardingOnTransitionToPrimaryHook fassertStatusOK(40107, status); if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) { - if (MONGO_FAIL_POINT(transitionToPrimaryHangBeforeInitializingConfigDatabase)) { - log() << "transition to primary - " - "transitionToPrimaryHangBeforeInitializingConfigDatabase fail point enabled. " - "Blocking until fail point is disabled."; - while (MONGO_FAIL_POINT(transitionToPrimaryHangBeforeInitializingConfigDatabase)) { - mongo::sleepsecs(1); - if (inShutdown()) { - break; - } - } - } - status = Grid::get(txn)->catalogManager()->initializeConfigDatabaseIfNeeded(txn); if (!status.isOK() && status != ErrorCodes::AlreadyInitialized) { if (ErrorCodes::isShutdownError(status.code())) { diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index 8838b8dfaee..e012528b96c 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -79,6 +79,7 @@ #include "mongo/stdx/functional.h" #include "mongo/stdx/mutex.h" #include "mongo/util/assert_util.h" +#include "mongo/util/fail_point_service.h" #include "mongo/util/log.h" #include "mongo/util/scopeguard.h" #include "mongo/util/stacktrace.h" @@ -88,6 +89,8 @@ namespace mongo { namespace repl { +MONGO_FP_DECLARE(transitionToPrimaryHangBeforeTakingGlobalExclusiveLock); + using CallbackFn = executor::TaskExecutor::CallbackFn; using CallbackHandle = executor::TaskExecutor::CallbackHandle; using CBHandle = ReplicationExecutor::CallbackHandle; @@ -941,6 +944,19 @@ void ReplicationCoordinatorImpl::signalDrainComplete(OperationContext* txn) { _externalState->onDrainComplete(txn); + if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer && + MONGO_FAIL_POINT(transitionToPrimaryHangBeforeTakingGlobalExclusiveLock)) { + log() << "transition to primary - " + "transitionToPrimaryHangBeforeTakingGlobalExclusiveLock fail point enabled. " + "Blocking until fail point is disabled."; + while (MONGO_FAIL_POINT(transitionToPrimaryHangBeforeTakingGlobalExclusiveLock)) { + mongo::sleepsecs(1); + if (_inShutdown) { + break; + } + } + } + ScopedTransaction transaction(txn, MODE_X); Lock::GlobalWrite globalWriteLock(txn->lockState()); lk.lock(); |