author    Esha Maharishi <esha.maharishi@mongodb.com>  2016-12-19 17:11:25 -0500
committer Esha Maharishi <esha.maharishi@mongodb.com>  2016-12-21 17:10:36 -0500
commit    6c4794728036f4a5e9169b895e2a9fb6c328812b (patch)
tree      90645083a69b5a229376300d81934f80f10a2d1d
parent    865a2da42dc56ca77f32a07cac5b7130ed21d7d0 (diff)
download  mongo-6c4794728036f4a5e9169b895e2a9fb6c328812b.tar.gz
SERVER-27466 move transitionToPrimaryHangBeforeInitializingConfigDatabase to before taking the global exclusive lock
-rw-r--r--  jstests/sharding/config_version_rollback.js                        44
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state_impl.cpp  15
-rw-r--r--  src/mongo/db/repl/replication_coordinator_impl.cpp                  16
3 files changed, 43 insertions(+), 32 deletions(-)
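This commit renames the config-server fail point transitionToPrimaryHangBeforeInitializingConfigDatabase to transitionToPrimaryHangBeforeTakingGlobalExclusiveLock and moves the blocking loop out of the sharding transition hook in the external state and into signalDrainComplete(), so the hang now happens before the global exclusive lock is acquired. As a quick orientation before the diff, the sketch below (drawn from the test changes in this commit) shows the two ways the fail point is driven: enabled at startup through setParameter, and turned off at runtime through configureFailPoint. The replica-set name and node count here are illustrative, not part of the commit.

// Minimal sketch: enable the fail point on every node at startup (any node could
// become primary), then disable it once the first primary has been elected.
var configRS = new ReplSetTest({name: 'csrs', nodes: 3});
var nodes = configRS.startSet({
    configsvr: '',
    setParameter: {
        "failpoint.transitionToPrimaryHangBeforeTakingGlobalExclusiveLock": "{'mode':'alwaysOn'}"
    }
});
configRS.initiate();

// Once the fail point has served its purpose, turn it off on all nodes.
nodes.forEach(function(node) {
    assert.commandWorked(node.adminCommand({
        configureFailPoint: "transitionToPrimaryHangBeforeTakingGlobalExclusiveLock",
        mode: "off"
    }));
});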
diff --git a/jstests/sharding/config_version_rollback.js b/jstests/sharding/config_version_rollback.js
index bb066b8cc80..95dc114fb14 100644
--- a/jstests/sharding/config_version_rollback.js
+++ b/jstests/sharding/config_version_rollback.js
@@ -41,40 +41,40 @@
configsvr: '',
storageEngine: 'wiredTiger',
setParameter: {
- "failpoint.transitionToPrimaryHangBeforeInitializingConfigDatabase":
+ "failpoint.transitionToPrimaryHangBeforeTakingGlobalExclusiveLock":
"{'mode':'alwaysOn'}"
}
});
var conf = configRS.getReplSetConfig();
conf.settings = {catchUpTimeoutMillis: 0};
- configRS.initiate(conf);
- var secondaries = configRS.getSecondaries();
- var origPriConn = configRS.getPrimary();
+ // Ensure conf.members[0] is the only node that can become primary at first, so we know on which
+ // nodes to wait for transition to SECONDARY.
+ conf.members[1].priority = 0;
+ conf.members[2].priority = 0;
+ configRS.nodes[0].adminCommand({replSetInitiate: conf});
- // Ensure the primary is waiting to write the config.version document before stopping the oplog
- // fetcher on the secondaries.
- checkLog.contains(
- origPriConn,
- 'transition to primary - transitionToPrimaryHangBeforeInitializingConfigDatabase fail point enabled.');
+ jsTest.log("Waiting for " + nodes[1] + " and " + nodes[2] + " to transition to SECONDARY.");
+ configRS.waitForState([nodes[1], nodes[2]], ReplSetTest.State.SECONDARY);
- jsTest.log("Stopping the OplogFetcher on the secondaries");
- secondaries.forEach(function(node) {
+ jsTest.log("Stopping the OplogFetcher on all nodes");
+ // Now that the secondaries have finished initial sync and are electable, stop replication.
+ nodes.forEach(function(node) {
assert.commandWorked(node.getDB('admin').runCommand(
{configureFailPoint: 'stopOplogFetcher', mode: 'alwaysOn'}));
});
jsTest.log("Allowing the primary to write the config.version doc");
- // Note: since we didn't know which node would be elected to be the first primary, we had to
- // turn this failpoint on for all nodes earlier. Since we do want the all future primaries to
- // write the config.version doc immediately, we turn the failpoint off for all nodes now.
nodes.forEach(function(node) {
assert.commandWorked(node.adminCommand({
- configureFailPoint: "transitionToPrimaryHangBeforeInitializingConfigDatabase",
+ configureFailPoint: "transitionToPrimaryHangBeforeTakingGlobalExclusiveLock",
mode: "off"
}));
});
+ var origPriConn = configRS.getPrimary();
+ var secondaries = configRS.getSecondaries();
+
jsTest.log("Confirming that the primary has the config.version doc but the secondaries do not");
var origConfigVersionDoc;
assert.soon(function() {
@@ -86,10 +86,20 @@
assert.eq(null, secondary.getCollection('config.version').findOne());
});
- // Ensure manually deleting the config.version document is not allowed.
+ jsTest.log("Checking that manually deleting the config.version document is not allowed.");
assert.writeErrorWithCode(origPriConn.getCollection('config.version').remove({}), 40302);
assert.commandFailedWithCode(origPriConn.getDB('config').runCommand({drop: 'version'}), 40303);
+ jsTest.log("Making the secondaries electable by giving all nodes non-zero, equal priority.");
+ var res = configRS.getPrimary().adminCommand({replSetGetConfig: 1});
+ assert.commandWorked(res);
+ conf = res.config;
+ conf.members[0].priority = 1;
+ conf.members[1].priority = 1;
+ conf.members[2].priority = 1;
+ conf.version++;
+ configRS.getPrimary().adminCommand({replSetReconfig: conf});
+
jsTest.log("Stepping down original primary");
try {
origPriConn.adminCommand({replSetStepDown: 60, force: true});
@@ -106,7 +116,7 @@
assert.neq(origConfigVersionDoc.clusterId, newConfigVersionDoc.clusterId);
jsTest.log("Re-enabling replication on all nodes");
- secondaries.forEach(function(node) {
+ nodes.forEach(function(node) {
assert.commandWorked(
node.getDB('admin').runCommand({configureFailPoint: 'stopOplogFetcher', mode: 'off'}));
});
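The reworked test starts members 1 and 2 with priority 0 so the first primary is known, and later makes them electable again with a reconfig. Below is a minimal sketch of that reconfig step, assuming configRS is the initiated ReplSetTest from the test above: fetch the current config through the primary, raise every member's priority to 1, bump the config version, and submit replSetReconfig.

// Minimal sketch of the "make the secondaries electable" reconfig from the test.
var res = configRS.getPrimary().adminCommand({replSetGetConfig: 1});
assert.commandWorked(res);
var conf = res.config;
conf.members.forEach(function(member) {
    member.priority = 1;
});
conf.version++;
assert.commandWorked(configRS.getPrimary().adminCommand({replSetReconfig: conf}));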
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
index afee6b33dea..9413d1219af 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
@@ -87,7 +87,6 @@
#include "mongo/util/assert_util.h"
#include "mongo/util/concurrency/thread_pool.h"
#include "mongo/util/exit.h"
-#include "mongo/util/fail_point_service.h"
#include "mongo/util/log.h"
#include "mongo/util/mongoutils/str.h"
#include "mongo/util/net/hostandport.h"
@@ -97,8 +96,6 @@
namespace mongo {
namespace repl {
-MONGO_FP_DECLARE(transitionToPrimaryHangBeforeInitializingConfigDatabase);
-
namespace {
using UniqueLock = stdx::unique_lock<stdx::mutex>;
using LockGuard = stdx::lock_guard<stdx::mutex>;
@@ -729,18 +726,6 @@ void ReplicationCoordinatorExternalStateImpl::_shardingOnTransitionToPrimaryHook
fassertStatusOK(40107, status);
if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
- if (MONGO_FAIL_POINT(transitionToPrimaryHangBeforeInitializingConfigDatabase)) {
- log() << "transition to primary - "
- "transitionToPrimaryHangBeforeInitializingConfigDatabase fail point enabled. "
- "Blocking until fail point is disabled.";
- while (MONGO_FAIL_POINT(transitionToPrimaryHangBeforeInitializingConfigDatabase)) {
- mongo::sleepsecs(1);
- if (inShutdown()) {
- break;
- }
- }
- }
-
status = Grid::get(txn)->catalogManager()->initializeConfigDatabaseIfNeeded(txn);
if (!status.isOK() && status != ErrorCodes::AlreadyInitialized) {
if (ErrorCodes::isShutdownError(status.code())) {
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 8838b8dfaee..e012528b96c 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -79,6 +79,7 @@
#include "mongo/stdx/functional.h"
#include "mongo/stdx/mutex.h"
#include "mongo/util/assert_util.h"
+#include "mongo/util/fail_point_service.h"
#include "mongo/util/log.h"
#include "mongo/util/scopeguard.h"
#include "mongo/util/stacktrace.h"
@@ -88,6 +89,8 @@
namespace mongo {
namespace repl {
+MONGO_FP_DECLARE(transitionToPrimaryHangBeforeTakingGlobalExclusiveLock);
+
using CallbackFn = executor::TaskExecutor::CallbackFn;
using CallbackHandle = executor::TaskExecutor::CallbackHandle;
using CBHandle = ReplicationExecutor::CallbackHandle;
@@ -941,6 +944,19 @@ void ReplicationCoordinatorImpl::signalDrainComplete(OperationContext* txn) {
_externalState->onDrainComplete(txn);
+ if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer &&
+ MONGO_FAIL_POINT(transitionToPrimaryHangBeforeTakingGlobalExclusiveLock)) {
+ log() << "transition to primary - "
+ "transitionToPrimaryHangBeforeTakingGlobalExclusiveLock fail point enabled. "
+ "Blocking until fail point is disabled.";
+ while (MONGO_FAIL_POINT(transitionToPrimaryHangBeforeTakingGlobalExclusiveLock)) {
+ mongo::sleepsecs(1);
+ if (_inShutdown) {
+ break;
+ }
+ }
+ }
+
ScopedTransaction transaction(txn, MODE_X);
Lock::GlobalWrite globalWriteLock(txn->lockState());
lk.lock();
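With the wait loop relocated to signalDrainComplete(), the server logs a fixed message just before blocking on the fail point. A test that needs to confirm a node is parked at the fail point can wait for that message with checkLog.contains, as the old test did for the previous fail point; primaryConn below is an assumed connection to the node transitioning to primary.

// Minimal sketch: wait for the log line emitted by the relocated fail point.
checkLog.contains(
    primaryConn,
    'transition to primary - transitionToPrimaryHangBeforeTakingGlobalExclusiveLock fail point enabled.');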