summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: A. Jesse Jiryu Davis <jesse@mongodb.com> 2020-11-18 22:10:23 -0500
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-11-20 20:20:02 +0000
commit: 8bc84de690e1de3cf2755032ac165fc4a3211441 (patch)
tree: b0fc135b4337e2da34ad84cb6dcc5f86b4559270
parent: 9fcb1f6911dc7904163515f45b361a1333a55310 (diff)
download: mongo-8bc84de690e1de3cf2755032ac165fc4a3211441.tar.gz
SERVER-52680 Start replication when leaving REMOVED state
(cherry picked from commit 73ab98a9094de18b82e596e8d1d0bf311858548b)
-rw-r--r-- jstests/replsets/single_node_set_new_hostname.js | 50
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.cpp | 15
2 files changed, 63 insertions, 2 deletions
diff --git a/jstests/replsets/single_node_set_new_hostname.js b/jstests/replsets/single_node_set_new_hostname.js
new file mode 100644
index 00000000000..43b21a18998
--- /dev/null
+++ b/jstests/replsets/single_node_set_new_hostname.js
@@ -0,0 +1,50 @@
+/*
+ * Tests that a one-node replica set restarted on a different port enters REMOVED, and that a
+ * forced reconfig naming the new port makes the node PRIMARY again.
+ *
+ * @tags: [
+ * requires_persistence,
+ * ]
+ */
+(function() {
+ "use strict";
+
+ load("jstests/replsets/rslib.js");
+
+ const replTest = new ReplSetTest({nodes: 1});
+ replTest.startSet();
+ replTest.initiate();
+ replTest.getPrimary();
+
+ /*
+ * Restart the sole node on a new port so that it no longer finds itself in the old config.
+ */
+ const config = replTest.getReplSetConfigFromNode(0);
+ const newPort = replTest.getPort(0) + 1;
+ const hostname = config.members[0].host.split(":")[0];  // Drop the old ":port" suffix.
+ const newHostAndPort = `${hostname}:${newPort}`;
+
+ jsTestLog("Restarting the sole node on a new port: " + newPort);
+ replTest.restart(0, {port: newPort});
+ let restartedNode;
+ assert.soonNoExcept(() => {
+ restartedNode = new Mongo(newHostAndPort);
+ return true;
+ }, `Couldn't connect to restarted node "${newHostAndPort}"`);
+ waitForState(restartedNode, ReplSetTest.State.REMOVED);
+
+ /*
+ * Update the config to match the node's new port, then expect the node to become primary.
+ */
+ jsTestLog("Reconfiguring the set to change the sole node's port.");
+ jsTestLog(`Original config: ${tojson(config)}`);
+ config.version++;
+ config.members[0].host = newHostAndPort;
+ jsTestLog(`New config: ${tojson(config)}`);
+ // Force reconfig since the restarted node is in REMOVED state, not PRIMARY.
+ assert.commandWorked(
+ restartedNode.getDB("admin").runCommand({replSetReconfig: config, force: true}));
+ waitForState(restartedNode, ReplSetTest.State.PRIMARY);
+
+ replTest.stopSet();
+}());
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index dc7ba40996a..22699a68a6f 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -652,11 +652,11 @@ void ReplicationCoordinatorImpl::_stopDataReplication(OperationContext* opCtx) {
void ReplicationCoordinatorImpl::_startDataReplication(OperationContext* opCtx,
stdx::function<void()> startCompleted) {
- if (_startedSteadyStateReplication.load()) {
+ if (_startedSteadyStateReplication.swap(true)) {
+ // This is not the first call.
return;
}
- _startedSteadyStateReplication.store(true);
// Check to see if we need to do an initial sync.
const auto lastOpTime = getMyLastAppliedOpTime();
const auto needsInitialSync =
@@ -2784,6 +2784,17 @@ ReplicationCoordinatorImpl::_updateMemberStateFromTopologyCoordinator_inlock(
_cancelPriorityTakeover_inlock();
}
+ // Ensure replication is running if we are no longer REMOVED.
+ if (_memberState.removed() && !newState.arbiter()) {
+ log() << "Scheduling a task to begin or continue replication";
+ _scheduleWorkAt(_replExecutor->now(),
+ [=](const mongo::executor::TaskExecutor::CallbackArgs& cbData) {
+ _externalState->startThreads(_settings);
+ auto opCtx = cc().makeOperationContext();
+ _startDataReplication(opCtx.get());
+ });
+ }
+
log() << "transition to " << newState << " from " << _memberState << rsLog;
// Initializes the featureCompatibilityVersion to the default value, because arbiters do not
// receive the replicated version.