summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author A. Jesse Jiryu Davis <jesse@mongodb.com> 2020-11-18 22:10:23 -0500
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-11-20 20:57:33 +0000
commit f66e8f7ad200d98fcf6b32f4330c7227af7cb517 (patch)
tree d9521899fac84c2b50dda27ee047e87eece894c1
parent 23054195b6bf8ae84d7c06610b062f22632402fd (diff)
download mongo-f66e8f7ad200d98fcf6b32f4330c7227af7cb517.tar.gz
SERVER-52680 Start replication when leaving REMOVED state
(cherry picked from commit 73ab98a9094de18b82e596e8d1d0bf311858548b)
-rw-r--r-- jstests/replsets/single_node_set_new_hostname.js 50
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.cpp 15
2 files changed, 63 insertions, 2 deletions
diff --git a/jstests/replsets/single_node_set_new_hostname.js b/jstests/replsets/single_node_set_new_hostname.js
new file mode 100644
index 00000000000..484cf65a1cf
--- /dev/null
+++ b/jstests/replsets/single_node_set_new_hostname.js
@@ -0,0 +1,50 @@
+/*
+ * When a one-node set is restarted on a different port and reconfigured with the new port, it
+ * should re-elect itself.
+ *
+ * @tags: [
+ * requires_persistence,
+ * ]
+ */
+(function() {
+"use strict";
+
+load("jstests/replsets/rslib.js");
+
+const replTest = new ReplSetTest({nodes: 1});
+replTest.startSet();
+replTest.initiate();
+replTest.getPrimary();
+
+/*
+ * Prepare to restart the sole node on a new port, it no longer finds itself in the old config.
+ */
+const config = replTest.getReplSetConfigFromNode(0);
+const newPort = replTest.getPort(0) + 1;
+const hostname = config.members[0].host.split(":")[0];
+const newHostAndPort = `${hostname}:${newPort}`;
+
+jsTestLog("Restarting the sole node on a new port: " + newPort);
+replTest.restart(0, {port: newPort});
+let restartedNode;
+assert.soonNoExcept(() => {
+ restartedNode = new Mongo(newHostAndPort);
+ return true;
+}, `Couldn't connect to restarted node "${newHostAndPort}`);
+waitForState(restartedNode, ReplSetTest.State.REMOVED);
+
+/*
+ * Update the config to match the node's new port.
+ */
+jsTestLog("Reconfiguring the set to change the sole node's port.");
+jsTestLog(`Original config: ${tojson(config)}`);
+config.version++;
+config.members[0].host = newHostAndPort;
+jsTestLog(`New config: ${tojson(config)}`);
+// Force reconfig since the restarted node is in REMOVED state, not PRIMARY.
+assert.commandWorked(
+ restartedNode.getDB("admin").runCommand({replSetReconfig: config, force: true}));
+waitForState(restartedNode, ReplSetTest.State.PRIMARY);
+
+replTest.stopSet();
+}());
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 5ddb1475f06..7f022cbb0e4 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -681,11 +681,11 @@ void ReplicationCoordinatorImpl::_stopDataReplication(OperationContext* opCtx) {
void ReplicationCoordinatorImpl::_startDataReplication(OperationContext* opCtx,
stdx::function<void()> startCompleted) {
- if (_startedSteadyStateReplication.load()) {
+ if (_startedSteadyStateReplication.swap(true)) {
+ // This is not the first call.
return;
}
- _startedSteadyStateReplication.store(true);
// Check to see if we need to do an initial sync.
const auto lastOpTime = getMyLastAppliedOpTime();
const auto needsInitialSync =
@@ -3031,6 +3031,17 @@ ReplicationCoordinatorImpl::_updateMemberStateFromTopologyCoordinator(WithLock l
_cancelPriorityTakeover_inlock();
}
+ // Ensure replication is running if we are no longer REMOVED.
+ if (_memberState.removed() && !newState.arbiter()) {
+ log() << "Scheduling a task to begin or continue replication";
+ _scheduleWorkAt(_replExecutor->now(),
+ [=](const mongo::executor::TaskExecutor::CallbackArgs& cbData) {
+ _externalState->startThreads(_settings);
+ auto opCtx = cc().makeOperationContext();
+ _startDataReplication(opCtx.get());
+ });
+ }
+
log() << "transition to " << newState << " from " << _memberState << rsLog;
// Initializes the featureCompatibilityVersion to the latest value, because arbiters do not
// receive the replicated version. This is to avoid bugs like SERVER-32639.