summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: A. Jesse Jiryu Davis <jesse@mongodb.com> 2020-11-02 17:05:22 -0500
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-11-20 19:55:51 +0000
commit: e3c4ee9c2549c9cfe7d63c528e03488d6d7d2385 (patch)
tree: cd299c5335e4106ef70e8dc822210129e8671f20
parent: 4d9f5f74f33bce0a54f5faba754aaba83d03b8bb (diff)
download: mongo-e3c4ee9c2549c9cfe7d63c528e03488d6d7d2385.tar.gz
SERVER-33747 Fix crash when arbiter restarts and enters REMOVED
(cherry picked from commit 72aacd4ffaf6500777a8a51f87b0797f8ea8ad0b)
-rw-r--r-- jstests/replsets/arbiter_new_hostname.js 46
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.cpp 8
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.h 2
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp 2
4 files changed, 56 insertions, 2 deletions
diff --git a/jstests/replsets/arbiter_new_hostname.js b/jstests/replsets/arbiter_new_hostname.js
new file mode 100644
index 00000000000..dd7bc11fd36
--- /dev/null
+++ b/jstests/replsets/arbiter_new_hostname.js
@@ -0,0 +1,46 @@
+/*
+ * An arbiter that is stopped and restarted on a different port and rejoins the
+ * replica set should enter removed state and should not start data replication.
+ *
+ * @tags: [
+ * requires_fcv_49,
+ * ]
+ */
+(function() {
+"use strict";
+const replTest = new ReplSetTest({name: 'test', nodes: 3});
+replTest.startSet();
+const nodes = replTest.nodeList();
+let config = {
+ "_id": "test",
+ "members": [
+ {"_id": 0, "host": nodes[0]},
+ {"_id": 1, "host": nodes[1]},
+ {"_id": 2, "host": nodes[2], arbiterOnly: true}
+ ]
+};
+replTest.initiate(config);
+
+let primary = replTest.getPrimary();
+replTest.awaitReplication();
+replTest.awaitSecondaryNodes();
+
+const arbiterId = 2;
+const newPort = replTest.getPort(arbiterId) + 1;
+jsTestLog("Restarting the arbiter node on a new port: " + newPort);
+replTest.stop(arbiterId);
+replTest.start(arbiterId, {port: newPort}, true);
+
+jsTestLog("Reconfiguring the set to change the arbiter's port.");
+config = replTest.getReplSetConfigFromNode();
+jsTestLog(`Original config: ${tojson(config)}`);
+
+const hostname = config.members[arbiterId].host.split(":")[0];
+config.version++;
+config.members[arbiterId].host = hostname + ":" + newPort;
+jsTestLog(`New config: ${tojson(config)}`);
+assert.commandWorked(primary.getDB("admin").runCommand({replSetReconfig: config}));
+replTest.awaitReplication();
+replTest.awaitNodesAgreeOnConfigVersion();
+replTest.stopSet();
+}());
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index ef4eed682ee..2a95493c59e 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -672,7 +672,7 @@ void ReplicationCoordinatorImpl::_finishLoadLocalConfig(
LOGV2_DEBUG(21320, 1, "Current term is now {term}", "Updated term", "term"_attr = term);
_performPostMemberStateUpdateAction(action);
- if (!isArbiter) {
+ if (!isArbiter && myIndex.getValue() != -1) {
_externalState->startThreads(_settings);
_startDataReplication(opCtx.get());
}
@@ -708,6 +708,12 @@ void ReplicationCoordinatorImpl::_stopDataReplication(OperationContext* opCtx) {
void ReplicationCoordinatorImpl::_startDataReplication(OperationContext* opCtx,
std::function<void()> startCompleted) {
+ if (_startedSteadyStateReplication.load()) {
+ return;
+ }
+
+ _startedSteadyStateReplication.store(true);
+
// Check to see if we need to do an initial sync.
const auto lastOpTime = getMyLastAppliedOpTime();
const auto needsInitialSync =
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index 3a3ee224169..bff4f44ee30 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -1624,6 +1624,8 @@ private:
// here so we can update our term to match as part of finishing stepdown.
boost::optional<long long> _pendingTermUpdateDuringStepDown; // (M)
+ AtomicWord<bool> _startedSteadyStateReplication{false};
+
// If we're in terminal shutdown. If true, we'll refuse to vote in elections.
bool _inTerminalShutdown = false; // (M)
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index 042d0d488c3..abdbe302ebe 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -654,7 +654,7 @@ void ReplicationCoordinatorImpl::_heartbeatReconfigStore(
}
}
- if (!isArbiter && isFirstConfig) {
+ if (!isArbiter && myIndex.isOK() && myIndex.getValue() != -1) {
shouldStartDataReplication = true;
}