diff options
author | A. Jesse Jiryu Davis <jesse@mongodb.com> | 2020-11-02 17:05:22 -0500 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-11-20 19:55:51 +0000 |
commit | e3c4ee9c2549c9cfe7d63c528e03488d6d7d2385 (patch) | |
tree | cd299c5335e4106ef70e8dc822210129e8671f20 | |
parent | 4d9f5f74f33bce0a54f5faba754aaba83d03b8bb (diff) | |
download | mongo-e3c4ee9c2549c9cfe7d63c528e03488d6d7d2385.tar.gz |
SERVER-33747 Fix crash when arbiter restarts and enters REMOVED
(cherry picked from commit 72aacd4ffaf6500777a8a51f87b0797f8ea8ad0b)
4 files changed, 56 insertions, 2 deletions
diff --git a/jstests/replsets/arbiter_new_hostname.js b/jstests/replsets/arbiter_new_hostname.js
new file mode 100644
index 00000000000..dd7bc11fd36
--- /dev/null
+++ b/jstests/replsets/arbiter_new_hostname.js
@@ -0,0 +1,46 @@
+/*
+ * An arbiter that is stopped and restarted on a different port and rejoins the
+ * replica set should enter removed state and should not start data replication.
+ *
+ * @tags: [
+ *   requires_fcv_49,
+ * ]
+ */
+(function() {
+"use strict";
+const replTest = new ReplSetTest({name: 'test', nodes: 3});
+replTest.startSet();
+const nodes = replTest.nodeList();
+let config = {
+    "_id": "test",
+    "members": [
+        {"_id": 0, "host": nodes[0]},
+        {"_id": 1, "host": nodes[1]},
+        {"_id": 2, "host": nodes[2], arbiterOnly: true}
+    ]
+};
+replTest.initiate(config);
+
+let primary = replTest.getPrimary();
+replTest.awaitReplication();
+replTest.awaitSecondaryNodes();
+
+const arbiterId = 2;
+const newPort = replTest.getPort(arbiterId) + 1;
+jsTestLog("Restarting the arbiter node on a new port: " + newPort);
+replTest.stop(arbiterId);
+replTest.start(arbiterId, {port: newPort}, true);
+
+jsTestLog("Reconfiguring the set to change the arbiter's port.");
+config = replTest.getReplSetConfigFromNode();
+jsTestLog(`Original config: ${tojson(config)}`);
+
+const hostname = config.members[arbiterId].host.split(":")[0];
+config.version++;
+config.members[arbiterId].host = hostname + ":" + newPort;
+jsTestLog(`New config: ${tojson(config)}`);
+assert.commandWorked(primary.getDB("admin").runCommand({replSetReconfig: config}));
+replTest.awaitReplication();
+replTest.awaitNodesAgreeOnConfigVersion();
+replTest.stopSet();
+}());
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index ef4eed682ee..2a95493c59e 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -672,7 +672,7 @@ void ReplicationCoordinatorImpl::_finishLoadLocalConfig(
     LOGV2_DEBUG(21320, 1, "Current term is now {term}", "Updated term", "term"_attr = term);
     _performPostMemberStateUpdateAction(action);
 
-    if (!isArbiter) {
+    if (!isArbiter && myIndex.getValue() != -1) {
         _externalState->startThreads(_settings);
         _startDataReplication(opCtx.get());
     }
@@ -708,6 +708,12 @@ void ReplicationCoordinatorImpl::_stopDataReplication(OperationContext* opCtx) {
 
 void ReplicationCoordinatorImpl::_startDataReplication(OperationContext* opCtx,
                                                        std::function<void()> startCompleted) {
+    if (_startedSteadyStateReplication.load()) {
+        return;
+    }
+
+    _startedSteadyStateReplication.store(true);
+
     // Check to see if we need to do an initial sync.
     const auto lastOpTime = getMyLastAppliedOpTime();
     const auto needsInitialSync =
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index 3a3ee224169..bff4f44ee30 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -1624,6 +1624,8 @@ private:
     // here so we can update our term to match as part of finishing stepdown.
     boost::optional<long long> _pendingTermUpdateDuringStepDown; // (M)
 
+    AtomicWord<bool> _startedSteadyStateReplication{false};
+
     // If we're in terminal shutdown. If true, we'll refuse to vote in elections.
     bool _inTerminalShutdown = false; // (M)
 
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index 042d0d488c3..abdbe302ebe 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -654,7 +654,7 @@ void ReplicationCoordinatorImpl::_heartbeatReconfigStore(
             }
         }
 
-        if (!isArbiter && isFirstConfig) {
+        if (!isArbiter && myIndex.isOK() && myIndex.getValue() != -1) {
             shouldStartDataReplication = true;
         }
 