From 9fcb1f6911dc7904163515f45b361a1333a55310 Mon Sep 17 00:00:00 2001
From: "A. Jesse Jiryu Davis"
Date: Mon, 2 Nov 2020 17:05:22 -0500
Subject: SERVER-33747 Fix crash when arbiter restarts and enters REMOVED

(cherry picked from commit 72aacd4ffaf6500777a8a51f87b0797f8ea8ad0b)
---
 jstests/replsets/arbiter_new_hostname.js           | 46 ++++++++++++++++++++++
 src/mongo/db/repl/replication_coordinator_impl.cpp |  7 +++-
 src/mongo/db/repl/replication_coordinator_impl.h   |  2 +
 .../replication_coordinator_impl_heartbeat.cpp     |  2 +-
 4 files changed, 55 insertions(+), 2 deletions(-)
 create mode 100644 jstests/replsets/arbiter_new_hostname.js

diff --git a/jstests/replsets/arbiter_new_hostname.js b/jstests/replsets/arbiter_new_hostname.js
new file mode 100644
index 00000000000..4850cacead9
--- /dev/null
+++ b/jstests/replsets/arbiter_new_hostname.js
@@ -0,0 +1,46 @@
+/*
+ * An arbiter that is stopped and restarted on a different port and rejoins the
+ * replica set should enter removed state and should not start data replication.
+ *
+ * @tags: [
+ *   requires_fcv_49,
+ * ]
+ */
+(function() {
+"use strict";
+const replTest = new ReplSetTest({name: 'test', nodes: 3});
+replTest.startSet();
+const nodes = replTest.nodeList();
+let config = {
+    "_id": "test",
+    "members": [
+        {"_id": 0, "host": nodes[0]},
+        {"_id": 1, "host": nodes[1]},
+        {"_id": 2, "host": nodes[2], arbiterOnly: true}
+    ]
+};
+replTest.initiate(config);
+
+let primary = replTest.getPrimary();
+replTest.awaitReplication();
+replTest.awaitSecondaryNodes();
+
+const arbiterId = 2;
+const newPort = replTest.getPort(arbiterId) + 1;
+jsTestLog("Restarting the arbiter node on a new port: " + newPort);
+replTest.stop(arbiterId);
+replTest.start(arbiterId, {port: newPort}, true);
+
+jsTestLog("Reconfiguring the set to change the arbiter's port.");
+config = replTest.getReplSetConfigFromNode();
+jsTestLog(`Original config: ${tojson(config)}`);
+
+const hostname = config.members[arbiterId].host.split(":")[0];
+config.version++;
+config.members[arbiterId].host = hostname + ":" + newPort;
+jsTestLog(`New config: ${tojson(config)}`);
+assert.commandWorked(primary.getDB("admin").runCommand({replSetReconfig: config}));
+replTest.awaitReplication();
+replTest.awaitNodesAgreeOnConfigVersion();
+replTest.stopSet();
+}());
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index a6a4d0084bb..dc7ba40996a 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -623,7 +623,7 @@ void ReplicationCoordinatorImpl::_finishLoadLocalConfig(
         _performPostMemberStateUpdateAction(action);
     }
 
-    if (!isArbiter) {
+    if (!isArbiter && myIndex.getValue() != -1) {
         _externalState->startThreads(_settings);
         _startDataReplication(opCtx.get());
     }
@@ -652,6 +652,11 @@ void ReplicationCoordinatorImpl::_stopDataReplication(OperationContext* opCtx) {
 
 void ReplicationCoordinatorImpl::_startDataReplication(OperationContext* opCtx,
                                                        stdx::function<void()> startCompleted) {
+    if (_startedSteadyStateReplication.load()) {
+        return;
+    }
+
+    _startedSteadyStateReplication.store(true);
     // Check to see if we need to do an initial sync.
     const auto lastOpTime = getMyLastAppliedOpTime();
     const auto needsInitialSync =
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index 0127e2807aa..105269b8855 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -1383,6 +1383,8 @@ private:
     // here so we can update our term to match as part of finishing stepdown.
     boost::optional<long long> _pendingTermUpdateDuringStepDown;  // (M)
 
+    AtomicWord<bool> _startedSteadyStateReplication{false};
+
     // If we're in terminal shutdown. If true, we'll refuse to vote in elections.
     bool _inTerminalShutdown = false;  // (M)
 };
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index 1934ad0c40b..36293c867ba 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -550,7 +550,7 @@ void ReplicationCoordinatorImpl::_heartbeatReconfigStore(
         }
     }
 
-    if (!isArbiter && isFirstConfig) {
+    if (!isArbiter && myIndex.isOK() && myIndex.getValue() != -1) {
         shouldStartDataReplication = true;
     }
 
-- 
cgit v1.2.1
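
The core of the patch is the run-once guard added to `_startDataReplication`, combined with the `myIndex` checks that skip data replication when the node is not in the config (index -1, i.e. REMOVED). Below is a minimal standalone sketch of just that guard, assuming callers are serialized as the patch's `load()`/`store()` pair implies; the class and function names here are illustrative placeholders, not MongoDB APIs.

```cpp
#include <atomic>
#include <iostream>

// Illustrative stand-in for the replication coordinator; not a MongoDB API.
class ReplicationStarter {
public:
    void startDataReplication() {
        // Mirror the patch's guard: bail out if replication was already started.
        // As in the patch, callers are assumed to be serialized, so a plain
        // load()/store() pair (rather than an atomic exchange) is sufficient.
        if (_startedSteadyStateReplication.load()) {
            return;
        }
        _startedSteadyStateReplication.store(true);

        std::cout << "starting steady-state replication\n";
    }

private:
    std::atomic<bool> _startedSteadyStateReplication{false};
};

int main() {
    ReplicationStarter node;
    node.startDataReplication();  // first call: starts replication
    node.startDataReplication();  // repeat call (e.g. after a reconfig re-adds the node): no-op
    return 0;
}
```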