summary refs log tree commit diff
diff options
context:
space:
mode:
author Benety Goh <benety@mongodb.com> 2016-01-27 21:26:42 -0500
committer Benety Goh <benety@mongodb.com> 2016-02-05 13:32:33 -0500
commit 0bc96b1ede73c82d45b6d6c2e0b9a33e613ca7c3 (patch)
tree 371c6bdd31528769473229fbb37090238831f0a6
parent c58239c96ee83dab5232084cb69238de2b093c49 (diff)
download mongo-0bc96b1ede73c82d45b6d6c2e0b9a33e613ca7c3.tar.gz
SERVER-22287 added replica set ID validation to heartbeat response processing, replSetReconfigure and replica set quorum check.
-rw-r--r-- buildscripts/resmokeconfig/suites/replica_sets_legacy.yml 2
-rw-r--r-- jstests/replsets/disallow_adding_initialized_node1.js 88
-rw-r--r-- jstests/replsets/disallow_adding_initialized_node2.js 99
-rw-r--r-- src/mongo/db/repl/check_quorum_for_config_change.cpp 16
-rw-r--r-- src/mongo/db/repl/check_quorum_for_config_change_test.cpp 76
-rw-r--r-- src/mongo/db/repl/replica_set_config_checks.cpp 8
-rw-r--r-- src/mongo/db/repl/replica_set_config_checks_test.cpp 43
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.cpp 3
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp 18
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp 62
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp 38
11 files changed, 450 insertions, 3 deletions
diff --git a/buildscripts/resmokeconfig/suites/replica_sets_legacy.yml b/buildscripts/resmokeconfig/suites/replica_sets_legacy.yml
index 9219a342714..44b1b7a572d 100644
--- a/buildscripts/resmokeconfig/suites/replica_sets_legacy.yml
+++ b/buildscripts/resmokeconfig/suites/replica_sets_legacy.yml
@@ -4,6 +4,8 @@ selector:
- jstests/replsets/*.js
exclude_files:
- jstests/replsets/config_server_checks.js
+ - jstests/replsets/disallow_adding_initialized_node1.js
+ - jstests/replsets/disallow_adding_initialized_node2.js
- jstests/replsets/last_op_visible.js
- jstests/replsets/oplog_truncated_on_recovery.js
- jstests/replsets/priority_takeover_cascading_priorities.js
diff --git a/jstests/replsets/disallow_adding_initialized_node1.js b/jstests/replsets/disallow_adding_initialized_node1.js
new file mode 100644
index 00000000000..fe348a81e54
--- /dev/null
+++ b/jstests/replsets/disallow_adding_initialized_node1.js
@@ -0,0 +1,88 @@
+// If a node is already in an active replica set, it is not possible to add this node to another
+// replica set.
+// Initialize two replica sets A and B with the same name: A_0; B_0
+// Add B_0 to the replica set A. This operation should fail because replica set A detects an
+// inconsistent replica set ID in the heartbeat response metadata from B_0.
+(function () {
+ 'use strict';
+
+ var name = 'disallow_adding_initialized_node1';
+ var replSetA = new ReplSetTest({name: name, nodes: [
+ {rsConfig: {_id: 10}},
+ ]});
+ replSetA.startSet({dbpath : "$set-A-$node"});
+ replSetA.initiate();
+
+ var replSetB = new ReplSetTest({name: name, nodes: [
+ {rsConfig: {_id: 20}},
+ ]});
+ replSetB.startSet({dbpath : "$set-B-$node"});
+ replSetB.initiate();
+
+ var primaryA = replSetA.getPrimary();
+ var primaryB = replSetB.getPrimary();
+ jsTestLog('Before merging: primary A = ' + primaryA.host + '; primary B = ' + primaryB.host);
+
+ var configA = assert.commandWorked(primaryA.adminCommand({replSetGetConfig: 1})).config;
+ var configB = assert.commandWorked(primaryB.adminCommand({replSetGetConfig: 1})).config;
+ assert(configA.settings.replicaSetId instanceof ObjectId);
+ assert(configB.settings.replicaSetId instanceof ObjectId);
+ jsTestLog('Replica set A ID = ' + configA.settings.replicaSetId);
+ jsTestLog('Replica set B ID = ' + configB.settings.replicaSetId);
+ assert.neq(configA.settings.replicaSetId, configB.settings.replicaSetId);
+
+ jsTestLog("Adding replica set B's primary " + primaryB.host + " to replica set A's config");
+ configA.version++;
+ configA.members.push({_id: 11, host: primaryB.host});
+ var reconfigResult = assert.commandFailedWithCode(
+ primaryA.adminCommand({replSetReconfig: configA}),
+ ErrorCodes.NewReplicaSetConfigurationIncompatible);
+ var msgA =
+ 'Our replica set ID of ' + configA.settings.replicaSetId + ' did not match that of ' +
+ primaryB.host + ', which is ' + configB.settings.replicaSetId;
+ assert.neq(-1, reconfigResult.errmsg.indexOf(msgA));
+
+ var newPrimaryA = replSetA.getPrimary();
+ var newPrimaryB = replSetB.getPrimary();
+ jsTestLog('After merging: primary A = ' + newPrimaryA.host + '; primary B = ' +
+ newPrimaryB.host);
+ assert.eq(primaryA, newPrimaryA);
+ assert.eq(primaryB, newPrimaryB);
+
+ // Mismatched replica set IDs in heartbeat responses should be logged.
+ var checkLog = function(node, msg) {
+ assert.soon(function() {
+ var logMessages = assert.commandWorked(node.adminCommand({getLog: 'global'})).log;
+ for (var i = 0; i < logMessages.length; i++) {
+ if (logMessages[i].indexOf(msg) != -1) {
+ return true;
+ }
+ }
+ return false;
+ }, 'Did not see a log entry containing the following message: ' + msg, 10000, 1000);
+ };
+ var msgB =
+ "replica set IDs do not match, ours: " + configB.settings.replicaSetId +
+ "; remote node's: " + configA.settings.replicaSetId;
+ checkLog(primaryB, msgB);
+
+ var statusA = assert.commandWorked(primaryA.adminCommand({replSetGetStatus: 1}));
+ var statusB = assert.commandWorked(primaryB.adminCommand({replSetGetStatus: 1}));
+ jsTestLog('After merging: replica set status A = ' + tojson(statusA));
+ jsTestLog('After merging: replica set status B = ' + tojson(statusB));
+
+ // Replica set A's config should remain unchanged due to failed replSetReconfig command.
+ assert.eq(1, statusA.members.length);
+ assert.eq(10, statusA.members[0]._id);
+ assert.eq(primaryA.host, statusA.members[0].name);
+ assert.eq(ReplSetTest.State.PRIMARY, statusA.members[0].state);
+
+ // Replica set B's config should remain unchanged.
+ assert.eq(1, statusB.members.length);
+ assert.eq(20, statusB.members[0]._id);
+ assert.eq(primaryB.host, statusB.members[0].name);
+ assert.eq(ReplSetTest.State.PRIMARY, statusB.members[0].state);
+
+ replSetB.stopSet();
+ replSetA.stopSet();
+})();
diff --git a/jstests/replsets/disallow_adding_initialized_node2.js b/jstests/replsets/disallow_adding_initialized_node2.js
new file mode 100644
index 00000000000..2ed6522e81e
--- /dev/null
+++ b/jstests/replsets/disallow_adding_initialized_node2.js
@@ -0,0 +1,99 @@
+// If a node is already in an active replica set, it is not possible to add this node to another
+// replica set.
+// Initialize two replica sets A and B with the same name: A_0, A_1; B_0
+// Stop B_0.
+// Add B_0 to the replica set A.
+// Start B_0.
+// B_0 should show up in A's replica set status as DOWN.
+(function () {
+ 'use strict';
+
+ var name = 'disallow_adding_initialized_node2';
+ var replSetA = new ReplSetTest({name: name, nodes: [
+ {rsConfig: {_id: 10}},
+ {rsConfig: {_id: 11, arbiterOnly: true}},
+ ]});
+ replSetA.startSet({dbpath : "$set-A-$node"});
+ replSetA.initiate();
+
+ var replSetB = new ReplSetTest({name: name, nodes: [
+ {rsConfig: {_id: 20}},
+ ]});
+ replSetB.startSet({dbpath : "$set-B-$node"});
+ replSetB.initiate();
+
+ var primaryA = replSetA.getPrimary();
+ var primaryB = replSetB.getPrimary();
+ jsTestLog('Before merging: primary A = ' + primaryA.host + '; primary B = ' + primaryB.host);
+
+ var configA = assert.commandWorked(primaryA.adminCommand({replSetGetConfig: 1})).config;
+ var configB = assert.commandWorked(primaryB.adminCommand({replSetGetConfig: 1})).config;
+ assert(configA.settings.replicaSetId instanceof ObjectId);
+ assert(configB.settings.replicaSetId instanceof ObjectId);
+ jsTestLog('Replica set A ID = ' + configA.settings.replicaSetId);
+ jsTestLog('Replica set B ID = ' + configB.settings.replicaSetId);
+ assert.neq(configA.settings.replicaSetId, configB.settings.replicaSetId);
+
+ jsTestLog("Stopping B's primary " + primaryB.host);
+ replSetB.stop(0);
+
+ jsTestLog("Adding replica set B's primary " + primaryB.host + " to replica set A's config");
+ configA.version++;
+ configA.members.push({_id: 12, host: primaryB.host});
+ assert.commandWorked(primaryA.adminCommand({replSetReconfig: configA}));
+
+ jsTestLog("Restarting B's primary " + primaryB.host);
+ primaryB = replSetB.start(0, {dbpath : "$set-B-$node", restart: true});
+
+ var newPrimaryA = replSetA.getPrimary();
+ var newPrimaryB = replSetB.getPrimary();
+ jsTestLog('After merging: primary A = ' + newPrimaryA.host + '; primary B = ' +
+ newPrimaryB.host);
+ assert.eq(primaryA, newPrimaryA);
+ assert.eq(primaryB, newPrimaryB);
+
+ // Mismatched replica set IDs in heartbeat responses should be logged.
+ var checkLog = function(node, msg) {
+ assert.soon(function() {
+ var logMessages = assert.commandWorked(node.adminCommand({getLog: 'global'})).log;
+ for (var i = 0; i < logMessages.length; i++) {
+ if (logMessages[i].indexOf(msg) != -1) {
+ return true;
+ }
+ }
+ return false;
+ }, 'Did not see a log entry containing the following message: ' + msg, 10000, 1000);
+ };
+ var msgA =
+ "replica set IDs do not match, ours: " + configA.settings.replicaSetId +
+ "; remote node's: " + configB.settings.replicaSetId;
+ var msgB =
+ "replica set IDs do not match, ours: " + configB.settings.replicaSetId +
+ "; remote node's: " + configA.settings.replicaSetId;
+ checkLog(primaryA, msgA);
+ checkLog(primaryB, msgB);
+
+ var statusA = assert.commandWorked(primaryA.adminCommand({replSetGetStatus: 1}));
+ var statusB = assert.commandWorked(primaryB.adminCommand({replSetGetStatus: 1}));
+ jsTestLog('After merging: replica set status A = ' + tojson(statusA));
+ jsTestLog('After merging: replica set status B = ' + tojson(statusB));
+
+ // B's primary should show up in A's status as DOWN.
+ assert.eq(3, statusA.members.length);
+ assert.eq(10, statusA.members[0]._id);
+ assert.eq(primaryA.host, statusA.members[0].name);
+ assert.eq(ReplSetTest.State.PRIMARY, statusA.members[0].state);
+ assert.eq(12, statusA.members[2]._id);
+ assert.eq(primaryB.host, statusA.members[2].name);
+ assert.eq(ReplSetTest.State.DOWN, statusA.members[2].state);
+ assert.neq(-1, statusA.members[2].lastHeartbeatMessage.indexOf(msgA));
+
+ // Replica set B's config should remain unchanged.
+ assert.eq(1, statusB.members.length);
+ assert.eq(20, statusB.members[0]._id);
+ assert.eq(primaryB.host, statusB.members[0].name);
+ assert.eq(ReplSetTest.State.PRIMARY, statusB.members[0].state);
+
+ replSetB.stopSet();
+ replSetA.stopSet();
+})();
diff --git a/src/mongo/db/repl/check_quorum_for_config_change.cpp b/src/mongo/db/repl/check_quorum_for_config_change.cpp
index 4d944c9101d..ae1a1f9c7fa 100644
--- a/src/mongo/db/repl/check_quorum_for_config_change.cpp
+++ b/src/mongo/db/repl/check_quorum_for_config_change.cpp
@@ -39,6 +39,7 @@
#include "mongo/db/repl/replica_set_config.h"
#include "mongo/db/repl/scatter_gather_algorithm.h"
#include "mongo/db/repl/scatter_gather_runner.h"
+#include "mongo/rpc/metadata/repl_set_metadata.h"
#include "mongo/util/log.h"
#include "mongo/util/mongoutils/str.h"
@@ -101,6 +102,7 @@ std::vector<RemoteCommandRequest> QuorumChecker::getRequests() const {
requests.push_back(RemoteCommandRequest(_rsConfig->getMemberAt(i).getHostAndPort(),
"admin",
hbRequest,
+ BSON(rpc::kReplSetMetadataFieldName << 1),
_rsConfig->getHeartbeatTimeoutPeriodMillis()));
}
@@ -214,6 +216,20 @@ void QuorumChecker::_tabulateHeartbeatResponse(const RemoteCommandRequest& reque
}
}
+ if (_rsConfig->hasReplicaSetId()) {
+ StatusWith<rpc::ReplSetMetadata> replMetadata =
+ rpc::ReplSetMetadata::readFromMetadata(response.getValue().metadata);
+ if (replMetadata.isOK() && replMetadata.getValue().getReplicaSetId().isSet() &&
+ _rsConfig->getReplicaSetId() != replMetadata.getValue().getReplicaSetId()) {
+ std::string message = str::stream()
+ << "Our replica set ID of " << _rsConfig->getReplicaSetId()
+ << " did not match that of " << request.target.toString() << ", which is "
+ << replMetadata.getValue().getReplicaSetId();
+ _vetoStatus = Status(ErrorCodes::NewReplicaSetConfigurationIncompatible, message);
+ warning() << message;
+ }
+ }
+
const bool isInitialConfig = _rsConfig->getConfigVersion() == 1;
if (isInitialConfig && hbResp.hasData()) {
std::string message = str::stream() << "'" << request.target.toString()
diff --git a/src/mongo/db/repl/check_quorum_for_config_change_test.cpp b/src/mongo/db/repl/check_quorum_for_config_change_test.cpp
index 88904a9ff02..493cbbe2380 100644
--- a/src/mongo/db/repl/check_quorum_for_config_change_test.cpp
+++ b/src/mongo/db/repl/check_quorum_for_config_change_test.cpp
@@ -40,6 +40,7 @@
#include "mongo/db/repl/storage_interface_mock.h"
#include "mongo/executor/network_interface_mock.h"
#include "mongo/platform/unordered_set.h"
+#include "mongo/rpc/metadata/repl_set_metadata.h"
#include "mongo/stdx/functional.h"
#include "mongo/stdx/thread.h"
#include "mongo/unittest/unittest.h"
@@ -384,6 +385,81 @@ TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToSetNameMismatch) {
ASSERT_NOT_REASON_CONTAINS(status, "h5:1");
}
+TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToSetIdMismatch) {
+ // In this test, "we" are host "h3:1". All nodes respond
+ // successfully to their heartbeat requests, but quorum check fails because
+ // the replica set ID in the metadata of "h4"'s response does not match ours.
+
+ const auto replicaSetId = OID::gen();
+ const ReplicaSetConfig rsConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1:1")
+ << BSON("_id" << 2 << "host"
+ << "h2:1") << BSON("_id" << 3 << "host"
+ << "h3:1")
+ << BSON("_id" << 4 << "host"
+ << "h4:1") << BSON("_id" << 5 << "host"
+ << "h5:1"))
+ << "settings" << BSON("replicaSetId" << replicaSetId)));
+ const int myConfigIndex = 2;
+ const BSONObj hbRequest = makeHeartbeatRequest(rsConfig, myConfigIndex);
+
+ startQuorumCheck(rsConfig, myConfigIndex);
+ const Date_t startDate = _net->now();
+ const int numCommandsExpected = rsConfig.getNumMembers() - 1;
+ unordered_set<HostAndPort> seenHosts;
+ _net->enterNetwork();
+ HostAndPort incompatibleHost("h4", 1);
+ OID unexpectedId = OID::gen();
+ for (int i = 0; i < numCommandsExpected; ++i) {
+ const NetworkInterfaceMock::NetworkOperationIterator noi = _net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ ASSERT_EQUALS("admin", request.dbname);
+ ASSERT_EQUALS(hbRequest, request.cmdObj);
+ ASSERT_EQUALS(BSON(rpc::kReplSetMetadataFieldName << 1), request.metadata);
+ ASSERT(seenHosts.insert(request.target).second) << "Already saw "
+ << request.target.toString();
+ if (request.target == incompatibleHost) {
+ OpTime opTime{Timestamp{10, 10}, 10};
+ rpc::ReplSetMetadata metadata(opTime.getTerm(),
+ opTime,
+ opTime,
+ rsConfig.getConfigVersion(),
+ unexpectedId,
+ rpc::ReplSetMetadata::kNoPrimary,
+ -1);
+ BSONObjBuilder metadataBuilder;
+ metadata.writeToMetadata(&metadataBuilder);
+
+ _net->scheduleResponse(noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(RemoteCommandResponse(
+ BSON("ok" << 1), metadataBuilder.obj(), Milliseconds(8))));
+ } else {
+ _net->scheduleResponse(
+ noi,
+ startDate + Milliseconds(10),
+ ResponseStatus(RemoteCommandResponse(BSON("ok" << 1), BSONObj(), Milliseconds(8))));
+ }
+ }
+ _net->runUntil(startDate + Milliseconds(10));
+ _net->exitNetwork();
+ Status status = waitForQuorumCheck();
+ ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
+ ASSERT_REASON_CONTAINS(status,
+ str::stream() << "Our replica set ID of " << replicaSetId
+ << " did not match that of " << incompatibleHost.toString()
+ << ", which is " << unexpectedId);
+ ASSERT_NOT_REASON_CONTAINS(status, "h1:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h2:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h3:1");
+ ASSERT_REASON_CONTAINS(status, "h4:1");
+ ASSERT_NOT_REASON_CONTAINS(status, "h5:1");
+}
+
TEST_F(CheckQuorumForInitiate, QuorumCheckFailedDueToInitializedNode) {
// In this test, "we" are host "h3:1". All nodes respond
// successfully to their heartbeat requests, but quorum check fails because
diff --git a/src/mongo/db/repl/replica_set_config_checks.cpp b/src/mongo/db/repl/replica_set_config_checks.cpp
index a382353fc27..6539bd08e3f 100644
--- a/src/mongo/db/repl/replica_set_config_checks.cpp
+++ b/src/mongo/db/repl/replica_set_config_checks.cpp
@@ -150,6 +150,13 @@ Status validateOldAndNewConfigsCompatible(const ReplicaSetConfig& oldConfig,
<< newConfig.getReplSetName());
}
+ if (oldConfig.getReplicaSetId() != newConfig.getReplicaSetId()) {
+ return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ str::stream() << "New and old configurations differ in replica set ID; "
+ "old was " << oldConfig.getReplicaSetId() << ", and new is "
+ << newConfig.getReplicaSetId());
+ }
+
if (oldConfig.isConfigServer() && !newConfig.isConfigServer()) {
return Status(ErrorCodes::NewReplicaSetConfigurationIncompatible,
str::stream() << "Cannot remove \""
@@ -157,7 +164,6 @@ Status validateOldAndNewConfigsCompatible(const ReplicaSetConfig& oldConfig,
<< "\" from replica set configuration on reconfig");
}
-
//
// For every member config mNew in newConfig, if there exists member config mOld
// in oldConfig such that mNew.getHostAndPort() == mOld.getHostAndPort(), it is required
diff --git a/src/mongo/db/repl/replica_set_config_checks_test.cpp b/src/mongo/db/repl/replica_set_config_checks_test.cpp
index e88f69f34f6..ab43959d37d 100644
--- a/src/mongo/db/repl/replica_set_config_checks_test.cpp
+++ b/src/mongo/db/repl/replica_set_config_checks_test.cpp
@@ -193,6 +193,49 @@ TEST(ValidateConfigForReconfig, NewConfigMustNotChangeSetName) {
validateConfigForReconfig(&externalState, newConfig, oldConfig, true).getStatus());
}
+TEST(ValidateConfigForReconfig, NewConfigMustNotChangeSetId) {
+ ReplicationCoordinatorExternalStateMock externalState;
+ externalState.addSelf(HostAndPort("h1"));
+
+ ReplicaSetConfig oldConfig;
+ ReplicaSetConfig newConfig;
+
+ // Two configurations, compatible except for set ID.
+ ASSERT_OK(
+ oldConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2") << BSON("_id" << 3 << "host"
+ << "h3"))
+ << "settings" << BSON("replicaSetId" << OID::gen()))));
+
+ ASSERT_OK(
+ newConfig.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "h1")
+ << BSON("_id" << 2 << "host"
+ << "h2") << BSON("_id" << 3 << "host"
+ << "h3"))
+ << "settings" << BSON("replicaSetId" << OID::gen()))));
+
+ ASSERT_OK(oldConfig.validate());
+ ASSERT_OK(newConfig.validate());
+ const auto status =
+ validateConfigForReconfig(&externalState, oldConfig, newConfig, false).getStatus();
+ ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible, status);
+ ASSERT_STRING_CONTAINS(status.reason(), "New and old configurations differ in replica set ID");
+
+ // Forced reconfigs also do not allow this.
+ ASSERT_EQUALS(
+ ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ validateConfigForReconfig(&externalState, newConfig, oldConfig, true).getStatus());
+}
+
TEST(ValidateConfigForReconfig, NewConfigMustNotFlipBuildIndexesFlag) {
ReplicationCoordinatorExternalStateMock externalState;
externalState.addSelf(HostAndPort("h1"));
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index ab0f3c8336a..76be96c4b3a 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -2241,7 +2241,8 @@ Status ReplicationCoordinatorImpl::processReplSetReconfig(OperationContext* txn,
if (args.force) {
newConfigObj = incrementConfigVersionByRandom(newConfigObj);
}
- Status status = newConfig.initialize(newConfigObj, oldConfig.getProtocolVersion() == 1);
+ Status status = newConfig.initialize(
+ newConfigObj, oldConfig.getProtocolVersion() == 1, oldConfig.getReplicaSetId());
if (!status.isOK()) {
error() << "replSetReconfig got " << status << " while parsing " << newConfigObj;
return Status(ErrorCodes::InvalidReplicaSetConfig, status.reason());
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index 600c16e8b0e..fc2cbcde928 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -131,6 +131,24 @@ void ReplicationCoordinatorImpl::_handleHeartbeatResponse(
responseStatus = hbResponse.initialize(resp, _topCoord->getTerm());
StatusWith<rpc::ReplSetMetadata> replMetadata =
rpc::ReplSetMetadata::readFromMetadata(cbData.response.getValue().metadata);
+
+ // Reject heartbeat responses (and metadata) from nodes with mismatched replica set IDs.
+ // It is problematic to perform this check in the heartbeat reconfiguring logic because it
+ // is possible for two mismatched replica sets to have the same replica set name and
+ // configuration version. A heartbeat reconfiguration would not take place in that case.
+ // Additionally, this is where we would stop further processing of the metadata from an
+ // unknown replica set.
+ if (replMetadata.isOK() && _rsConfig.isInitialized() && _rsConfig.hasReplicaSetId() &&
+ replMetadata.getValue().getReplicaSetId().isSet() &&
+ _rsConfig.getReplicaSetId() != replMetadata.getValue().getReplicaSetId()) {
+ responseStatus =
+ Status(ErrorCodes::InvalidReplicaSetConfig,
+ str::stream()
+ << "replica set IDs do not match, ours: " << _rsConfig.getReplicaSetId()
+ << "; remote node's: " << replMetadata.getValue().getReplicaSetId());
+ // Ignore metadata.
+ replMetadata = responseStatus;
+ }
if (replMetadata.isOK()) {
// Asynchronous stepdown could happen, but it will be queued in executor after
// this function, so we cannot and don't need to wait for it to finish.
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp
index 8b064ada513..615e5d1f791 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp
@@ -361,6 +361,68 @@ TEST_F(ReplCoordHBV1Test, ArbiterRecordsCommittedOpTimeFromHeartbeatMetadata) {
OpTime olderOpTime{Timestamp{2, 2}, 9};
test(olderOpTime, committedOpTime);
}
+
+TEST_F(ReplCoordHBV1Test, IgnoreTheContentsOfMetadataWhenItsReplicaSetIdDoesNotMatchOurs) {
+ // Tests that a secondary node will not update its committed optime from the heartbeat metadata
+ // if the replica set ID is inconsistent with the existing configuration.
+ HostAndPort host2("node2:12345");
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host" << host2.toString()))
+ << "settings" << BSON("replicaSetId" << OID::gen()) << "protocolVersion"
+ << 1),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+
+ auto rsConfig = getReplCoord()->getConfig();
+
+ // process heartbeat
+ enterNetwork();
+ auto net = getNet();
+ const NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
+ const RemoteCommandRequest& request = noi->getRequest();
+ log() << request.target.toString() << " processing " << request.cmdObj;
+ ASSERT_EQUALS(host2, request.target);
+
+ ReplSetHeartbeatResponse hbResp;
+ hbResp.setSetName(rsConfig.getReplSetName());
+ hbResp.setState(MemberState::RS_PRIMARY);
+ hbResp.setConfigVersion(rsConfig.getConfigVersion());
+
+ BSONObjBuilder responseBuilder;
+ responseBuilder << "ok" << 1;
+ hbResp.addToBSON(&responseBuilder, true);
+
+ OID unexpectedId = OID::gen();
+ OpTime opTime{Timestamp{10, 10}, 10};
+ rpc::ReplSetMetadata metadata(
+ opTime.getTerm(), opTime, opTime, rsConfig.getConfigVersion(), unexpectedId, 1, -1);
+ BSONObjBuilder metadataBuilder;
+ metadata.writeToMetadata(&metadataBuilder);
+
+ net->scheduleResponse(
+ noi, net->now(), makeResponseStatus(responseBuilder.obj(), metadataBuilder.obj()));
+
+ startCapturingLogMessages();
+ net->runReadyNetworkOperations();
+ stopCapturingLogMessages();
+
+ exitNetwork();
+
+ ASSERT_NOT_EQUALS(opTime, getReplCoord()->getLastCommittedOpTime());
+ ASSERT_NOT_EQUALS(opTime.getTerm(), getTopoCoord().getTerm());
+
+ ASSERT_EQUALS(1,
+ countLogLinesContaining(
+ str::stream()
+ << "Error in heartbeat request to node2:12345; InvalidReplicaSetConfig: "
+ "replica set IDs do not match, ours: " << rsConfig.getReplicaSetId()
+ << "; remote node's: " << unexpectedId));
+}
+
} // namespace
} // namespace repl
} // namespace mongo
diff --git a/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp
index 444bf83d5ac..5c97e3bc976 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp
@@ -157,6 +157,40 @@ TEST_F(ReplCoordTest, NodeReturnsInvalidReplicaSetConfigWhenReconfigReceivedWith
ASSERT_TRUE(result.obj().isEmpty());
}
+TEST_F(ReplCoordTest, NodeReturnsInvalidReplicaSetConfigWhenReconfigReceivedWithIncorrectSetId) {
+ // start up, become primary, receive config with incorrect replica set ID
+ OperationContextNoop txn;
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")) << "settings"
+ << BSON("replicaSetId" << OID::gen())),
+ HostAndPort("node1", 12345));
+ ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0));
+ getReplCoord()->setMyLastDurableOpTime(OpTime(Timestamp(100, 0), 0));
+ simulateSuccessfulV1Election();
+
+ BSONObjBuilder result;
+ ReplSetReconfigArgs args;
+ args.force = false;
+ args.newConfigObj = BSON("_id"
+ << "mySet"
+ << "version" << 3 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")) << "settings"
+ << BSON("replicaSetId" << OID::gen()));
+
+ ASSERT_EQUALS(ErrorCodes::NewReplicaSetConfigurationIncompatible,
+ getReplCoord()->processReplSetReconfig(&txn, args, &result));
+ ASSERT_TRUE(result.obj().isEmpty());
+}
+
TEST_F(ReplCoordTest,
NodeReturnsNewReplicaSetConfigurationIncompatibleWhenANewConfigFailsToValidate) {
// start up, become primary, validate fails
@@ -210,6 +244,7 @@ void doReplSetReconfig(ReplicationCoordinatorImpl* replCoord, Status* status) {
BSONObjBuilder garbage;
ReplSetReconfigArgs args;
args.force = false;
+ // Replica set id will be copied from existing configuration.
args.newConfigObj = BSON("_id"
<< "mySet"
<< "version" << 3 << "members"
@@ -377,7 +412,8 @@ TEST_F(ReplCoordTest, PrimaryNodeAcceptsNewConfigWhenReceivingAReconfigWithAComp
<< BSON_ARRAY(BSON("_id" << 1 << "host"
<< "node1:12345")
<< BSON("_id" << 2 << "host"
- << "node2:12345"))),
+ << "node2:12345")) << "settings"
+ << BSON("replicaSetId" << OID::gen())),
HostAndPort("node1", 12345));
ASSERT(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
getReplCoord()->setMyLastAppliedOpTime(OpTime(Timestamp(100, 0), 0));