From 8009cf96fb0ee09ad0b6c8287afb188d35021f95 Mon Sep 17 00:00:00 2001 From: William Schultz Date: Tue, 14 Jan 2020 20:53:42 +0000 Subject: SERVER-45079 Only allow addition or removal of a single voting node in non-force reconfig --- .../resmokelib/testing/fixtures/replicaset.py | 3 +- jstests/replsets/libs/rename_across_dbs.js | 3 +- jstests/replsets/libs/tags.js | 3 +- jstests/replsets/replset5.js | 9 +- ...primary_config_server_blackholed_from_mongos.js | 3 +- src/mongo/db/repl/repl_set_config.h | 13 ++ src/mongo/db/repl/repl_set_config_checks.cpp | 54 ++++++++ src/mongo/db/repl/repl_set_config_checks_test.cpp | 138 ++++++++++++++++++++- .../db/repl/replication_coordinator_impl_test.cpp | 7 +- src/mongo/shell/replsettest.js | 10 +- 10 files changed, 224 insertions(+), 19 deletions(-) diff --git a/buildscripts/resmokelib/testing/fixtures/replicaset.py b/buildscripts/resmokelib/testing/fixtures/replicaset.py index 1f02b770fd6..b1010f80483 100644 --- a/buildscripts/resmokelib/testing/fixtures/replicaset.py +++ b/buildscripts/resmokelib/testing/fixtures/replicaset.py @@ -217,7 +217,8 @@ class ReplicaSetFixture(interface.ReplFixture): # pylint: disable=too-many-inst repl_config["version"] = 2 repl_config["members"] = members self.logger.info("Issuing replSetReconfig command: %s", repl_config) - self._configure_repl_set(client, {"replSetReconfig": repl_config}) + # Temporarily use 'force: true' to allow multi-node reconfig. + self._configure_repl_set(client, {"replSetReconfig": repl_config, "force": True}) self._await_secondaries() def pids(self): diff --git a/jstests/replsets/libs/rename_across_dbs.js b/jstests/replsets/libs/rename_across_dbs.js index d32d6a11627..ba9584c83aa 100644 --- a/jstests/replsets/libs/rename_across_dbs.js +++ b/jstests/replsets/libs/rename_across_dbs.js @@ -85,7 +85,8 @@ var RenameAcrossDatabasesTest = function(options) { version: nextVersion, }; - reconfig(replTest, replSetConfig); + const force = true; // TODO (SERVER-45575): Update this to be a non-force reconfig. + reconfig(replTest, replSetConfig, force); replTest.waitForState(replTest.nodes[0], ReplSetTest.State.PRIMARY); replTest.awaitReplication(); diff --git a/jstests/replsets/libs/tags.js b/jstests/replsets/libs/tags.js index e5861ee0bad..d51683b2610 100644 --- a/jstests/replsets/libs/tags.js +++ b/jstests/replsets/libs/tags.js @@ -133,7 +133,8 @@ var TagsTest = function(options) { version: nextVersion, }; - reconfig(replTest, replSetConfig); + const force = true; // TODO (SERVER-45575): Update this to be a non-force reconfig. + reconfig(replTest, replSetConfig, force); assert.soonNoExcept(() => replTest.nodes[2].adminCommand({replSetStepUp: 1}).ok); replTest.waitForState(replTest.nodes[2], ReplSetTest.State.PRIMARY); diff --git a/jstests/replsets/replset5.js b/jstests/replsets/replset5.js index e714e034b87..477ced5307f 100644 --- a/jstests/replsets/replset5.js +++ b/jstests/replsets/replset5.js @@ -7,9 +7,11 @@ load("jstests/replsets/rslib.js"); var replTest = new ReplSetTest({name: 'testSet', nodes: 3}); var nodes = replTest.startSet(); +replTest.initiate(); -// Initiate set with default for write concern -var config = replTest.getReplSetConfig(); +// Set default for write concern +var config = replTest.getReplSetConfigFromNode(); +config.version++; config.settings = {}; config.settings.getLastErrorDefaults = { 'w': 3, @@ -18,8 +20,7 @@ config.settings.getLastErrorDefaults = { config.settings.heartbeatTimeoutSecs = 15; // Prevent node 2 from becoming primary, as we will attempt to set it to hidden later. config.members[2].priority = 0; - -replTest.initiate(config); +reconfig(replTest, config); // var master = replTest.getPrimary(); diff --git a/jstests/sharding/primary_config_server_blackholed_from_mongos.js b/jstests/sharding/primary_config_server_blackholed_from_mongos.js index 06afeb2abfc..0424a307621 100644 --- a/jstests/sharding/primary_config_server_blackholed_from_mongos.js +++ b/jstests/sharding/primary_config_server_blackholed_from_mongos.js @@ -36,7 +36,8 @@ for (let i = 0; i < conf.members.length; i++) { } } conf.version++; -const response = admin.runCommand({replSetReconfig: conf}); +// TODO (SERVER-45575): Update this to be a non-force reconfig. +const response = admin.runCommand({replSetReconfig: conf, force: true}); assert.commandWorked(response); jsTest.log('Partitioning the config server primary from the mongos'); diff --git a/src/mongo/db/repl/repl_set_config.h b/src/mongo/db/repl/repl_set_config.h index 969a8689c86..4bf779fe76f 100644 --- a/src/mongo/db/repl/repl_set_config.h +++ b/src/mongo/db/repl/repl_set_config.h @@ -175,6 +175,19 @@ public: return _members; } + /** + * Returns all voting members in this ReplSetConfig. + */ + std::vector votingMembers() const { + std::vector votingMembers; + for (const MemberConfig& m : _members) { + if (m.getNumVotes() > 0) { + votingMembers.push_back(m); + } + } + return votingMembers; + }; + /** * Access a MemberConfig element by index. */ diff --git a/src/mongo/db/repl/repl_set_config_checks.cpp b/src/mongo/db/repl/repl_set_config_checks.cpp index fde701362ca..e25f6d7f2e2 100644 --- a/src/mongo/db/repl/repl_set_config_checks.cpp +++ b/src/mongo/db/repl/repl_set_config_checks.cpp @@ -31,6 +31,7 @@ #include "mongo/db/repl/repl_set_config_checks.h" +#include #include #include "mongo/db/repl/repl_set_config.h" @@ -136,6 +137,50 @@ Status validateArbiterPriorities(const ReplSetConfig& config) { return Status::OK(); } +/** + * Compares two initialized and validated replica set configurations and checks to see if the + * transition from 'oldConfig' to 'newConfig' adds or removes at most 1 voting node. + * + * Assumes that the member id uniquely identifies a logical replica set node. We use the set of + * member ids in the old and new config to determine the safety of the single node change. + */ +Status validateSingleNodeChange(const ReplSetConfig& oldConfig, const ReplSetConfig& newConfig) { + // Add MemberId of voting nodes from each config into respective sets. + std::set oldIdSet, newIdSet; + for (MemberConfig m : oldConfig.votingMembers()) { + oldIdSet.insert(m.getId()); + } + for (MemberConfig m : newConfig.votingMembers()) { + newIdSet.insert(m.getId()); + } + + // + // The symmetric difference between the id sets of each config is the set of ids that are + // present in either set but not in their intersection. A set X can be transformed into set Y + // with 1 addition or removal operation if and only if their symmetric difference is equal to 1. + // If the symmetric difference is 1, there are two possibilities: + // + // (1) There is exactly 1 element e in Y that does not appear in X. In this case we can + // transform X to Y by adding the element e to X. + // + // (2) There is exactly 1 element in X that does not appear in Y. In this case we can transform + // X to Y by removing the element e from X. + // + + // The symmetric difference can't be larger than the union of both sets. + std::vector symmDiff(oldIdSet.size() + newIdSet.size()); + auto diffEndIt = std::set_symmetric_difference( + oldIdSet.begin(), oldIdSet.end(), newIdSet.begin(), newIdSet.end(), symmDiff.begin()); + auto symmDiffSize = std::distance(symmDiff.begin(), diffEndIt); + + if (symmDiffSize > 1) { + return Status(ErrorCodes::InvalidReplicaSetConfig, + str::stream() << "Non force replica set reconfig can only add or remove at " + "most 1 voting member."); + } + return Status::OK(); +} + /** * Compares two initialized and validated replica set configurations, and checks to * see if "newConfig" is a legal successor configuration to "oldConfig". @@ -309,6 +354,15 @@ StatusWith validateConfigForReconfig(ReplicationCoordinatorExternalState* e return StatusWith(status); } + // For non-force reconfigs, verify that the reconfig only adds or removes a single node. This + // ensures that all quorums of the new config overlap with all quorums of the old config. + if (!force) { + status = validateSingleNodeChange(oldConfig, newConfig); + if (!status.isOK()) { + return StatusWith(status); + } + } + status = validateArbiterPriorities(newConfig); if (!status.isOK()) { return StatusWith(status); diff --git a/src/mongo/db/repl/repl_set_config_checks_test.cpp b/src/mongo/db/repl/repl_set_config_checks_test.cpp index d360dff5e5c..de48c7c9f79 100644 --- a/src/mongo/db/repl/repl_set_config_checks_test.cpp +++ b/src/mongo/db/repl/repl_set_config_checks_test.cpp @@ -514,12 +514,14 @@ TEST_F(ServiceContextTest, ValidateConfigForReconfig_HostAndIdRemappingRestricte << BSON("_id" << 3 << "host" << "h3"))))); ASSERT_OK(legalNewConfigWithNewHostAndId.validate()); - ASSERT_OK(validateConfigForReconfig(&externalState, - oldConfig, - legalNewConfigWithNewHostAndId, - getGlobalServiceContext(), - false) - .getStatus()); + ASSERT_OK( + validateConfigForReconfig(&externalState, + oldConfig, + legalNewConfigWithNewHostAndId, + getGlobalServiceContext(), + // Use 'force' since we're adding and removing a node atomically. + true) + .getStatus()); // // Here, the new config is invalid because we've reused host name "h2" with @@ -1023,6 +1025,130 @@ TEST_F(ServiceContextTest, ValidateForReconfig_ForceStillNeedsSelfPresent) { .getStatus()); } +// +// Reusable member object definitions for reconfig tests below. +// +BSONObj m1 = BSON("_id" << 1 << "host" + << "h1"); +BSONObj m2 = BSON("_id" << 2 << "host" + << "h2"); +BSONObj m3 = BSON("_id" << 3 << "host" + << "h3"); +BSONObj m4 = BSON("_id" << 4 << "host" + << "h4"); +BSONObj m2_Arbiter = BSON("_id" << 2 << "host" + << "h2" + << "arbiterOnly" << true); +BSONObj m3_Arbiter = BSON("_id" << 3 << "host" + << "h3" + << "arbiterOnly" << true); +BSONObj m4_Arbiter = BSON("_id" << 4 << "host" + << "h4" + << "arbiterOnly" << true); +BSONObj m2_NonVoting = BSON("_id" << 2 << "host" + << "h2" + << "votes" << 0 << "priority" << 0); +BSONObj m3_NonVoting = BSON("_id" << 3 << "host" + << "h3" + << "votes" << 0 << "priority" << 0); +BSONObj m4_NonVoting = BSON("_id" << 4 << "host" + << "h4" + << "votes" << 0 << "priority" << 0); + +// Test helper to initialize config more concisely. +Status initializeConfig(ReplSetConfig& cfg, std::string id, int version, BSONArray members) { + return cfg.initialize(BSON("_id" << id << "version" << version << "protocolVersion" << 1 + << "members" << members)); +} + +// Validate reconfig between the two given member arrays and return the Status. +Status validateMemberReconfig(BSONArray oldMembers, BSONArray newMembers, BSONObj selfMember) { + // Initialize configs. + ReplSetConfig oldConfig, newConfig; + ASSERT_OK(initializeConfig(oldConfig, "rs0", 1, oldMembers)); + ASSERT_OK(initializeConfig(newConfig, "rs0", 2, newMembers)); + ReplicationCoordinatorExternalStateMock presentOnceExternalState; + presentOnceExternalState.addSelf(HostAndPort(selfMember.getStringField("host"))); + + // Do reconfig. + const bool force = false; + return validateConfigForReconfig( + &presentOnceExternalState, oldConfig, newConfig, getGlobalServiceContext(), force) + .getStatus(); +} + +TEST_F(ServiceContextTest, ValidateForReconfig_SingleNodeAdditionAllowed) { + BSONArray oldMembers = BSON_ARRAY(m1 << m2); + BSONArray newMembers = BSON_ARRAY(m1 << m2 << m3); // add 1 voting node. + ASSERT_OK(validateMemberReconfig(oldMembers, newMembers, m1)); +} + +TEST_F(ServiceContextTest, ValidateForReconfig_SingleNodeRemovalAllowed) { + BSONArray oldMembers = BSON_ARRAY(m1 << m2 << m3); + BSONArray newMembers = BSON_ARRAY(m1 << m2); // remove 1 voting node. + ASSERT_OK(validateMemberReconfig(oldMembers, newMembers, m1)); +} + +TEST_F(ServiceContextTest, ValidateForReconfig_SimultaneousAddAndRemoveDisallowed) { + BSONArray oldMembers = BSON_ARRAY(m1 << m2); + BSONArray newMembers = BSON_ARRAY(m1 << m3); // remove node 2, add node 3. + ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig, + validateMemberReconfig(oldMembers, newMembers, m1)); +} + +TEST_F(ServiceContextTest, ValidateForReconfig_MultiNodeAdditionDisallowed) { + BSONArray oldMembers = BSON_ARRAY(m1 << m2); + BSONArray newMembers = BSON_ARRAY(m1 << m2 << m3 << m4); // add 2 voting nodes. + ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig, + validateMemberReconfig(oldMembers, newMembers, m1)); +} + +TEST_F(ServiceContextTest, ValidateForReconfig_MultiNodeRemovalDisallowed) { + BSONArray oldMembers = BSON_ARRAY(m1 << m2 << m3 << m4); + BSONArray newMembers = BSON_ARRAY(m1 << m2); // remove 2 voting nodes. + ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig, + validateMemberReconfig(oldMembers, newMembers, m1)); +} + +TEST_F(ServiceContextTest, ValidateForReconfig_MultiNodeAdditionOfNonVotingNodesAllowed) { + BSONArray oldMembers = BSON_ARRAY(m1 << m2); + BSONArray newMembers = + BSON_ARRAY(m1 << m2 << m3_NonVoting << m4_NonVoting); // add 2 non-voting nodes. + ASSERT_OK(validateMemberReconfig(oldMembers, newMembers, m1)); +} + +TEST_F(ServiceContextTest, ValidateForReconfig_MultiNodeRemovalOfNonVotingNodesAllowed) { + BSONArray oldMembers = BSON_ARRAY(m1 << m2 << m3_NonVoting << m4_NonVoting); + BSONArray newMembers = BSON_ARRAY(m1 << m2); // remove 2 non-voting nodes. + ASSERT_OK(validateMemberReconfig(oldMembers, newMembers, m1)); +} + +TEST_F(ServiceContextTest, ValidateForReconfig_SimultaneousAddAndRemoveOfNonVotingNodesAllowed) { + BSONArray oldMembers = BSON_ARRAY(m1 << m2_NonVoting); + BSONArray newMembers = BSON_ARRAY(m1 << m3_NonVoting); // Remove non-voter 2, add non-voter 3. + ASSERT_OK(validateMemberReconfig(oldMembers, newMembers, m1)); +} + +TEST_F(ServiceContextTest, ValidateForReconfig_SingleNodeAdditionOfArbitersAllowed) { + BSONArray oldMembers = BSON_ARRAY(m1 << m2); + BSONArray newMembers = BSON_ARRAY(m1 << m2 << m3_Arbiter); // add 1 arbiter. + ASSERT_OK(validateMemberReconfig(oldMembers, newMembers, m1)); +} + +TEST_F(ServiceContextTest, ValidateForReconfig_SimultaneousAddAndRemoveOfArbitersDisallowed) { + BSONArray oldMembers = BSON_ARRAY(m1 << m2_Arbiter); + BSONArray newMembers = BSON_ARRAY(m1 << m3_Arbiter); // remove node 2, add node 3. + ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig, + validateMemberReconfig(oldMembers, newMembers, m1)); +} + +TEST_F(ServiceContextTest, ValidateForReconfig_MultiNodeAdditionOfArbitersDisallowed) { + BSONArray oldMembers = BSON_ARRAY(m1 << m2); + BSONArray newMembers = BSON_ARRAY(m1 << m2 << m3_Arbiter << m4_Arbiter); // add two arbiters. + ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig, + validateMemberReconfig(oldMembers, newMembers, m1)); +} + } // namespace } // namespace repl } // namespace mongo diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp index ac428ddaca3..b49058cbcb8 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp @@ -3675,13 +3675,13 @@ TEST_F(ReplCoordTest, ASSERT_OK(getReplCoord()->awaitReplication(opCtx.get(), time2, writeConcern).status); } -void doReplSetReconfig(ReplicationCoordinatorImpl* replCoord, Status* status) { +void doReplSetReconfig(ReplicationCoordinatorImpl* replCoord, Status* status, bool force = false) { auto client = getGlobalServiceContext()->makeClient("rsr"); auto opCtx = client->makeOperationContext(); BSONObjBuilder garbage; ReplSetReconfigArgs args; - args.force = false; + args.force = force; args.newConfigObj = BSON("_id" << "mySet" << "version" << 3 << "protocolVersion" << 1 << "members" @@ -3961,7 +3961,8 @@ TEST_F(ReplCoordTest, // reconfig to three nodes Status status(ErrorCodes::InternalError, "Not Set"); - stdx::thread reconfigThread([&] { doReplSetReconfig(getReplCoord(), &status); }); + stdx::thread reconfigThread( + [&] { doReplSetReconfig(getReplCoord(), &status, true /* force */); }); replyToReceivedHeartbeatV1(); reconfigThread.join(); diff --git a/src/mongo/shell/replsettest.js b/src/mongo/shell/replsettest.js index ff2e713a695..9ba30379340 100644 --- a/src/mongo/shell/replsettest.js +++ b/src/mongo/shell/replsettest.js @@ -1232,9 +1232,15 @@ var ReplSetTest = function(opts) { config.configsvr = true; } - cmd = {replSetReconfig: config}; + // Add in nodes 1 at a time since non-force reconfig allows only single node + // addition/removal. print("Reconfiguring replica set to add in other nodes"); - replSetCommandWithRetry(master, cmd); + for (let i = 2; i <= originalMembers.length; i++) { + print("ReplSetTest adding in node " + i); + config.members = originalMembers.slice(0, i); + replSetCommandWithRetry(master, {replSetReconfig: config}); + config.version++; + } } // Setup authentication if running test with authentication -- cgit v1.2.1