summaryrefslogtreecommitdiff
path: root/src/mongo/db
diff options
context:
space:
mode:
authorJudah Schvimer <judah@mongodb.com>2020-03-26 08:46:08 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-04-02 12:14:43 +0000
commitb040399238a9450ac1713c5cf7269145074d1fab (patch)
tree486b557532bc8d86fab3dc808c0d3e7494f43c56 /src/mongo/db
parentd2789d7e75be524212b8b6ab213577c69632fbfd (diff)
downloadmongo-b040399238a9450ac1713c5cf7269145074d1fab.tar.gz
SERVER-46345 Remove newlyAdded field on heartbeats that indicate a node left initial sync
Diffstat (limited to 'src/mongo/db')
-rw-r--r--src/mongo/db/repl/repl_set_config.cpp23
-rw-r--r--src/mongo/db/repl/repl_set_config.h14
-rw-r--r--src/mongo/db/repl/repl_set_config_test.cpp88
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.cpp71
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.h8
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp38
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp30
7 files changed, 247 insertions, 25 deletions
diff --git a/src/mongo/db/repl/repl_set_config.cpp b/src/mongo/db/repl/repl_set_config.cpp
index 67668019c81..dac354d5eb1 100644
--- a/src/mongo/db/repl/repl_set_config.cpp
+++ b/src/mongo/db/repl/repl_set_config.cpp
@@ -739,6 +739,15 @@ const MemberConfig* ReplSetConfig::findMemberByID(int id) const {
return nullptr;
}
+MemberConfig* ReplSetConfig::_findMemberByID(MemberId id) {
+ for (std::vector<MemberConfig>::iterator it = _members.begin(); it != _members.end(); ++it) {
+ if (it->getId() == id) {
+ return &(*it);
+ }
+ }
+ return nullptr;
+}
+
int ReplSetConfig::findMemberIndexByHostAndPort(const HostAndPort& hap) const {
int x = 0;
for (std::vector<MemberConfig>::const_iterator it = _members.begin(); it != _members.end();
@@ -994,12 +1003,22 @@ bool ReplSetConfig::containsArbiter() const {
return false;
}
-void ReplSetConfig::setNewlyAddedFieldForMemberAtIndex(int memberIndex, bool newlyAdded) {
- _members[memberIndex].setNewlyAdded(newlyAdded);
+void ReplSetConfig::addNewlyAddedFieldForMember(MemberId memberId) {
+ _findMemberByID(memberId)->setNewlyAdded(true);
// We must recalculate the majority, since nodes with the 'newlyAdded' field set
// should be treated as non-voting nodes.
_calculateMajorities();
+ _addInternalWriteConcernModes();
+}
+
+void ReplSetConfig::removeNewlyAddedFieldForMember(MemberId memberId) {
+ _findMemberByID(memberId)->setNewlyAdded(boost::none);
+
+ // We must recalculate the majority, since nodes with the 'newlyAdded' field removed
+ // should be treated as voting nodes.
+ _calculateMajorities();
+ _addInternalWriteConcernModes();
}
} // namespace repl
diff --git a/src/mongo/db/repl/repl_set_config.h b/src/mongo/db/repl/repl_set_config.h
index b3f500b5827..943f543cf59 100644
--- a/src/mongo/db/repl/repl_set_config.h
+++ b/src/mongo/db/repl/repl_set_config.h
@@ -490,9 +490,14 @@ public:
bool containsArbiter() const;
/**
- * Sets the 'newlyAdded' field of the MemberConfig at memberIndex to the value passed in.
+ * Adds 'newlyAdded=true' to the MemberConfig of the specified member.
*/
- void setNewlyAddedFieldForMemberAtIndex(int memberIndex, bool newlyAdded);
+ void addNewlyAddedFieldForMember(MemberId memberId);
+
+ /**
+ * Removes the 'newlyAdded' field from the MemberConfig of the specified member.
+ */
+ void removeNewlyAddedFieldForMember(MemberId memberId);
private:
/**
@@ -516,6 +521,11 @@ private:
void _initializeConnectionString();
/**
+ * Returns a pointer to a mutable MemberConfig.
+ */
+ MemberConfig* _findMemberByID(MemberId id);
+
+ /**
* Sets replica set ID to 'defaultReplicaSetId' if forInitiate is false and 'cfg' does not
* contain an ID.
* Sets _term to kInitialTerm for initiate.
diff --git a/src/mongo/db/repl/repl_set_config_test.cpp b/src/mongo/db/repl/repl_set_config_test.cpp
index db6cbecaabc..e088108c723 100644
--- a/src/mongo/db/repl/repl_set_config_test.cpp
+++ b/src/mongo/db/repl/repl_set_config_test.cpp
@@ -912,13 +912,84 @@ TEST(ReplSetConfig, SetNewlyAddedFieldForMemberConfig) {
<< "rs0"
<< "version" << 1 << "protocolVersion" << 1 << "members"
<< BSON_ARRAY(BSON("_id" << 1 << "host"
- << "localhost:12345")))));
+ << "n1:1")
+ << BSON("_id" << 2 << "host"
+ << "n2:1")))));
// The member should have its 'newlyAdded' field set to false by default.
ASSERT_FALSE(config.findMemberByID(1)->isNewlyAdded());
+ ASSERT_EQ(2, config.getTotalVotingMembers());
+ ASSERT_EQ(2, config.getMajorityVoteCount());
+ ASSERT_EQ(2, config.getWriteMajority());
+ ASSERT_EQ(2, config.getWritableVotingMembersCount());
+
+ {
+ auto modeSW = config.findCustomWriteMode("$majority");
+ ASSERT(modeSW.isOK());
+ auto modeIt = modeSW.getValue().constraintsBegin();
+ ASSERT_EQ(modeIt->getMinCount(), 2);
+ }
+
+ config.addNewlyAddedFieldForMember(MemberId(1));
+
+ ASSERT_TRUE(config.findMemberByID(1)->isNewlyAdded());
+ ASSERT_EQ(1, config.getTotalVotingMembers());
+ ASSERT_EQ(1, config.getMajorityVoteCount());
+ ASSERT_EQ(1, config.getWriteMajority());
+ ASSERT_EQ(1, config.getWritableVotingMembersCount());
+
+ {
+ auto modeSW = config.findCustomWriteMode("$majority");
+ ASSERT(modeSW.isOK());
+ auto modeIt = modeSW.getValue().constraintsBegin();
+ ASSERT_EQ(modeIt->getMinCount(), 1);
+ }
+}
+
+TEST(ReplSetConfig, RemoveNewlyAddedFieldForMemberConfig) {
+ // Set the flag to add the 'newlyAdded' field to MemberConfigs.
+ enableAutomaticReconfig = true;
+ // Set the flag back to false after this test exits.
+ ON_BLOCK_EXIT([] { enableAutomaticReconfig = false; });
+
+ ReplSetConfig config;
+ ASSERT_OK(config.initialize(BSON("_id"
+ << "rs0"
+ << "version" << 1 << "protocolVersion" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "n1:1"
+ << "newlyAdded" << true)
+ << BSON("_id" << 2 << "host"
+ << "n2:1")))));
+
- config.setNewlyAddedFieldForMemberAtIndex(0, true);
ASSERT_TRUE(config.findMemberByID(1)->isNewlyAdded());
+ ASSERT_EQ(1, config.getTotalVotingMembers());
+ ASSERT_EQ(1, config.getMajorityVoteCount());
+ ASSERT_EQ(1, config.getWriteMajority());
+ ASSERT_EQ(1, config.getWritableVotingMembersCount());
+
+ {
+ auto modeSW = config.findCustomWriteMode("$majority");
+ ASSERT(modeSW.isOK());
+ auto modeIt = modeSW.getValue().constraintsBegin();
+ ASSERT_EQ(modeIt->getMinCount(), 1);
+ }
+
+ config.removeNewlyAddedFieldForMember(MemberId(1));
+
+ ASSERT_FALSE(config.findMemberByID(1)->isNewlyAdded());
+ ASSERT_EQ(2, config.getTotalVotingMembers());
+ ASSERT_EQ(2, config.getMajorityVoteCount());
+ ASSERT_EQ(2, config.getWriteMajority());
+ ASSERT_EQ(2, config.getWritableVotingMembersCount());
+
+ {
+ auto modeSW = config.findCustomWriteMode("$majority");
+ ASSERT(modeSW.isOK());
+ auto modeIt = modeSW.getValue().constraintsBegin();
+ ASSERT_EQ(modeIt->getMinCount(), 2);
+ }
}
TEST(ReplSetConfig, ParsingNewlyAddedSetsFieldToTrueCorrectly) {
@@ -956,19 +1027,6 @@ TEST(ReplSetConfig, ParseFailsWithNewlyAddedSetToFalse) {
ASSERT_EQUALS(ErrorCodes::InvalidReplicaSetConfig, status);
}
-TEST(ReplSetConfig, CannotSetNewlyAddedFieldToFalseForMemberConfig) {
- ReplSetConfig config;
- ASSERT_OK(config.initialize(BSON("_id"
- << "rs0"
- << "version" << 1 << "protocolVersion" << 1 << "members"
- << BSON_ARRAY(BSON("_id" << 1 << "host"
- << "localhost:12345")))));
- // Cannot set 'newlyAdded' field to false.
- ASSERT_THROWS_CODE(config.setNewlyAddedFieldForMemberAtIndex(0, false),
- AssertionException,
- ErrorCodes::InvalidReplicaSetConfig);
-}
-
TEST(ReplSetConfig, NodeWithNewlyAddedFieldHasVotesZero) {
// Set the flag to add the 'newlyAdded' field to MemberConfigs.
enableAutomaticReconfig = true;
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index e78e9c645af..a13520f2b7c 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -3187,9 +3187,8 @@ Status ReplicationCoordinatorImpl::processReplSetReconfig(OperationContext* opCt
if (newMem.isArbiter()) {
continue;
}
-
- const int newMemId = newMem.getId().getData();
- const auto oldMem = oldConfig.findMemberByID(newMemId);
+ const auto newMemId = newMem.getId();
+ const auto oldMem = oldConfig.findMemberByID(newMemId.getData());
const bool isNewVotingMember = (oldMem == nullptr && newMem.isVoter());
const bool isCurrentlyNewlyAdded =
@@ -3199,7 +3198,7 @@ Status ReplicationCoordinatorImpl::processReplSetReconfig(OperationContext* opCt
// 1) Is a new, voting node
// 2) Already has a 'newlyAdded' field in the old config
if (isNewVotingMember || isCurrentlyNewlyAdded) {
- newConfig.setNewlyAddedFieldForMemberAtIndex(i, true);
+ newConfig.addNewlyAddedFieldForMember(newMemId);
addedNewlyAddedField = true;
}
}
@@ -3507,6 +3506,70 @@ Status ReplicationCoordinatorImpl::awaitConfigCommitment(OperationContext* opCtx
}
+void ReplicationCoordinatorImpl::_reconfigToRemoveNewlyAddedField(
+ const executor::TaskExecutor::CallbackArgs& cbData,
+ MemberId memberId,
+ ConfigVersionAndTerm versionAndTerm) {
+ if (cbData.status == ErrorCodes::CallbackCanceled) {
+ LOGV2_DEBUG(4634502,
+ 2,
+ "Failed to remove 'newlyAdded' config field",
+ "memberId"_attr = memberId.getData(),
+ "error"_attr = cbData.status);
+ // We will retry on the next heartbeat.
+ return;
+ }
+
+ LOGV2(4634505,
+ "Beginning automatic reconfig to remove 'newlyAdded' config field",
+ "memberId"_attr = memberId.getData());
+
+ auto getNewConfig = [&](const repl::ReplSetConfig& oldConfig,
+ long long term) -> StatusWith<ReplSetConfig> {
+ // Even though memberIds should properly identify nodes across config changes, to be safe we
+ // only want to do an automatic reconfig where the base config is the one that specified
+ // this memberId.
+ if (oldConfig.getConfigVersionAndTerm() != versionAndTerm) {
+ return Status(ErrorCodes::StaleConfig,
+ str::stream()
+ << "Current config is no longer consistent with heartbeat "
+ "data. Current config version: "
+ << oldConfig.getConfigVersionAndTerm().toString()
+ << ", heartbeat data config version: " << versionAndTerm.toString());
+ }
+
+ auto newConfig = oldConfig;
+ newConfig.setConfigVersion(newConfig.getConfigVersion() + 1);
+
+ const auto hasNewlyAddedField =
+ oldConfig.findMemberByID(memberId.getData())->isNewlyAdded();
+ if (!hasNewlyAddedField) {
+ return Status(ErrorCodes::NoSuchKey, "Old config no longer has 'newlyAdded' field");
+ }
+
+ newConfig.removeNewlyAddedFieldForMember(memberId);
+ return newConfig;
+ };
+
+ auto opCtx = cc().makeOperationContext();
+ auto status = doReplSetReconfig(opCtx.get(), getNewConfig, false /* force */);
+
+ if (!status.isOK()) {
+ LOGV2_DEBUG(4634503,
+ 2,
+ "Failed to remove 'newlyAdded' config field",
+ "memberId"_attr = memberId.getData(),
+ "error"_attr = status);
+ // It is safe to do nothing here as we will retry this on the next heartbeat, or we may
+ // instead find out the reconfig already took place and is no longer necessary.
+ return;
+ }
+
+ // We intentionally do not wait for config commitment. If the config does not get committed, we
+ // will try again on the next heartbeat.
+ LOGV2(4634504, "Removed 'newlyAdded' config field", "memberId"_attr = memberId.getData());
+}
+
Status ReplicationCoordinatorImpl::processReplSetInitiate(OperationContext* opCtx,
const BSONObj& configObj,
BSONObjBuilder* resultObj) {
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index b08d49f4429..79038a99da7 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -1419,6 +1419,14 @@ private:
const std::string& dbName,
const BSONObj& cmdObj);
+ /**
+ * This is called by a primary when they become aware that a node has completed initial sync.
+ * That primary initiates a reconfig to remove the 'newlyAdded' for that node, if it was set.
+ */
+ void _reconfigToRemoveNewlyAddedField(const executor::TaskExecutor::CallbackArgs& cbData,
+ MemberId memberId,
+ ConfigVersionAndTerm versionAndTerm);
+
//
// All member variables are labeled with one of the following codes indicating the
// synchronization rules for accessing them.
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index 1b056553be9..089553f6a56 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -47,6 +47,7 @@
#include "mongo/db/logical_time_validator.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/repl/heartbeat_response_action.h"
+#include "mongo/db/repl/repl_server_parameters_gen.h"
#include "mongo/db/repl/repl_set_config_checks.h"
#include "mongo/db/repl/repl_set_heartbeat_args_v1.h"
#include "mongo/db/repl/repl_set_heartbeat_response.h"
@@ -277,6 +278,43 @@ void ReplicationCoordinatorImpl::_handleHeartbeatResponse(
_wakeReadyWaiters(lk);
}
+ if (enableAutomaticReconfig) {
+ // When receiving a heartbeat response indicating that the remote is in a state past
+ // STARTUP_2, the primary will initiate a reconfig to remove the 'newlyAdded' field for that
+ // node (if present). This field is normally set when we add new members with votes:1 to the
+ // set.
+ if (_getMemberState_inlock().primary() && hbStatusResponse.isOK() &&
+ hbStatusResponse.getValue().hasState()) {
+ auto remoteState = hbStatusResponse.getValue().getState();
+ if (remoteState == MemberState::RS_SECONDARY ||
+ remoteState == MemberState::RS_RECOVERING ||
+ remoteState == MemberState::RS_ROLLBACK) {
+ const auto mem = _rsConfig.getMemberAt(targetIndex);
+ const auto memId = mem.getId();
+ if (mem.isNewlyAdded()) {
+ auto status = _replExecutor->scheduleWork(
+ [=](const executor::TaskExecutor::CallbackArgs& cbData) {
+ _reconfigToRemoveNewlyAddedField(
+ cbData, memId, _rsConfig.getConfigVersionAndTerm());
+ });
+
+ if (!status.isOK()) {
+ LOGV2_DEBUG(4634500,
+ 1,
+ "Failed to schedule work for removing 'newlyAdded' field.",
+ "memberId"_attr = memId.getData(),
+ "error"_attr = status.getStatus());
+ } else {
+ LOGV2_DEBUG(4634501,
+ 1,
+ "Scheduled automatic reconfig to remove 'newlyAdded' field.",
+ "memberId"_attr = memId.getData());
+ }
+ }
+ }
+ }
+ }
+
// Abort catchup if we have caught up to the latest known optime after heartbeat refreshing.
if (_catchupState) {
_catchupState->signalHeartbeatUpdate_inlock();
diff --git a/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp
index 3565f7e168a..abd6fd32aa1 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp
@@ -711,6 +711,9 @@ TEST_F(ReplCoordTest, NodeAcceptsConfigFromAReconfigWithForceTrueWhileNotPrimary
class ReplCoordReconfigTest : public ReplCoordTest {
public:
+ int counter = 0;
+ std::vector<HostAndPort> initialSyncNodes;
+
void setUp() {
setMinimumLoggedSeverity(logv2::LogSeverity::Debug(3));
}
@@ -727,14 +730,23 @@ public:
}
void respondToHeartbeat() {
+ counter++;
+ unittest::log() << "Going to respond to heartbeat " << counter;
auto net = getNet();
auto noi = net->getNextReadyRequest();
auto&& request = noi->getRequest();
+ unittest::log() << "Going to respond to heartbeat request " << counter << ": "
+ << request.cmdObj << " from " << request.target;
repl::ReplSetHeartbeatArgsV1 hbArgs;
ASSERT_OK(hbArgs.initialize(request.cmdObj));
repl::ReplSetHeartbeatResponse hbResp;
hbResp.setSetName("mySet");
- hbResp.setState(MemberState::RS_SECONDARY);
+ if (std::find(initialSyncNodes.begin(), initialSyncNodes.end(), request.target) !=
+ initialSyncNodes.end()) {
+ hbResp.setState(MemberState::RS_STARTUP2);
+ } else {
+ hbResp.setState(MemberState::RS_SECONDARY);
+ }
// Secondaries learn of the config version and term immediately.
hbResp.setConfigVersion(getReplCoord()->getConfig().getConfigVersion());
hbResp.setConfigTerm(getReplCoord()->getConfig().getConfigTerm());
@@ -743,8 +755,12 @@ public:
hbResp.setDurableOpTimeAndWallTime({OpTime(Timestamp(100, 1), 0), Date_t() + Seconds(100)});
respObj << "ok" << 1;
hbResp.addToBSON(&respObj);
+ unittest::log() << "Scheduling response to heartbeat request " << counter
+ << " with response " << hbResp.toBSON();
net->scheduleResponse(noi, net->now(), makeResponseStatus(respObj.obj()));
+ unittest::log() << "Responding to heartbeat request " << counter;
net->runReadyNetworkOperations();
+ unittest::log() << "Responded to heartbeat request " << counter;
}
void setUpNewlyAddedFieldTest() {
@@ -775,19 +791,23 @@ public:
}
void respondToNHeartbeats(int n) {
+ unittest::log() << "Responding to " << n << " heartbeats";
enterNetwork();
for (int i = 0; i < n; i++) {
respondToHeartbeat();
}
exitNetwork();
+ unittest::log() << "Responded to " << n << " heartbeats";
}
void respondToAllHeartbeats() {
+ unittest::log() << "Responding to all heartbeats";
enterNetwork();
while (getNet()->hasReadyRequests()) {
respondToHeartbeat();
}
exitNetwork();
+ unittest::log() << "Responded to all heartbeats";
}
Status doSafeReconfig(OperationContext* opCtx,
@@ -809,6 +829,7 @@ public:
// Consume any outstanding heartbeats that were scheduled after reconfig finished.
respondToAllHeartbeats();
+
return status;
}
@@ -1538,7 +1559,7 @@ TEST_F(ReplCoordReconfigTest, ParseFailedIfUserProvidesNewlyAddedFieldDuringSafe
"Appended the 'newlyAdded' field to a node in the new config."));
}
-TEST_F(ReplCoordReconfigTest, ReconfigNeverModifiesExistingNewlyAddedField) {
+TEST_F(ReplCoordReconfigTest, ReconfigNeverModifiesExistingNewlyAddedFieldForMember) {
// Set the flag to add the 'newlyAdded' field to MemberConfigs.
enableAutomaticReconfig = true;
// Set the flag back to false after this test exits.
@@ -1549,6 +1570,7 @@ TEST_F(ReplCoordReconfigTest, ReconfigNeverModifiesExistingNewlyAddedField) {
auto opCtx = makeOperationContext();
// Do a reconfig that adds a new member.
auto members = BSON_ARRAY(member(1, "n1:1") << member(2, "n2:1") << member(3, "n3:1"));
+ initialSyncNodes.emplace_back(HostAndPort("n3:1"));
startCapturingLogMessages();
ASSERT_OK(doSafeReconfig(opCtx.get(), 2, members, 1 /* quorumHbs */));
@@ -1599,6 +1621,7 @@ TEST_F(ReplCoordReconfigTest, ReconfigNeverModifiesExistingNewlyAddedFieldForPre
auto opCtx = makeOperationContext();
// Do a reconfig that adds a new member.
auto members = BSON_ARRAY(member(1, "n1:1") << member(2, "n2:1") << member(3, "n3:1"));
+ initialSyncNodes.emplace_back(HostAndPort("n3:1"));
startCapturingLogMessages();
ASSERT_OK(doSafeReconfig(opCtx.get(), 2, members, 1 /* quorumHbs */));
@@ -1623,6 +1646,7 @@ TEST_F(ReplCoordReconfigTest, ReconfigNeverModifiesExistingNewlyAddedFieldForPre
// Add another new member to the set.
members = BSON_ARRAY(member(1, "n1:1")
<< member(2, "n2:1") << member(3, "n3:1") << member(4, "n4:1"));
+ initialSyncNodes.emplace_back(HostAndPort("n4:1"));
startCapturingLogMessages();
ASSERT_OK(doSafeReconfig(opCtx.get(), 3, members, 2 /* quorumHbs */));
@@ -1688,6 +1712,7 @@ TEST_F(ReplCoordReconfigTest, NodesWithNewlyAddedFieldSetHavePriorityZero) {
<< BSON("_id" << 3 << "host"
<< "n3:1"
<< "priority" << 3));
+ initialSyncNodes.emplace_back(HostAndPort("n3:1"));
startCapturingLogMessages();
ASSERT_OK(doSafeReconfig(opCtx.get(), 2, members, 1 /* quorumHbs */));
@@ -1719,6 +1744,7 @@ TEST_F(ReplCoordReconfigTest, NodesWithNewlyAddedFieldSetHavePriorityZero) {
<< BSON("_id" << 4 << "host"
<< "n4:1"
<< "priority" << 4));
+ initialSyncNodes.emplace_back(HostAndPort("n4:1"));
startCapturingLogMessages();
ASSERT_OK(doSafeReconfig(opCtx.get(), 3, members, 2 /* quorumHbs */));