summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: XueruiFa <xuerui.fa@mongodb.com> 2020-10-08 19:58:21 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-10-14 15:45:17 +0000
commit: 4d212ea86b46ec06196cc979b3f760c278958a8f (patch)
tree: 7491f874815f2bffa1c107c839ad07edddbf32ee
parent: 1f0009a389042c24360509625d50a9e3812658c7 (diff)
download: mongo-4d212ea86b46ec06196cc979b3f760c278958a8f.tar.gz
SERVER-51418: Resolve race condition in StepdownShouldInterruptConfigWrite
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp 133
1 files changed, 73 insertions, 60 deletions
diff --git a/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp
index 2979160b523..b92245d47db 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_reconfig_test.cpp
@@ -1408,66 +1408,79 @@ TEST_F(ReplCoordReconfigTest,
ASSERT_OK(status);
}
-// TEST_F(ReplCoordReconfigTest, StepdownShouldInterruptConfigWrite) {
-// // Start out in a non-initial config version.
-// init();
-// auto configVersion = 2;
-// assertStartSuccess(configWithMembers(configVersion,
-// 0,
-// BSON_ARRAY(member(1, "n1:1")
-// << member(2, "n2:1") << member(3, "n3:1",
-// 0))),
-// HostAndPort("n1", 1));
-// ASSERT_OK(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
-
-// // Simulate application of one oplog entry.
-// replCoordSetMyLastAppliedAndDurableOpTime(OpTime(Timestamp(1, 1), 0));
-
-// // Get elected primary.
-// simulateSuccessfulV1Election();
-// ASSERT_EQ(getReplCoord()->getMemberState(), MemberState::RS_PRIMARY);
-// ASSERT_EQ(getReplCoord()->getTerm(), 1);
-
-// // Advance your optime.
-// auto commitPoint = OpTime(Timestamp(2, 1), 1);
-// replCoordSetMyLastAppliedAndDurableOpTime(commitPoint);
-// replicateOpTo(2, commitPoint);
-
-// // Respond to heartbeats before reconfig.
-// respondToAllHeartbeats();
-
-// // Do a reconfig that should fail due to stepdown.
-// configVersion = 3;
-// ReplSetReconfigArgs args;
-// args.newConfigObj = configWithMembers(
-// configVersion, 1, BSON_ARRAY(member(1, "n1:1") << member(2, "n2:1") << member(3,
-// "n3:1")));
-
-// BSONObjBuilder result;
-// Status status(ErrorCodes::InternalError, "Not Set");
-// const auto opCtx = makeOperationContext();
-// auto reconfigResult = stdx::async(stdx::launch::async, [&] {
-// status = getReplCoord()->processReplSetReconfig(opCtx.get(), args, &result);
-// });
-
-// // Step down due to a higher term.
-// TopologyCoordinator::UpdateTermResult termUpdated;
-// auto updateTermEvh = getReplCoord()->updateTerm_forTest(2, &termUpdated);
-// ASSERT(termUpdated == TopologyCoordinator::UpdateTermResult::kTriggerStepDown);
-// ASSERT(updateTermEvh.isValid());
-// getReplExec()->waitForEvent(updateTermEvh);
-
-// // Respond to quorum check to resume the reconfig. We keep responding until the reconfig
-// thread
-// // finishes.
-// while (stdx::future_status::ready !=
-// reconfigResult.wait_for(Milliseconds::zero().toSystemDuration())) {
-// respondToAllHeartbeats();
-// }
-
-// ASSERT_EQ(status.code(), ErrorCodes::NotWritablePrimary);
-// ASSERT_EQ(status.reason(), "Stepped down when persisting new config");
-// }
+TEST_F(ReplCoordReconfigTest, StepdownShouldInterruptConfigWrite) {
+ // Start out in a non-initial config version.
+ init();
+ auto configVersion = 2;
+ assertStartSuccess(configWithMembers(configVersion,
+ 0,
+ BSON_ARRAY(member(1, "n1:1")
+ << member(2, "n2:1") << member(3, "n3:1", 0))),
+ HostAndPort("n1", 1));
+ ASSERT_OK(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+
+ // Simulate application of one oplog entry.
+ replCoordSetMyLastAppliedAndDurableOpTime(OpTime(Timestamp(1, 1), 0));
+
+ // Get elected primary.
+ simulateSuccessfulV1Election();
+ ASSERT_EQ(getReplCoord()->getMemberState(), MemberState::RS_PRIMARY);
+ ASSERT_EQ(getReplCoord()->getTerm(), 1);
+
+ // Advance your optime.
+ auto commitPoint = OpTime(Timestamp(2, 1), 1);
+ replCoordSetMyLastAppliedAndDurableOpTime(commitPoint);
+ replicateOpTo(2, commitPoint);
+
+ // Respond to heartbeats before reconfig.
+ respondToAllHeartbeats();
+
+ // Do a reconfig that should fail due to stepdown.
+ configVersion = 3;
+ ReplSetReconfigArgs args;
+ args.newConfigObj = configWithMembers(
+ configVersion, 1, BSON_ARRAY(member(1, "n1:1") << member(2, "n2:1") << member(3, "n3:1")));
+
+ // Turn on a reconfig hang to ensure that the reconfig thread will be scheduled before the node
+ // is stepped down. If the node is stepped down before reconfig is initiated, the reconfig will
+ // fail with a different reason than expected.
+ auto hangReconfig = globalFailPointRegistry().find("ReconfigHangBeforeConfigValidationCheck");
+ auto timesEnteredFailPoint = hangReconfig->setMode(FailPoint::alwaysOn);
+
+ BSONObjBuilder result;
+ Status status(ErrorCodes::InternalError, "Not Set");
+ const auto opCtx = makeOperationContext();
+ auto reconfigResult = stdx::async(stdx::launch::async, [&] {
+ status = getReplCoord()->processReplSetReconfig(opCtx.get(), args, &result);
+ });
+
+ // Ensure the reconfig thread has started before stepping down as primary.
+ hangReconfig->waitForTimesEntered(timesEnteredFailPoint + 1);
+ hangReconfig->setMode(FailPoint::off);
+
+ // The failpoint should be released now. We run the clock forward so that the reconfig can
+ // continue.
+ getNet()->enterNetwork();
+ getNet()->runUntil(getNet()->now() + Milliseconds(200));
+ getNet()->exitNetwork();
+
+ // Step down due to a higher term.
+ TopologyCoordinator::UpdateTermResult termUpdated;
+ auto updateTermEvh = getReplCoord()->updateTerm_forTest(2, &termUpdated);
+ ASSERT(termUpdated == TopologyCoordinator::UpdateTermResult::kTriggerStepDown);
+ ASSERT(updateTermEvh.isValid());
+ getReplExec()->waitForEvent(updateTermEvh);
+
+ // Respond to quorum check to resume the reconfig. We keep responding until the reconfig thread
+ // finishes.
+ while (stdx::future_status::ready !=
+ reconfigResult.wait_for(Milliseconds::zero().toSystemDuration())) {
+ respondToAllHeartbeats();
+ }
+
+ ASSERT_EQ(status.code(), ErrorCodes::NotWritablePrimary);
+ ASSERT_EQ(status.reason(), "Stepped down when persisting new config");
+}
TEST_F(ReplCoordReconfigTest, StartElectionOnReconfigToSingleNode) {
// Start up as a secondary.