From 63b46f14867df2de8f823b9eb1b65bda3ac4b1c2 Mon Sep 17 00:00:00 2001 From: Matthew Russotto Date: Thu, 18 Nov 2021 21:57:48 -0500 Subject: SERVER-61593 Test command waitForMemberState should be interruptible --- src/mongo/db/repl/repl_set_commands.cpp | 2 +- src/mongo/db/repl/replication_coordinator.h | 5 ++- src/mongo/db/repl/replication_coordinator_impl.cpp | 5 ++- src/mongo/db/repl/replication_coordinator_impl.h | 4 +- ...lication_coordinator_impl_heartbeat_v1_test.cpp | 3 +- .../db/repl/replication_coordinator_impl_test.cpp | 45 +++++++++++++++------- src/mongo/db/repl/replication_coordinator_mock.cpp | 3 +- src/mongo/db/repl/replication_coordinator_mock.h | 4 +- src/mongo/db/repl/replication_coordinator_noop.cpp | 2 +- src/mongo/db/repl/replication_coordinator_noop.h | 2 +- .../embedded/replication_coordinator_embedded.cpp | 4 +- .../embedded/replication_coordinator_embedded.h | 2 +- 12 files changed, 56 insertions(+), 25 deletions(-) (limited to 'src') diff --git a/src/mongo/db/repl/repl_set_commands.cpp b/src/mongo/db/repl/repl_set_commands.cpp index 41ae248bf07..7299875254e 100644 --- a/src/mongo/db/repl/repl_set_commands.cpp +++ b/src/mongo/db/repl/repl_set_commands.cpp @@ -129,7 +129,7 @@ public: "timeout"_attr = timeout, "expectedState"_attr = expectedState); - status = replCoord->waitForMemberState(expectedState, timeout); + status = replCoord->waitForMemberState(opCtx, expectedState, timeout); uassertStatusOK(status); return true; diff --git a/src/mongo/db/repl/replication_coordinator.h b/src/mongo/db/repl/replication_coordinator.h index 06810324584..e1a49788d4f 100644 --- a/src/mongo/db/repl/replication_coordinator.h +++ b/src/mongo/db/repl/replication_coordinator.h @@ -196,8 +196,11 @@ public: * Returns OK if member state is 'state'. * Returns ErrorCodes::ExceededTimeLimit if we timed out waiting for the state change. * Returns ErrorCodes::BadValue if timeout is negative. + * Throws if interrupted (pass Interruptible::notInterruptible() if interruption is not desired. */ - virtual Status waitForMemberState(MemberState expectedState, Milliseconds timeout) = 0; + virtual Status waitForMemberState(Interruptible* interruptible, + MemberState expectedState, + Milliseconds timeout) = 0; /** * Returns true if this node is in state PRIMARY or SECONDARY. diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index ded42849496..4a7c66832d1 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -1074,7 +1074,8 @@ MemberState ReplicationCoordinatorImpl::_getMemberState_inlock() const { return _memberState; } -Status ReplicationCoordinatorImpl::waitForMemberState(MemberState expectedState, +Status ReplicationCoordinatorImpl::waitForMemberState(Interruptible* interruptible, + MemberState expectedState, Milliseconds timeout) { if (timeout < Milliseconds(0)) { return Status(ErrorCodes::BadValue, "Timeout duration cannot be negative"); @@ -1082,7 +1083,7 @@ Status ReplicationCoordinatorImpl::waitForMemberState(MemberState expectedState, stdx::unique_lock lk(_mutex); auto pred = [this, expectedState]() { return _memberState == expectedState; }; - if (!_memberStateChange.wait_for(lk, timeout.toSystemDuration(), pred)) { + if (!interruptible->waitForConditionOrInterruptFor(_memberStateChange, lk, timeout, pred)) { return Status(ErrorCodes::ExceededTimeLimit, str::stream() << "Timed out waiting for state to become " << expectedState.toString() diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h index dae323acb51..0119194b0dc 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.h +++ b/src/mongo/db/repl/replication_coordinator_impl.h @@ -126,7 +126,9 @@ public: virtual bool canAcceptNonLocalWrites() const override; - virtual Status waitForMemberState(MemberState expectedState, Milliseconds timeout) override; + virtual Status waitForMemberState(Interruptible* interruptible, + MemberState expectedState, + Milliseconds timeout) override; virtual bool isInPrimaryOrSecondaryState(OperationContext* opCtx) const override; diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp index 2e08f3890d0..d6b75739267 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp @@ -1520,7 +1520,8 @@ void HBStepdownAndReconfigTest::assertSteppedDown() { LOGV2(463811, "Waiting for step down to complete"); // Wait for step down to finish since it may be asynchronous. auto timeout = Milliseconds(5 * 60 * 1000); - ASSERT_OK(getReplCoord()->waitForMemberState(MemberState::RS_SECONDARY, timeout)); + ASSERT_OK(getReplCoord()->waitForMemberState( + Interruptible::notInterruptible(), MemberState::RS_SECONDARY, timeout)); // Primary stepped down. ASSERT_EQUALS(2, getReplCoord()->getTerm()); diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp index ffbd17f7c82..9a2e2b84c5c 100644 --- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp @@ -3092,7 +3092,8 @@ TEST_F(ReplCoordTest, DoNotAllowMaintenanceModeWhilePrimary) { // Step down from primary. getReplCoord()->updateTerm(opCtx.get(), getReplCoord()->getTerm() + 1).transitional_ignore(); - ASSERT_OK(getReplCoord()->waitForMemberState(MemberState::RS_SECONDARY, Seconds(1))); + ASSERT_OK( + getReplCoord()->waitForMemberState(opCtx.get(), MemberState::RS_SECONDARY, Seconds(1))); status = getReplCoord()->setMaintenanceMode(opCtx.get(), false); ASSERT_EQUALS(ErrorCodes::OperationFailed, status); @@ -3125,7 +3126,7 @@ TEST_F(ReplCoordTest, DoNotAllowSettingMaintenanceModeWhileConductingAnElection) // Step down from primary. getReplCoord()->updateTerm(opCtx.get(), getReplCoord()->getTerm() + 1).transitional_ignore(); getReplCoord() - ->waitForMemberState(MemberState::RS_SECONDARY, Milliseconds(10 * 1000)) + ->waitForMemberState(opCtx.get(), MemberState::RS_SECONDARY, Milliseconds(10 * 1000)) .transitional_ignore(); // Can't modify maintenance mode when running for election (before and after dry run). @@ -3739,7 +3740,8 @@ TEST_F(ReplCoordTest, HelloReturnsErrorInQuiesceModeWhenNodeIsRemoved) { exitNetwork(); // Wait for the node to be removed. Test that we increment the topology version. - ASSERT_OK(getReplCoord()->waitForMemberState(MemberState::RS_REMOVED, Seconds(1))); + ASSERT_OK(getReplCoord()->waitForMemberState( + Interruptible::notInterruptible(), MemberState::RS_REMOVED, Seconds(1))); ASSERT_EQUALS(removedFromConfig.getConfigVersion(), getReplCoord()->getConfigVersion()); auto topologyVersionAfterRemoved = getTopoCoord().getTopologyVersion(); ASSERT_EQUALS(topologyVersionAfterQuiesceMode.getCounter() + 1, @@ -4392,7 +4394,8 @@ TEST_F(ReplCoordTest, HelloOnRemovedNode) { exitNetwork(); // node1 no longer exists in the replica set config. - ASSERT_OK(getReplCoord()->waitForMemberState(MemberState::RS_REMOVED, Seconds(1))); + ASSERT_OK(getReplCoord()->waitForMemberState( + Interruptible::notInterruptible(), MemberState::RS_REMOVED, Seconds(1))); ASSERT_EQUALS(removedFromConfig.getConfigVersion(), getReplCoord()->getConfigVersion()); const auto maxAwaitTime = Milliseconds(5000); @@ -4553,7 +4556,7 @@ TEST_F(ReplCoordTest, AwaitHelloRespondsCorrectlyWhenNodeRemovedAndReadded) { exitNetwork(); // node1 no longer exists in the replica set config. - ASSERT_OK(getReplCoord()->waitForMemberState(MemberState::RS_REMOVED, Seconds(1))); + ASSERT_OK(getReplCoord()->waitForMemberState(opCtx.get(), MemberState::RS_REMOVED, Seconds(1))); ASSERT_EQUALS(removedFromConfig.getConfigVersion(), getReplCoord()->getConfigVersion()); getHelloWaitingForRemovedNodeThread.join(); const std::string newHorizonSniName = "newhorizon.com"; @@ -4593,7 +4596,8 @@ TEST_F(ReplCoordTest, AwaitHelloRespondsCorrectlyWhenNodeRemovedAndReadded) { }); replyToReceivedHeartbeatV1(); reconfigThread.join(); - ASSERT_OK(getReplCoord()->waitForMemberState(MemberState::RS_SECONDARY, Seconds(1))); + ASSERT_OK( + getReplCoord()->waitForMemberState(opCtx.get(), MemberState::RS_SECONDARY, Seconds(1))); getHelloThread.join(); stdx::thread getHelloThreadNewHorizon([&] { @@ -6605,7 +6609,8 @@ TEST_F(ReplCoordTest, net->runReadyNetworkOperations(); net->exitNetwork(); - ASSERT_OK(getReplCoord()->waitForMemberState(MemberState::RS_REMOVED, Seconds(1))); + ASSERT_OK(getReplCoord()->waitForMemberState( + Interruptible::notInterruptible(), MemberState::RS_REMOVED, Seconds(1))); ASSERT_EQUALS(config.getConfigVersion(), getReplCoord()->getConfigVersion()); getReplCoord()->cancelAndRescheduleElectionTimeout(); @@ -7293,7 +7298,8 @@ TEST_F(ReplCoordTest, StepDownWhenHandleLivenessTimeoutMarksAMajorityOfVotingNod } getNet()->exitNetwork(); - ASSERT_OK(getReplCoord()->waitForMemberState(MemberState::RS_SECONDARY, Hours{1})); + ASSERT_OK(getReplCoord()->waitForMemberState( + Interruptible::notInterruptible(), MemberState::RS_SECONDARY, Hours{1})); } TEST_F(ReplCoordTest, WaitForMemberState) { @@ -7318,17 +7324,30 @@ TEST_F(ReplCoordTest, WaitForMemberState) { ASSERT_EQUALS(initialTerm + 1, replCoord->getTerm()); auto timeout = Milliseconds(1); - ASSERT_OK(replCoord->waitForMemberState(MemberState::RS_PRIMARY, timeout)); + ASSERT_OK(replCoord->waitForMemberState( + Interruptible::notInterruptible(), MemberState::RS_PRIMARY, timeout)); ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, - replCoord->waitForMemberState(MemberState::RS_REMOVED, timeout)); + replCoord->waitForMemberState( + Interruptible::notInterruptible(), MemberState::RS_REMOVED, timeout)); ASSERT_EQUALS(ErrorCodes::BadValue, - replCoord->waitForMemberState(MemberState::RS_PRIMARY, Milliseconds(-1))); + replCoord->waitForMemberState(Interruptible::notInterruptible(), + MemberState::RS_PRIMARY, + Milliseconds(-1))); // Zero timeout is fine. - ASSERT_OK(replCoord->waitForMemberState(MemberState::RS_PRIMARY, Milliseconds(0))); + ASSERT_OK(replCoord->waitForMemberState( + Interruptible::notInterruptible(), MemberState::RS_PRIMARY, Milliseconds(0))); ASSERT_EQUALS(ErrorCodes::ExceededTimeLimit, - replCoord->waitForMemberState(MemberState::RS_ARBITER, Milliseconds(0))); + replCoord->waitForMemberState( + Interruptible::notInterruptible(), MemberState::RS_ARBITER, Milliseconds(0))); + + // Make sure it can be interrupted. + auto opCtx = makeOperationContext(); + opCtx->markKilled(ErrorCodes::Interrupted); + ASSERT_THROWS( + replCoord->waitForMemberState(opCtx.get(), MemberState::RS_ARBITER, Milliseconds(0)), + ExceptionFor); } TEST_F( diff --git a/src/mongo/db/repl/replication_coordinator_mock.cpp b/src/mongo/db/repl/replication_coordinator_mock.cpp index 788f2991a70..88df8f224ec 100644 --- a/src/mongo/db/repl/replication_coordinator_mock.cpp +++ b/src/mongo/db/repl/replication_coordinator_mock.cpp @@ -139,7 +139,8 @@ void ReplicationCoordinatorMock::setCanAcceptNonLocalWrites(bool canAcceptNonLoc _canAcceptNonLocalWrites = canAcceptNonLocalWrites; } -Status ReplicationCoordinatorMock::waitForMemberState(MemberState expectedState, +Status ReplicationCoordinatorMock::waitForMemberState(Interruptible* interruptible, + MemberState expectedState, Milliseconds timeout) { MONGO_UNREACHABLE; return Status::OK(); diff --git a/src/mongo/db/repl/replication_coordinator_mock.h b/src/mongo/db/repl/replication_coordinator_mock.h index f20e2cadadc..60e48011f1e 100644 --- a/src/mongo/db/repl/replication_coordinator_mock.h +++ b/src/mongo/db/repl/replication_coordinator_mock.h @@ -91,7 +91,9 @@ public: virtual bool canAcceptNonLocalWrites() const; - virtual Status waitForMemberState(MemberState expectedState, Milliseconds timeout) override; + virtual Status waitForMemberState(Interruptible* interruptible, + MemberState expectedState, + Milliseconds timeout) override; virtual bool isInPrimaryOrSecondaryState(OperationContext* opCtx) const; diff --git a/src/mongo/db/repl/replication_coordinator_noop.cpp b/src/mongo/db/repl/replication_coordinator_noop.cpp index 655a5f48904..8429ac7e568 100644 --- a/src/mongo/db/repl/replication_coordinator_noop.cpp +++ b/src/mongo/db/repl/replication_coordinator_noop.cpp @@ -165,7 +165,7 @@ bool ReplicationCoordinatorNoOp::canAcceptNonLocalWrites() const { MONGO_UNREACHABLE; } -Status ReplicationCoordinatorNoOp::waitForMemberState(MemberState, Milliseconds) { +Status ReplicationCoordinatorNoOp::waitForMemberState(Interruptible*, MemberState, Milliseconds) { MONGO_UNREACHABLE; } diff --git a/src/mongo/db/repl/replication_coordinator_noop.h b/src/mongo/db/repl/replication_coordinator_noop.h index 7183fdaac4a..74ef742d088 100644 --- a/src/mongo/db/repl/replication_coordinator_noop.h +++ b/src/mongo/db/repl/replication_coordinator_noop.h @@ -101,7 +101,7 @@ public: std::vector getMemberData() const final; - Status waitForMemberState(MemberState, Milliseconds) final; + Status waitForMemberState(Interruptible*, MemberState, Milliseconds) final; Seconds getSecondaryDelaySecs() const final; diff --git a/src/mongo/embedded/replication_coordinator_embedded.cpp b/src/mongo/embedded/replication_coordinator_embedded.cpp index 5a117927635..7e574d26f2c 100644 --- a/src/mongo/embedded/replication_coordinator_embedded.cpp +++ b/src/mongo/embedded/replication_coordinator_embedded.cpp @@ -170,7 +170,9 @@ bool ReplicationCoordinatorEmbedded::canAcceptNonLocalWrites() const { UASSERT_NOT_IMPLEMENTED; } -Status ReplicationCoordinatorEmbedded::waitForMemberState(MemberState, Milliseconds) { +Status ReplicationCoordinatorEmbedded::waitForMemberState(Interruptible*, + MemberState, + Milliseconds) { UASSERT_NOT_IMPLEMENTED; } diff --git a/src/mongo/embedded/replication_coordinator_embedded.h b/src/mongo/embedded/replication_coordinator_embedded.h index 9a408d645fd..cf2287dc478 100644 --- a/src/mongo/embedded/replication_coordinator_embedded.h +++ b/src/mongo/embedded/replication_coordinator_embedded.h @@ -105,7 +105,7 @@ public: std::vector getMemberData() const override; - Status waitForMemberState(repl::MemberState, Milliseconds) override; + Status waitForMemberState(Interruptible*, repl::MemberState, Milliseconds) override; Seconds getSecondaryDelaySecs() const override; -- cgit v1.2.1