From d8ff665343ad29cf286ee2cf4a1960d29371937b Mon Sep 17 00:00:00 2001 From: Matthew Russotto Date: Thu, 10 Feb 2022 11:11:41 -0500 Subject: SERVER-63143 Operation can be interrupted by maxTimeMS timeout while waiting for lock even if _ignoreInterruptsExceptForReplStateChange is set --- src/mongo/db/operation_context.cpp | 10 ++--- src/mongo/db/operation_context.h | 10 +++++ src/mongo/db/operation_context_test.cpp | 68 +++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 6 deletions(-) (limited to 'src/mongo') diff --git a/src/mongo/db/operation_context.cpp b/src/mongo/db/operation_context.cpp index d39cb440260..649ad363b0c 100644 --- a/src/mongo/db/operation_context.cpp +++ b/src/mongo/db/operation_context.cpp @@ -218,11 +218,7 @@ bool opShouldFail(Client* client, const BSONObj& failPointInfo) { } // namespace Status OperationContext::checkForInterruptNoAssert() noexcept { - const auto killStatus = getKillStatus(); - - if (_ignoreInterruptsExceptForReplStateChange && - killStatus != ErrorCodes::InterruptedDueToReplStateChange && - !_killRequestedForReplStateChange.loadRelaxed()) { + if (_noReplStateChangeWhileIgnoringOtherInterrupts()) { return Status::OK(); } @@ -258,6 +254,7 @@ Status OperationContext::checkForInterruptNoAssert() noexcept { }, [&](auto&& data) { return opShouldFail(getClient(), data); }); + const auto killStatus = getKillStatus(); if (killStatus != ErrorCodes::OK) { if (killStatus == ErrorCodes::TransactionExceededLifetimeLimitSeconds) return Status( @@ -314,7 +311,8 @@ StatusWith OperationContext::waitForConditionOrInterruptNoAsser // maxTimeNeverTimeOut is set) then we assume that the incongruity is due to a clock mismatch // and return _timeoutError regardless. To prevent this behaviour, only consider the op's // deadline in the event that the maxTimeNeverTimeOut failpoint is not set. - bool opHasDeadline = (hasDeadline() && !MONGO_unlikely(maxTimeNeverTimeOut.shouldFail())); + bool opHasDeadline = (hasDeadline() && !_noReplStateChangeWhileIgnoringOtherInterrupts() && + !MONGO_unlikely(maxTimeNeverTimeOut.shouldFail())); if (opHasDeadline) { deadline = std::min(deadline, getDeadline()); diff --git a/src/mongo/db/operation_context.h b/src/mongo/db/operation_context.h index 12b5eb60283..be234281d8b 100644 --- a/src/mongo/db/operation_context.h +++ b/src/mongo/db/operation_context.h @@ -634,6 +634,16 @@ private: } } + /** + * Returns true if ignoring interrupts other than repl state change and no repl state change + * has occurred. + */ + bool _noReplStateChangeWhileIgnoringOtherInterrupts() const { + return _ignoreInterruptsExceptForReplStateChange && + getKillStatus() != ErrorCodes::InterruptedDueToReplStateChange && + !_killRequestedForReplStateChange.loadRelaxed(); + } + /** * Returns true if this operation has a deadline and it has passed according to the fast clock * on ServiceContext. diff --git a/src/mongo/db/operation_context_test.cpp b/src/mongo/db/operation_context_test.cpp index 66fcda8632a..6850bc61f03 100644 --- a/src/mongo/db/operation_context_test.cpp +++ b/src/mongo/db/operation_context_test.cpp @@ -737,6 +737,74 @@ TEST_F(OperationDeadlineTests, DuringWaitMaxTimeExpirationDominatesUntilExpirati ASSERT_TRUE(opCtx->getCancellationToken().isCanceled()); } +TEST_F(OperationDeadlineTests, + MaxTimeExpirationIgnoredWhenIgnoringInterruptsExceptReplStateChange) { + auto opCtx = client->makeOperationContext(); + opCtx->setDeadlineByDate(mockClock->now(), ErrorCodes::MaxTimeMSExpired); + auto m = MONGO_MAKE_LATCH(); + stdx::condition_variable cv; + stdx::unique_lock lk(m); + ASSERT_FALSE(opCtx->getCancellationToken().isCanceled()); + opCtx->setIgnoreInterruptsExceptForReplStateChange(true); + // Advance the clock so the MaxTimeMS is hit before the timeout. + mockClock->advance(Milliseconds(100)); + ASSERT_FALSE( + opCtx->waitForConditionOrInterruptUntil(cv, lk, mockClock->now(), [] { return false; })); + ASSERT_FALSE(opCtx->getCancellationToken().isCanceled()); +} + +TEST_F(OperationDeadlineTests, + AlreadyExpiredMaxTimeIgnoredWhenIgnoringInterruptsExceptReplStateChange) { + auto opCtx = client->makeOperationContext(); + opCtx->setDeadlineByDate(mockClock->now(), ErrorCodes::MaxTimeMSExpired); + auto m = MONGO_MAKE_LATCH(); + stdx::condition_variable cv; + stdx::unique_lock lk(m); + ASSERT_FALSE(opCtx->getCancellationToken().isCanceled()); + ASSERT_THROWS_CODE(opCtx->waitForConditionOrInterruptUntil( + cv, lk, mockClock->now() + Seconds(1), [] { return false; }), + DBException, + ErrorCodes::MaxTimeMSExpired); + + ASSERT_EQ(ErrorCodes::MaxTimeMSExpired, opCtx->checkForInterruptNoAssert()); + ASSERT_TRUE(opCtx->getCancellationToken().isCanceled()); + opCtx->setIgnoreInterruptsExceptForReplStateChange(true); + ASSERT_OK(opCtx->checkForInterruptNoAssert()); + // Advance the clock so the MaxTimeMS is hit before the timeout. + mockClock->advance(Milliseconds(100)); + ASSERT_FALSE( + opCtx->waitForConditionOrInterruptUntil(cv, lk, mockClock->now(), [] { return false; })); + ASSERT_TRUE(opCtx->getCancellationToken().isCanceled()); +} + +TEST_F(OperationDeadlineTests, + MaxTimeRespectedAfterReplStateChangeWhenIgnoringInterruptsExceptReplStateChange) { + auto opCtx = client->makeOperationContext(); + opCtx->setDeadlineByDate(mockClock->now(), ErrorCodes::MaxTimeMSExpired); + auto m = MONGO_MAKE_LATCH(); + stdx::condition_variable cv; + stdx::unique_lock lk(m); + ASSERT_FALSE(opCtx->getCancellationToken().isCanceled()); + ASSERT_THROWS_CODE(opCtx->waitForConditionOrInterruptUntil( + cv, lk, mockClock->now() + Seconds(1), [] { return false; }), + DBException, + ErrorCodes::MaxTimeMSExpired); + + ASSERT_EQ(ErrorCodes::MaxTimeMSExpired, opCtx->checkForInterruptNoAssert()); + ASSERT_TRUE(opCtx->getCancellationToken().isCanceled()); + opCtx->setIgnoreInterruptsExceptForReplStateChange(true); + ASSERT_OK(opCtx->checkForInterruptNoAssert()); + opCtx->markKilled(ErrorCodes::InterruptedDueToReplStateChange); + ASSERT_EQ(ErrorCodes::MaxTimeMSExpired, opCtx->checkForInterruptNoAssert()); + // Advance the clock so the MaxTimeMS is hit before the timeout. + mockClock->advance(Milliseconds(100)); + ASSERT_THROWS_CODE( + opCtx->waitForConditionOrInterruptUntil(cv, lk, mockClock->now(), [] { return false; }), + DBException, + ErrorCodes::MaxTimeMSExpired); + ASSERT_TRUE(opCtx->getCancellationToken().isCanceled()); +} + class ThreadedOperationDeadlineTests : public OperationDeadlineTests { public: using CvPred = std::function; -- cgit v1.2.1