diff options
author | Randolph Tan <randolph@10gen.com> | 2021-01-14 15:20:00 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-04-23 13:31:50 +0000 |
commit | c2295adab43675bfde8c9b2aa5795d9b7fccb6b0 (patch) | |
tree | f929e0234e5486497872bddb7183c2ae68cead81 | |
parent | b67407b986ea715b0b9948c64a90369809ff6da0 (diff) | |
download | mongo-c2295adab43675bfde8c9b2aa5795d9b7fccb6b0.tar.gz |
SERVER-52564 Deadlock between step down and MongoDOperationContextSession
(cherry picked from commit 6ee5a25cfc951f6e914dcc9f7d1a63d2e7aeaa67)
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.cpp | 6 | ||||
-rw-r--r-- | src/mongo/db/session_catalog.cpp | 22 | ||||
-rw-r--r-- | src/mongo/db/session_catalog.h | 32 |
3 files changed, 60 insertions, 0 deletions
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index b9c730d12c1..da72aaa2fb3 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -83,6 +83,7 @@ #include "mongo/db/repl/update_position_args.h" #include "mongo/db/repl/vote_requester.h" #include "mongo/db/server_options.h" +#include "mongo/db/session_catalog.h" #include "mongo/db/storage/storage_options.h" #include "mongo/db/write_concern.h" #include "mongo/db/write_concern_options.h" @@ -2050,6 +2051,11 @@ void ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx, CurOpFailpointHelpers::waitWhileFailPointEnabled( &stepdownHangBeforeRSTLEnqueue, opCtx, "stepdownHangBeforeRSTLEnqueue"); + // To prevent a deadlock between session checkout and RSTL lock taking, disallow new sessions + // from being checked out. Existing sessions currently checked out will be killed by the + // killOpThread. + ScopedBlockSessionCheckouts blockSessions(opCtx); + // Using 'force' sets the default for the wait time to zero, which means the stepdown will // fail if it does not acquire the lock immediately. In such a scenario, we use the // stepDownUntil deadline instead. 
diff --git a/src/mongo/db/session_catalog.cpp b/src/mongo/db/session_catalog.cpp index 838c805b48f..6ae6662f3f2 100644 --- a/src/mongo/db/session_catalog.cpp +++ b/src/mongo/db/session_catalog.cpp @@ -79,6 +79,10 @@ SessionCatalog::ScopedCheckedOutSession SessionCatalog::_checkOutSession(Operati invariant(!opCtx->lockState()->isLocked()); stdx::unique_lock<Latch> ul(_mutex); + uassert(ErrorCodes::InterruptedDueToReplStateChange, + "a stepdown process started, can't checkout sessions except for killing", + _checkoutAllowed); + auto sri = _getOrCreateSessionRuntimeInfo(ul, opCtx, *opCtx->getLogicalSessionId()); // Wait until the session is no longer checked out and until the previously scheduled kill has @@ -171,6 +175,16 @@ void SessionCatalog::scanSessions(const SessionKiller::Matcher& matcher, } } +void SessionCatalog::_disallowCheckoutsExceptForKilling() { + stdx::unique_lock<Latch> ul(_mutex); + _checkoutAllowed = false; +} + +void SessionCatalog::_allowCheckouts() { + stdx::lock_guard<Latch> lg(_mutex); + _checkoutAllowed = true; +} + SessionCatalog::KillToken SessionCatalog::killSession(const LogicalSessionId& lsid) { stdx::lock_guard<Latch> lg(_mutex); auto it = _sessions.find(lsid); @@ -320,4 +334,12 @@ void OperationContextSession::checkOut(OperationContext* opCtx) { checkedOutSession.emplace(std::move(scopedCheckedOutSession)); } +ScopedBlockSessionCheckouts::ScopedBlockSessionCheckouts(OperationContext* opCtx) : _opCtx(opCtx) { + SessionCatalog::get(_opCtx)->_disallowCheckoutsExceptForKilling(); +} + +ScopedBlockSessionCheckouts::~ScopedBlockSessionCheckouts() { + SessionCatalog::get(_opCtx)->_allowCheckouts(); +} + } // namespace mongo diff --git a/src/mongo/db/session_catalog.h b/src/mongo/db/session_catalog.h index 0a51e9afe09..53bbfa2e22a 100644 --- a/src/mongo/db/session_catalog.h +++ b/src/mongo/db/session_catalog.h @@ -46,6 +46,7 @@ namespace mongo { class ObservableSession; +class ScopedBlockSessionCheckouts; /** * Keeps track of the transaction 
runtime state for every active session on this instance. @@ -116,6 +117,7 @@ public: size_t size() const; private: + friend ScopedBlockSessionCheckouts; struct SessionRuntimeInfo { SessionRuntimeInfo(LogicalSessionId lsid) : session(std::move(lsid)) {} ~SessionRuntimeInfo(); @@ -153,12 +155,27 @@ private: */ void _releaseSession(SessionRuntimeInfo* sri, boost::optional<KillToken> killToken); + /** + * Disallow checkouts that are not for killing. + */ + void _disallowCheckoutsExceptForKilling(); + + /** + * Re-enable checkouts if they were disallowed earlier. + */ + void _allowCheckouts(); + // Protects the state below mutable Mutex _mutex = MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(0), "SessionCatalog::_mutex"); // Owns the Session objects for all current Sessions. SessionRuntimeInfoMap _sessions; + + // If false, no new sessions can be checked out. One reason this could be false is that a step + // down is in progress and we should not allow new sessions to get checked out in order to + // prevent deadlocks. + bool _checkoutAllowed{true}; }; /** @@ -374,4 +391,19 @@ private: OperationContext* const _opCtx; }; +/** + * Scoped object, while active will prevent the checkout of sessions except for killing. + */ +class ScopedBlockSessionCheckouts { + ScopedBlockSessionCheckouts(const ScopedBlockSessionCheckouts&) = delete; + ScopedBlockSessionCheckouts& operator=(const ScopedBlockSessionCheckouts&) = delete; + +public: + ScopedBlockSessionCheckouts(OperationContext* opCtx); + ~ScopedBlockSessionCheckouts(); + +private: + OperationContext* const _opCtx; +}; + } // namespace mongo |