From e44576384c3ed73e60f5e40b128df38ddb6ac981 Mon Sep 17 00:00:00 2001 From: Paolo Polato Date: Mon, 4 Apr 2022 13:35:54 +0000 Subject: SERVER-65169 fix deadlock in BalancerCommandsScheduler while stepping down --- src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp | 6 ++++-- src/mongo/db/s/balancer/balancer_commands_scheduler_impl.h | 6 ++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp b/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp index 338734c905e..db6ab8093a4 100644 --- a/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp +++ b/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp @@ -561,15 +561,17 @@ void BalancerCommandsSchedulerImpl::_workerThread() { } } // Wait for each outstanding command to complete, clean out its resources and leave. + stdx::unordered_map requestsToClean; { stdx::unique_lock ul(_mutex); _stateUpdatedCV.wait( ul, [this] { return (_requests.size() == _recentlyCompletedRequestIds.size()); }); - auto opCtxHolder = cc().makeOperationContext(); - _performDeferredCleanup(opCtxHolder.get(), _requests); + requestsToClean.swap(_requests); _requests.clear(); _recentlyCompletedRequestIds.clear(); } + auto opCtxHolder = cc().makeOperationContext(); + _performDeferredCleanup(opCtxHolder.get(), requestsToClean); } diff --git a/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.h b/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.h index 7f57527367e..3cb23e4e565 100644 --- a/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.h +++ b/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.h @@ -635,6 +635,12 @@ private: void _enqueueRequest(WithLock, RequestData&& request); + /** + * Clears any persisted state and releases any distributed lock associated with the list of + * requests specified. 
+ * This method must not be called while holding any mutex (this could cause deadlocks if a + * stepdown request is also being served). + */ void _performDeferredCleanup( OperationContext* opCtx, const stdx::unordered_map& requestsHoldingResources); -- cgit v1.2.1