From f7dfccadbb6f19018765b96ca746dcdeb74cf1ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcos=20Jos=C3=A9=20Grillo=20Ramirez?= Date: Wed, 2 Mar 2022 15:01:37 +0100 Subject: SERVER-62521 Ensure distributed locks are being released even if a remote stepdown error occurs (cherry picked from commit c9a74181ade07e824a0b6bec6913d27c56e8bf21) --- src/mongo/db/s/sharding_ddl_coordinator.cpp | 47 +++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/src/mongo/db/s/sharding_ddl_coordinator.cpp b/src/mongo/db/s/sharding_ddl_coordinator.cpp index 95bee8bcfc0..88d8ec01246 100644 --- a/src/mongo/db/s/sharding_ddl_coordinator.cpp +++ b/src/mongo/db/s/sharding_ddl_coordinator.cpp @@ -213,7 +213,17 @@ SemiFuture ShardingDDLCoordinator::run(std::shared_ptr() || + status.isA()))); + + // Ensure coordinator cleanup if the document has not been saved. + _completeOnError = !_recoveredFromDisk; + static constexpr auto& errorMsg = "Failed to complete construction of sharding DDL coordinator"; LOGV2_ERROR( @@ -264,20 +274,30 @@ SemiFuture ShardingDDLCoordinator::run(std::shared_ptr() || + status.isA()) || + token.isCanceled() || _completeOnError); - bool isSteppingDown = status.isA() || - status.isA(); + auto completionStatus = status; - // Release the coordinator only in case the node is not stepping down or in case of - // acceptable error - if (!isSteppingDown || (!status.isOK() && _completeOnError)) { - LOGV2( - 5565601, "Releasing sharding DDL coordinator", "coordinatorId"_attr = _coordId); + bool isSteppingDown = token.isCanceled(); - auto session = metadata().getSession(); + // Remove the ddl coordinator and release locks if the execution was successful or if + // there was any error and we have the _completeOnError flag set or if we are not + // stepping down. 
+ auto cleanup = [&]() { + return completionStatus.isOK() || _completeOnError || !isSteppingDown; + }; + if (cleanup()) { try { + LOGV2(5565601, + "Releasing sharding DDL coordinator", + "coordinatorId"_attr = _coordId); + // We need to execute this in another executor to ensure the remove work is // done. const auto docWasRemoved = _removeDocumentUntillSuccessOrStepdown( @@ -291,6 +311,8 @@ SemiFuture ShardingDDLCoordinator::run(std::shared_ptr ShardingDDLCoordinator::run(std::shared_ptr() || completionStatus.isA() || completionStatus.isA(); @@ -306,7 +329,7 @@ SemiFuture ShardingDDLCoordinator::run(std::shared_ptr ShardingDDLCoordinator::run(std::shared_ptrunlock(opCtx, resource); -- cgit v1.2.1