summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Marcos José Grillo Ramirez <marcos.grillo@mongodb.com> 2022-03-01 15:59:58 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-03-01 18:50:27 +0000
commit 2551b634920f2fec7e8d780e7ea99d078b3af3bd (patch)
tree 5ccebfe972048c684893b7eaf3dc20c8f2800bb0
parent 5ead63b5661c2becde994b9dc47eba623e826579 (diff)
download mongo-2551b634920f2fec7e8d780e7ea99d078b3af3bd.tar.gz
SERVER-62521 Ensure distributed locks are being released even if a remote stepdown error occurs
(cherry picked from commit 42c5d825b962b3190b8c8a639d022f6660eaeee0)
-rw-r--r-- src/mongo/db/s/sharding_ddl_coordinator.cpp 26
1 files changed, 18 insertions, 8 deletions
diff --git a/src/mongo/db/s/sharding_ddl_coordinator.cpp b/src/mongo/db/s/sharding_ddl_coordinator.cpp
index 95bee8bcfc0..b184f596053 100644
--- a/src/mongo/db/s/sharding_ddl_coordinator.cpp
+++ b/src/mongo/db/s/sharding_ddl_coordinator.cpp
@@ -269,15 +269,22 @@ SemiFuture<void> ShardingDDLCoordinator::run(std::shared_ptr<executor::ScopedTas
bool isSteppingDown = status.isA<ErrorCategory::NotPrimaryError>() ||
status.isA<ErrorCategory::ShutdownError>();
- // Release the coordinator only in case the node is not stepping down or in case of
- // acceptable error
- if (!isSteppingDown || (!status.isOK() && _completeOnError)) {
- LOGV2(
- 5565601, "Releasing sharding DDL coordinator", "coordinatorId"_attr = _coordId);
+ // If we are stepping down, the token MUST be cancelled. Each implementation of the
+ // coordinator must retry remote stepping down errors unless we allow finalizing the
+ // coordinator in the presence of errors.
+ dassert(!isSteppingDown || token.isCanceled() || _completeOnError);
- auto session = metadata().getSession();
+ // Remove the DDL coordinator and release locks if the execution was successful, if
+ // the _completeOnError flag is set (so errors are acceptable), or if we are not
+ // stepping down.
+ auto cleanup = [&]() { return status.isOK() || _completeOnError || !isSteppingDown; };
+ if (cleanup()) {
try {
+ LOGV2(5565601,
+ "Releasing sharding DDL coordinator",
+ "coordinatorId"_attr = _coordId);
+
// We need to execute this in another executor to ensure the remove work is
// done.
const auto docWasRemoved = _removeDocumentUntillSuccessOrStepdown(
@@ -291,6 +298,8 @@ SemiFuture<void> ShardingDDLCoordinator::run(std::shared_ptr<executor::ScopedTas
Status::OK());
}
+ auto session = metadata().getSession();
+
if (status.isOK() && session) {
// Return lsid to the SessionCache. If status is not OK, let the lsid be
// discarded.
@@ -299,6 +308,7 @@ SemiFuture<void> ShardingDDLCoordinator::run(std::shared_ptr<executor::ScopedTas
}
} catch (const DBException& ex) {
completionStatus = ex.toStatus();
+ // Ensure the only possible error is that we're stepping down.
isSteppingDown = completionStatus.isA<ErrorCategory::NotPrimaryError>() ||
completionStatus.isA<ErrorCategory::ShutdownError>() ||
completionStatus.isA<ErrorCategory::CancellationError>();
@@ -306,7 +316,7 @@ SemiFuture<void> ShardingDDLCoordinator::run(std::shared_ptr<executor::ScopedTas
}
}
- if (isSteppingDown) {
+ if (!cleanup()) {
LOGV2(5950000,
"Not releasing distributed locks because the node is stepping down or "
"shutting down",
@@ -315,7 +325,7 @@ SemiFuture<void> ShardingDDLCoordinator::run(std::shared_ptr<executor::ScopedTas
}
while (!_scopedLocks.empty()) {
- if (!isSteppingDown) {
+ if (cleanup()) {
// (SERVER-59500) Only release the remote locks in case of no stepdown/shutdown
const auto& resource = _scopedLocks.top().getNs();
DistLockManager::get(opCtx)->unlock(opCtx, resource);