summaryrefslogtreecommitdiff
path: root/src/mongo/db/s
diff options
context:
space:
mode:
authorTommaso Tocci <tommaso.tocci@mongodb.com>2022-08-12 12:33:36 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-08-12 13:46:37 +0000
commit8ce8a89418d3480b1dff7ba1167a89b644463de9 (patch)
tree85739df8524b41c05379d3ff11e93c75dfabb9e3 /src/mongo/db/s
parent32c2f632eaa7bf80607880162ec5e4eaeb22d7fe (diff)
downloadmongo-8ce8a89418d3480b1dff7ba1167a89b644463de9.tar.gz
SERVER-68708 Stop acquiring dist locks for chunk migrations
Diffstat (limited to 'src/mongo/db/s')
-rw-r--r--src/mongo/db/s/SConscript1
-rw-r--r--src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp35
-rw-r--r--src/mongo/db/s/balancer/balancer_commands_scheduler_impl.h38
-rw-r--r--src/mongo/db/s/balancer/balancer_commands_scheduler_test.cpp68
-rw-r--r--src/mongo/db/s/balancer/balancer_dist_locks.cpp75
-rw-r--r--src/mongo/db/s/balancer/balancer_dist_locks.h66
6 files changed, 10 insertions, 273 deletions
diff --git a/src/mongo/db/s/SConscript b/src/mongo/db/s/SConscript
index 38913ff31ad..5808fdeabb0 100644
--- a/src/mongo/db/s/SConscript
+++ b/src/mongo/db/s/SConscript
@@ -271,7 +271,6 @@ env.Library(
'balancer/balancer_chunk_selection_policy.cpp',
'balancer/balancer_commands_scheduler_impl.cpp',
'balancer/balancer_defragmentation_policy_impl.cpp',
- 'balancer/balancer_dist_locks.cpp',
'balancer/balancer_policy.cpp',
'balancer/balancer.cpp',
'balancer/cluster_chunks_resize_policy_impl.cpp',
diff --git a/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp b/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp
index 03f7715365c..88300a97779 100644
--- a/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp
+++ b/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp
@@ -422,24 +422,23 @@ CommandSubmissionResult BalancerCommandsSchedulerImpl::_submit(
OperationContext* opCtx, const CommandSubmissionParameters& params) {
LOGV2_DEBUG(
5847203, 2, "Balancer command request submitted for execution", "reqId"_attr = params.id);
- bool distLockTaken = false;
const auto shardWithStatus =
Grid::get(opCtx)->shardRegistry()->getShard(opCtx, params.commandInfo->getTarget());
if (!shardWithStatus.isOK()) {
- return CommandSubmissionResult(params.id, distLockTaken, shardWithStatus.getStatus());
+ return CommandSubmissionResult(params.id, shardWithStatus.getStatus());
}
const auto shardHostWithStatus = shardWithStatus.getValue()->getTargeter()->findHost(
opCtx, ReadPreferenceSetting{ReadPreference::PrimaryOnly});
if (!shardHostWithStatus.isOK()) {
- return CommandSubmissionResult(params.id, distLockTaken, shardHostWithStatus.getStatus());
+ return CommandSubmissionResult(params.id, shardHostWithStatus.getStatus());
}
if (params.commandInfo->requiresRecoveryOnCrash()) {
auto writeStatus = persistRecoveryInfo(opCtx, *(params.commandInfo));
if (!writeStatus.isOK()) {
- return CommandSubmissionResult(params.id, distLockTaken, writeStatus);
+ return CommandSubmissionResult(params.id, writeStatus);
}
}
@@ -454,18 +453,9 @@ CommandSubmissionResult BalancerCommandsSchedulerImpl::_submit(
_applyCommandResponse(requestId, args.response);
};
- if (params.commandInfo->requiresDistributedLock()) {
- Status lockAcquisitionResponse =
- _distributedLocks.acquireFor(opCtx, params.commandInfo->getNameSpace());
- if (!lockAcquisitionResponse.isOK()) {
- return CommandSubmissionResult(params.id, distLockTaken, lockAcquisitionResponse);
- }
- distLockTaken = true;
- }
-
auto swRemoteCommandHandle =
(*_executor)->scheduleRemoteCommand(remoteCommand, onRemoteResponseReceived);
- return CommandSubmissionResult(params.id, distLockTaken, swRemoteCommandHandle.getStatus());
+ return CommandSubmissionResult(params.id, swRemoteCommandHandle.getStatus());
}
void BalancerCommandsSchedulerImpl::_applySubmissionResult(
@@ -509,18 +499,14 @@ void BalancerCommandsSchedulerImpl::_applyCommandResponse(
void BalancerCommandsSchedulerImpl::_performDeferredCleanup(
OperationContext* opCtx,
- const stdx::unordered_map<UUID, RequestData, UUID::Hash>& requestsHoldingResources,
- bool includePersistedData) {
+ const stdx::unordered_map<UUID, RequestData, UUID::Hash>& requestsHoldingResources) {
if (requestsHoldingResources.empty()) {
return;
}
DBDirectClient dbClient(opCtx);
for (const auto& [_, request] : requestsHoldingResources) {
- if (request.holdsDistributedLock()) {
- _distributedLocks.releaseFor(opCtx, request.getNamespace());
- }
- if (includePersistedData && request.requiresRecoveryCleanupOnCompletion()) {
+ if (request.requiresRecoveryCleanupOnCompletion()) {
deletePersistedRecoveryInfo(dbClient, request.getCommandInfo());
}
}
@@ -582,8 +568,7 @@ void BalancerCommandsSchedulerImpl::_workerThread() {
// 2.a Free any resource acquired by already completed/aborted requests.
{
auto opCtxHolder = cc().makeOperationContext();
- _performDeferredCleanup(
- opCtxHolder.get(), completedRequestsToCleanUp, true /*includePersistedData*/);
+ _performDeferredCleanup(opCtxHolder.get(), completedRequestsToCleanUp);
completedRequestsToCleanUp.clear();
}
@@ -614,18 +599,12 @@ void BalancerCommandsSchedulerImpl::_workerThread() {
(*_executor)->shutdown();
(*_executor)->join();
- stdx::unordered_map<UUID, RequestData, UUID::Hash> cancelledRequests;
{
stdx::unique_lock<Latch> ul(_mutex);
- cancelledRequests.swap(_requests);
_requests.clear();
_recentlyCompletedRequestIds.clear();
_executor.reset();
}
- auto opCtxHolder = cc().makeOperationContext();
- // Ensure that the clean up won't delete any request recovery document (the commands will be
- // reissued once the scheduler is restarted)
- _performDeferredCleanup(opCtxHolder.get(), cancelledRequests, false /*includePersistedData*/);
}
diff --git a/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.h b/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.h
index 537548850e5..04977417db0 100644
--- a/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.h
+++ b/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.h
@@ -30,7 +30,6 @@
#pragma once
#include "mongo/db/s/balancer/balancer_commands_scheduler.h"
-#include "mongo/db/s/balancer/balancer_dist_locks.h"
#include "mongo/db/s/balancer/type_migration.h"
#include "mongo/db/s/forwardable_operation_metadata.h"
#include "mongo/db/service_context.h"
@@ -82,10 +81,6 @@ public:
return false;
}
- virtual bool requiresDistributedLock() const {
- return false;
- }
-
virtual std::string getTargetDb() const {
return NamespaceString::kAdminDb.toString();
}
@@ -137,10 +132,6 @@ public:
return commandBuilder.obj();
}
- bool requiresDistributedLock() const override {
- return true;
- }
-
private:
const ShardsvrMoveRange _request;
const WriteConcernOptions _wc;
@@ -229,10 +220,6 @@ public:
return true;
}
- bool requiresDistributedLock() const override {
- return true;
- }
-
MigrationType asMigrationType() const {
return MigrationType(getNameSpace(),
_chunkBoundaries.getMin(),
@@ -451,12 +438,10 @@ struct CommandSubmissionParameters {
* Helper data structure for storing the outcome of a Command submission.
*/
struct CommandSubmissionResult {
- CommandSubmissionResult(UUID id, bool acquiredDistLock, const Status& outcome)
- : id(id), acquiredDistLock(acquiredDistLock), outcome(outcome) {}
+ CommandSubmissionResult(UUID id, const Status& outcome) : id(id), outcome(outcome) {}
CommandSubmissionResult(CommandSubmissionResult&& rhs) = default;
CommandSubmissionResult(const CommandSubmissionResult& rhs) = delete;
UUID id;
- bool acquiredDistLock;
Status outcome;
};
@@ -469,7 +454,6 @@ public:
RequestData(UUID id, std::shared_ptr<CommandInfo>&& commandInfo)
: _id(id),
_completedOrAborted(false),
- _holdingDistLock(false),
_commandInfo(std::move(commandInfo)),
_responsePromise{NonNullPromiseTag{}} {
invariant(_commandInfo);
@@ -478,7 +462,6 @@ public:
RequestData(RequestData&& rhs)
: _id(rhs._id),
_completedOrAborted(rhs._completedOrAborted),
- _holdingDistLock(rhs._holdingDistLock),
_commandInfo(std::move(rhs._commandInfo)),
_responsePromise(std::move(rhs._responsePromise)) {}
@@ -494,7 +477,6 @@ public:
Status applySubmissionResult(CommandSubmissionResult&& submissionResult) {
invariant(_id == submissionResult.id);
- _holdingDistLock = submissionResult.acquiredDistLock;
if (_completedOrAborted) {
// A remote response was already received by the time the submission gets processed.
// Keep the original outcome and continue the workflow.
@@ -516,10 +498,6 @@ public:
return _commandInfo->getNameSpace();
}
- bool holdsDistributedLock() const {
- return _holdingDistLock;
- }
-
bool requiresRecoveryCleanupOnCompletion() const {
return _commandInfo->requiresRecoveryCleanupOnCompletion();
}
@@ -542,8 +520,6 @@ private:
bool _completedOrAborted;
- bool _holdingDistLock;
-
std::shared_ptr<CommandInfo> _commandInfo;
Promise<executor::RemoteCommandResponse> _responsePromise;
@@ -637,12 +613,6 @@ private:
*/
std::vector<UUID> _recentlyCompletedRequestIds;
- /**
- * Centralised accessor for all the distributed locks required by the Scheduler.
- * Only _workerThread() is supposed to interact with this class.
- */
- BalancerDistLocks _distributedLocks;
-
/*
* Counter of oustanding requests that were interrupted by a prior step-down/crash event,
* and that the scheduler is currently submitting as part of its initial recovery phase.
@@ -655,15 +625,13 @@ private:
void _enqueueRequest(WithLock, RequestData&& request);
/**
- * Clears any persisted state and releases any distributed lock associated to the list of
- * requests specified.
+ * Clears any persisted state associated to the list of requests specified.
* This method must not be called while holding any mutex (this could cause deadlocks if a
* stepdown request is also being served).
*/
void _performDeferredCleanup(
OperationContext* opCtx,
- const stdx::unordered_map<UUID, RequestData, UUID::Hash>& requestsHoldingResources,
- bool includePersistedData);
+ const stdx::unordered_map<UUID, RequestData, UUID::Hash>& requestsHoldingResources);
CommandSubmissionResult _submit(OperationContext* opCtx,
const CommandSubmissionParameters& data);
diff --git a/src/mongo/db/s/balancer/balancer_commands_scheduler_test.cpp b/src/mongo/db/s/balancer/balancer_commands_scheduler_test.cpp
index a488c414c2c..8b8cfcad412 100644
--- a/src/mongo/db/s/balancer/balancer_commands_scheduler_test.cpp
+++ b/src/mongo/db/s/balancer/balancer_commands_scheduler_test.cpp
@@ -180,14 +180,6 @@ TEST_F(BalancerCommandsSchedulerTest, SuccessfulMoveChunkCommand) {
ASSERT_OK(futureResponse.getNoThrow());
remoteResponsesFuture.default_timed_get();
deferredCleanupCompletedCheckpoint->waitForTimesEntered(timesEnteredFailPoint + 1);
- // Ensure DistLock is released correctly
- {
- auto opCtx = Client::getCurrent()->getOperationContext();
- const std::string whyMessage(str::stream()
- << "Test acquisition of distLock for " << kNss.ns());
- ASSERT_DOES_NOT_THROW(DDLLockManager::get(opCtx)->lock(
- opCtx, kNss.ns(), whyMessage, DDLLockManager::kSingleLockAttemptTimeout));
- }
deferredCleanupCompletedCheckpoint->setMode(FailPoint::off, 0);
_scheduler.stop();
}
@@ -214,14 +206,6 @@ TEST_F(BalancerCommandsSchedulerTest, SuccessfulMoveRangeCommand) {
ASSERT_OK(futureResponse.getNoThrow());
remoteResponsesFuture.default_timed_get();
deferredCleanupCompletedCheckpoint->waitForTimesEntered(timesEnteredFailPoint + 1);
- // Ensure DistLock is released correctly
- {
- auto opCtx = Client::getCurrent()->getOperationContext();
- const std::string whyMessage(str::stream()
- << "Test acquisition of distLock for " << kNss.ns());
- ASSERT_DOES_NOT_THROW(DDLLockManager::get(opCtx)->lock(
- opCtx, kNss.ns(), whyMessage, DDLLockManager::kSingleLockAttemptTimeout));
- }
deferredCleanupCompletedCheckpoint->setMode(FailPoint::off, 0);
_scheduler.stop();
}
@@ -349,15 +333,6 @@ TEST_F(BalancerCommandsSchedulerTest, CommandFailsWhenNetworkReturnsError) {
ASSERT_EQUALS(futureResponse.getNoThrow(), timeoutError);
remoteResponsesFuture.default_timed_get();
deferredCleanupCompletedCheckpoint->waitForTimesEntered(timesEnteredFailPoint + 1);
-
- // Ensure DistLock is released correctly
- {
- auto opCtx = Client::getCurrent()->getOperationContext();
- const std::string whyMessage(str::stream()
- << "Test acquisition of distLock for " << kNss.ns());
- ASSERT_DOES_NOT_THROW(DDLLockManager::get(opCtx)->lock(
- opCtx, kNss.ns(), whyMessage, DDLLockManager::kSingleLockAttemptTimeout));
- }
deferredCleanupCompletedCheckpoint->setMode(FailPoint::off, 0);
_scheduler.stop();
}
@@ -369,14 +344,6 @@ TEST_F(BalancerCommandsSchedulerTest, CommandFailsWhenSchedulerIsStopped) {
ASSERT_EQUALS(futureResponse.getNoThrow(),
Status(ErrorCodes::BalancerInterrupted,
"Request rejected - balancer scheduler is stopped"));
- // Ensure DistLock is not taken
- {
- auto opCtx = Client::getCurrent()->getOperationContext();
- const std::string whyMessage(str::stream()
- << "Test acquisition of distLock for " << kNss.ns());
- ASSERT_DOES_NOT_THROW(DDLLockManager::get(opCtx)->lock(
- opCtx, kNss.ns(), whyMessage, DDLLockManager::kSingleLockAttemptTimeout));
- }
}
TEST_F(BalancerCommandsSchedulerTest, CommandCanceledIfUnsubmittedBeforeBalancerStops) {
@@ -396,14 +363,6 @@ TEST_F(BalancerCommandsSchedulerTest, CommandCanceledIfUnsubmittedBeforeBalancer
ASSERT_EQUALS(futureResponse.getNoThrow(),
Status(ErrorCodes::BalancerInterrupted,
"Request cancelled - balancer scheduler is stopping"));
- // Ensure DistLock is released correctly
- {
- auto opCtx = Client::getCurrent()->getOperationContext();
- const std::string whyMessage(str::stream()
- << "Test acquisition of distLock for " << kNss.ns());
- ASSERT_DOES_NOT_THROW(DDLLockManager::get(opCtx)->lock(
- opCtx, kNss.ns(), whyMessage, DDLLockManager::kSingleLockAttemptTimeout));
- }
}
TEST_F(BalancerCommandsSchedulerTest, MoveChunkCommandGetsPersistedOnDiskWhenRequestIsSubmitted) {
@@ -428,7 +387,6 @@ TEST_F(BalancerCommandsSchedulerTest, MoveChunkCommandGetsPersistedOnDiskWhenReq
MoveChunkCommandInfo::recoverFrom(swPersistedCommand.getValue(), defaultValues);
ASSERT_EQ(kNss, recoveredCommand->getNameSpace());
ASSERT_EQ(migrateInfo.from, recoveredCommand->getTarget());
- ASSERT_TRUE(recoveredCommand->requiresDistributedLock());
MoveChunkCommandInfo originalCommandInfo(migrateInfo.nss,
migrateInfo.from,
@@ -493,31 +451,5 @@ TEST_F(BalancerCommandsSchedulerTest, PersistedCommandsAreReissuedWhenRecovering
ASSERT_EQUALS(0, persistedCommandDocs.size());
}
-TEST_F(BalancerCommandsSchedulerTest, DistLockPreventsMoveChunkWithConcurrentDDL) {
- OperationContext* opCtx;
- FailPoint* failpoint = globalFailPointRegistry().find("pauseSubmissionsFailPoint");
- failpoint->setMode(FailPoint::Mode::alwaysOn);
- {
- auto remoteResponsesFuture = setRemoteResponses();
- _scheduler.start(operationContext(), getMigrationRecoveryDefaultValues());
- opCtx = Client::getCurrent()->getOperationContext();
- const std::string whyMessage(str::stream()
- << "Test acquisition of distLock for " << kNss.ns());
- auto scopedDDLLock = DDLLockManager::get(opCtx)->lock(
- opCtx, kNss.ns(), whyMessage, DDLLockManager::kSingleLockAttemptTimeout);
- failpoint->setMode(FailPoint::Mode::off);
- MigrateInfo migrateInfo = makeMigrationInfo(0, kShardId1, kShardId0);
- auto futureResponse = _scheduler.requestMoveChunk(operationContext(),
- migrateInfo,
- getMoveChunkSettings(),
- false /* issuedByRemoteUser */);
- remoteResponsesFuture.default_timed_get();
- ASSERT_EQ(
- futureResponse.getNoThrow(),
- Status(ErrorCodes::LockBusy, "Failed to acquire dist lock testDb.testColl locally"));
- }
- _scheduler.stop();
-}
-
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/s/balancer/balancer_dist_locks.cpp b/src/mongo/db/s/balancer/balancer_dist_locks.cpp
deleted file mode 100644
index 58526875649..00000000000
--- a/src/mongo/db/s/balancer/balancer_dist_locks.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-/**
- * Copyright (C) 2021-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/db/s/balancer/balancer_dist_locks.h"
-#include "mongo/db/operation_context.h"
-
-namespace mongo {
-
-BalancerDistLocks::~BalancerDistLocks() {
- invariant(_distLocksByCollection.empty(),
- "Attempting to destroy the keychain while still holding distributed locks");
-}
-
-Status BalancerDistLocks::acquireFor(OperationContext* opCtx, const NamespaceString& nss) {
- auto it = _distLocksByCollection.find(nss);
- if (it != _distLocksByCollection.end()) {
- ++it->second.references;
- return Status::OK();
- } else {
- boost::optional<DDLLockManager::ScopedLock> scopedLock;
- try {
- scopedLock.emplace(
- DDLLockManager::get(opCtx)->lock(opCtx,
- nss.ns(),
- "moveRange" /* reason */,
- DDLLockManager::kSingleLockAttemptTimeout));
- } catch (const DBException& ex) {
- return ex.toStatus(str::stream() << "Could not acquire collection lock for " << nss.ns()
- << " to migrate chunks");
- }
- ReferenceCountedLock refCountedLock(std::move(*scopedLock));
- _distLocksByCollection.insert(std::make_pair(nss, std::move(refCountedLock)));
- }
- return Status::OK();
-}
-
-void BalancerDistLocks::releaseFor(OperationContext* opCtx, const NamespaceString& nss) {
- auto it = _distLocksByCollection.find(nss);
- if (it == _distLocksByCollection.end()) {
- return;
- } else if (it->second.references == 1) {
- _distLocksByCollection.erase(it);
- } else {
- --it->second.references;
- }
-}
-
-
-} // namespace mongo
diff --git a/src/mongo/db/s/balancer/balancer_dist_locks.h b/src/mongo/db/s/balancer/balancer_dist_locks.h
deleted file mode 100644
index 9b64f15d247..00000000000
--- a/src/mongo/db/s/balancer/balancer_dist_locks.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/**
- * Copyright (C) 2021-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include "mongo/db/s/ddl_lock_manager.h"
-
-namespace mongo {
-
-/**
- * Utility class for centralising the control of any distributed lock required by a client.
- * Its usage is not thread-safe.
- */
-class BalancerDistLocks {
-
-public:
- BalancerDistLocks() = default;
-
- ~BalancerDistLocks();
-
- Status acquireFor(OperationContext* opCtx, const NamespaceString& nss);
-
- void releaseFor(OperationContext* opCtx, const NamespaceString& nss);
-
-private:
- struct ReferenceCountedLock {
- ReferenceCountedLock(DDLLockManager::ScopedLock&& lock)
- : lock(std::move(lock)), references(1) {}
-
- DDLLockManager::ScopedLock lock;
- int references;
- };
-
- stdx::unordered_map<NamespaceString, ReferenceCountedLock> _distLocksByCollection;
-
- BalancerDistLocks(const BalancerDistLocks&) = delete;
- BalancerDistLocks& operator=(const BalancerDistLocks&) = delete;
-};
-
-} // namespace mongo