summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJordi Serra Torrens <jordi.serra-torrens@mongodb.com>2022-01-07 08:35:06 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-01-07 09:03:20 +0000
commit86773e0404ac646ad94e510852c0274bb9257b42 (patch)
treecf75b947f96d4df6af6aa7b47e7f8f88d6cf9e02
parent774fe7d3a26749f9b71d17c8a5952ab412f3b2b0 (diff)
downloadmongo-86773e0404ac646ad94e510852c0274bb9257b42.tar.gz
SERVER-62064 Serialize resharding with other DDL operations on stepup
-rw-r--r--jstests/sharding/resharding_disallow_drop.js52
-rw-r--r--src/mongo/db/s/SConscript1
-rw-r--r--src/mongo/db/s/reshard_collection_coordinator.cpp146
-rw-r--r--src/mongo/db/s/reshard_collection_coordinator.h66
-rw-r--r--src/mongo/db/s/reshard_collection_coordinator_document.idl61
-rw-r--r--src/mongo/db/s/sharding_ddl_coordinator.cpp18
-rw-r--r--src/mongo/db/s/sharding_ddl_coordinator.h13
-rw-r--r--src/mongo/db/s/sharding_ddl_coordinator.idl2
-rw-r--r--src/mongo/db/s/sharding_ddl_coordinator_service.cpp8
-rw-r--r--src/mongo/db/s/shardsvr_reshard_collection_command.cpp37
-rw-r--r--src/mongo/s/commands/cluster_reshard_collection_cmd.cpp19
-rw-r--r--src/mongo/s/request_types/sharded_ddl_commands.idl60
-rw-r--r--src/mongo/s/resharding/resharding_feature_flag.idl7
13 files changed, 339 insertions, 151 deletions
diff --git a/jstests/sharding/resharding_disallow_drop.js b/jstests/sharding/resharding_disallow_drop.js
index 37dbca81c6c..144abd55b16 100644
--- a/jstests/sharding/resharding_disallow_drop.js
+++ b/jstests/sharding/resharding_disallow_drop.js
@@ -1,26 +1,17 @@
/**
* Tests that a drop can't happen while resharding is in progress.
- *
* @tags: [
+ * requires_fcv_53,
+ * featureFlagRecoverableShardsvrReshardCollectionCoordinator,
* ]
*/
(function() {
"use strict";
load("jstests/libs/fail_point_util.js");
-load('jstests/libs/parallel_shell_helpers.js');
-
-// Ensures that resharding has acquired the db and collection distLocks. The fact that the entry in
-// config.reshardingOperations exists guarantees that the distLocks have already been acquired.
-function awaitReshardingStarted() {
- assert.soon(() => {
- const coordinatorDoc = st.s.getCollection("config.reshardingOperations").findOne({ns: ns});
- return coordinatorDoc !== null;
- }, "resharding didn't start");
-}
var st = new ShardingTest({
- shards: 1,
+ shards: {rs0: {nodes: 2}},
config: 1,
mongos: 1,
other: {
@@ -36,21 +27,36 @@ const db = st.s.getDB(dbName);
assert.commandWorked(st.s.adminCommand({enableSharding: dbName}));
assert.commandWorked(st.s.adminCommand({shardCollection: ns, key: {_id: 1}}));
-const pauseCoordinatorBeforeDecisionPersistedFailpoint = configureFailPoint(
- st.configRS.getPrimary(), "reshardingPauseCoordinatorBeforeDecisionPersisted");
+const reshardingPauseBeforeInsertCoordinatorDocFailpoint =
+ configureFailPoint(st.configRS.getPrimary(), "pauseBeforeInsertCoordinatorDoc");
+
+assert.commandFailedWithCode(
+ db.adminCommand({reshardCollection: ns, key: {newKey: 1}, maxTimeMS: 1000}),
+ ErrorCodes.MaxTimeMSExpired);
+
+// Wait for resharding to start running on the configsvr
+reshardingPauseBeforeInsertCoordinatorDocFailpoint.wait();
+
+// Drop cannot progress while resharding is in progress
+assert.commandFailedWithCode(db.runCommand({drop: collName, maxTimeMS: 5000}),
+ ErrorCodes.MaxTimeMSExpired);
-const awaitReshardResult = startParallelShell(
- funWithArgs(function(ns) {
- assert.commandWorked(db.adminCommand({reshardCollection: ns, key: {newKey: 1}}));
- }, ns), st.s.port);
+// Stepdown the DB primary shard
+const shard0Primary = st.rs0.getPrimary();
+assert.commandWorked(
+ shard0Primary.adminCommand({replSetStepDown: ReplSetTest.kForeverSecs, force: true}));
+st.rs0.awaitNodesAgreeOnPrimary();
-awaitReshardingStarted();
+// Even after stepdown, drop cannot progress due to the in-progress resharding
+assert.commandFailedWithCode(db.runCommand({drop: collName, maxTimeMS: 5000}),
+ ErrorCodes.MaxTimeMSExpired);
-let res = db.runCommand({drop: collName, maxTimeMS: 5000});
-assert(ErrorCodes.isExceededTimeLimitError(res.code));
+// Finish resharding
+reshardingPauseBeforeInsertCoordinatorDocFailpoint.off();
+assert.commandWorked(db.adminCommand({reshardCollection: ns, key: {newKey: 1}}));
-pauseCoordinatorBeforeDecisionPersistedFailpoint.off();
-awaitReshardResult();
+// Now the drop can complete
+assert.commandWorked(db.runCommand({drop: collName}));
st.stop();
})();
diff --git a/src/mongo/db/s/SConscript b/src/mongo/db/s/SConscript
index 22372d7aed1..b97fc59fded 100644
--- a/src/mongo/db/s/SConscript
+++ b/src/mongo/db/s/SConscript
@@ -338,6 +338,7 @@ env.Library(
'refine_collection_shard_key_coordinator.cpp',
'refine_collection_shard_key_coordinator_document.idl',
'reshard_collection_coordinator.cpp',
+ 'reshard_collection_coordinator_document.idl',
'resharding_test_commands.cpp',
'resharding_test_commands.idl',
'set_allow_migrations_coordinator.cpp',
diff --git a/src/mongo/db/s/reshard_collection_coordinator.cpp b/src/mongo/db/s/reshard_collection_coordinator.cpp
index 76d75706614..9a9b8af12ba 100644
--- a/src/mongo/db/s/reshard_collection_coordinator.cpp
+++ b/src/mongo/db/s/reshard_collection_coordinator.cpp
@@ -39,51 +39,115 @@
namespace mongo {
-ReshardCollectionCoordinator::ReshardCollectionCoordinator(
- OperationContext* opCtx, const ShardsvrReshardCollection& reshardCollectionParams)
- : ShardingDDLCoordinator_NORESILIENT(opCtx, reshardCollectionParams.getCommandParameter()),
- _serviceContext(opCtx->getServiceContext()),
- _requestObj(reshardCollectionParams.serialize({})),
- _request(ShardsvrReshardCollection::parse(IDLParserErrorContext("_shardsvrReshardCollection"),
- _requestObj)),
- _nss(_request.getCommandParameter()) {}
-
-SemiFuture<void> ReshardCollectionCoordinator::runImpl(
- std::shared_ptr<executor::TaskExecutor> executor) {
- return ExecutorFuture<void>(executor, Status::OK())
- .then([this, anchor = shared_from_this()]() {
- ThreadClient tc("ReshardCollectionCoordinator", _serviceContext);
- auto opCtxHolder = tc->makeOperationContext();
- auto* opCtx = opCtxHolder.get();
- _forwardableOpMetadata.setOn(opCtx);
-
- ConfigsvrReshardCollection configsvrReshardCollection(_nss, _request.getKey());
- configsvrReshardCollection.setDbName(_request.getDbName());
- configsvrReshardCollection.setUnique(_request.getUnique());
- configsvrReshardCollection.setCollation(_request.getCollation());
- configsvrReshardCollection.set_presetReshardedChunks(
- _request.get_presetReshardedChunks());
- configsvrReshardCollection.setZones(_request.getZones());
- configsvrReshardCollection.setNumInitialChunks(_request.getNumInitialChunks());
-
- auto configShard = Grid::get(opCtx)->shardRegistry()->getConfigShard();
- const auto cmdResponse = uassertStatusOK(configShard->runCommandWithFixedRetryAttempts(
- opCtx,
- ReadPreferenceSetting(ReadPreference::PrimaryOnly),
- "admin",
- CommandHelpers::appendMajorityWriteConcern(configsvrReshardCollection.toBSON({}),
- opCtx->getWriteConcern()),
- Shard::RetryPolicy::kIdempotent));
- uassertStatusOK(Shard::CommandResponse::getEffectiveStatus(std::move(cmdResponse)));
- })
+ReshardCollectionCoordinator::ReshardCollectionCoordinator(ShardingDDLCoordinatorService* service,
+ const BSONObj& initialState)
+ : ReshardCollectionCoordinator(service, initialState, true /* persistCoordinatorDocument */) {}
+
+ReshardCollectionCoordinator::ReshardCollectionCoordinator(ShardingDDLCoordinatorService* service,
+ const BSONObj& initialState,
+ bool persistCoordinatorDocument)
+ : ShardingDDLCoordinator(service, initialState),
+ _initialState(initialState.getOwned()),
+ _doc(ReshardCollectionCoordinatorDocument::parse(
+ IDLParserErrorContext("ReshardCollectionCoordinatorDocument"), _initialState)),
+ _persistCoordinatorDocument(persistCoordinatorDocument) {}
+
+void ReshardCollectionCoordinator::checkIfOptionsConflict(const BSONObj& doc) const {
+ const auto otherDoc = ReshardCollectionCoordinatorDocument::parse(
+ IDLParserErrorContext("ReshardCollectionCoordinatorDocument"), doc);
+
+ uassert(ErrorCodes::ConflictingOperationInProgress,
+ "Another reshard collection with different arguments is already running for the same "
+ "namespace",
+ SimpleBSONObjComparator::kInstance.evaluate(
+ _doc.getReshardCollectionRequest().toBSON() ==
+ otherDoc.getReshardCollectionRequest().toBSON()));
+}
+
+boost::optional<BSONObj> ReshardCollectionCoordinator::reportForCurrentOp(
+ MongoProcessInterface::CurrentOpConnectionsMode connMode,
+ MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept {
+ BSONObjBuilder cmdBob;
+ if (const auto& optComment = getForwardableOpMetadata().getComment()) {
+ cmdBob.append(optComment.get().firstElement());
+ }
+ cmdBob.appendElements(_doc.getReshardCollectionRequest().toBSON());
+
+ BSONObjBuilder bob;
+ bob.append("type", "op");
+ bob.append("desc", "ReshardCollectionCoordinator");
+ bob.append("op", "command");
+ bob.append("ns", nss().toString());
+ bob.append("command", cmdBob.obj());
+ bob.append("active", true);
+ return bob.obj();
+}
+
+void ReshardCollectionCoordinator::_enterPhase(Phase newPhase) {
+ if (!_persistCoordinatorDocument) {
+ return;
+ }
+
+ StateDoc newDoc(_doc);
+ newDoc.setPhase(newPhase);
+
+ LOGV2_DEBUG(6206400,
+ 2,
+ "Reshard collection coordinator phase transition",
+ "namespace"_attr = nss(),
+ "newPhase"_attr = ReshardCollectionCoordinatorPhase_serializer(newDoc.getPhase()),
+ "oldPhase"_attr = ReshardCollectionCoordinatorPhase_serializer(_doc.getPhase()));
+
+ if (_doc.getPhase() == Phase::kUnset) {
+ _doc = _insertStateDocument(std::move(newDoc));
+ return;
+ }
+ _doc = _updateStateDocument(cc().makeOperationContext().get(), std::move(newDoc));
+}
+
+ExecutorFuture<void> ReshardCollectionCoordinator::_runImpl(
+ std::shared_ptr<executor::ScopedTaskExecutor> executor,
+ const CancellationToken& token) noexcept {
+ return ExecutorFuture<void>(**executor)
+ .then(_executePhase(
+ Phase::kReshard,
+ [this, anchor = shared_from_this()] {
+ auto opCtxHolder = cc().makeOperationContext();
+ auto* opCtx = opCtxHolder.get();
+ getForwardableOpMetadata().setOn(opCtx);
+
+ ConfigsvrReshardCollection configsvrReshardCollection(nss(), _doc.getKey());
+ configsvrReshardCollection.setDbName(nss().db());
+ configsvrReshardCollection.setUnique(_doc.getUnique());
+ configsvrReshardCollection.setCollation(_doc.getCollation());
+ configsvrReshardCollection.set_presetReshardedChunks(
+ _doc.get_presetReshardedChunks());
+ configsvrReshardCollection.setZones(_doc.getZones());
+ configsvrReshardCollection.setNumInitialChunks(_doc.getNumInitialChunks());
+
+ const auto configShard = Grid::get(opCtx)->shardRegistry()->getConfigShard();
+
+ const auto cmdResponse =
+ uassertStatusOK(configShard->runCommandWithFixedRetryAttempts(
+ opCtx,
+ ReadPreferenceSetting(ReadPreference::PrimaryOnly),
+ NamespaceString::kAdminDb.toString(),
+ CommandHelpers::appendMajorityWriteConcern(
+ configsvrReshardCollection.toBSON({}), opCtx->getWriteConcern()),
+ Shard::RetryPolicy::kIdempotent));
+ uassertStatusOK(Shard::CommandResponse::getEffectiveStatus(std::move(cmdResponse)));
+ }))
.onError([this, anchor = shared_from_this()](const Status& status) {
- LOGV2_ERROR(5276001,
+ LOGV2_ERROR(6206401,
"Error running reshard collection",
- "namespace"_attr = _nss,
+ "namespace"_attr = nss(),
"error"_attr = redact(status));
return status;
- })
- .semi();
+ });
}
+ReshardCollectionCoordinator_NORESILIENT::ReshardCollectionCoordinator_NORESILIENT(
+ ShardingDDLCoordinatorService* service, const BSONObj& initialState)
+ : ReshardCollectionCoordinator(service, initialState, false /* persistCoordinatorDocument */) {}
+
} // namespace mongo
diff --git a/src/mongo/db/s/reshard_collection_coordinator.h b/src/mongo/db/s/reshard_collection_coordinator.h
index 56b6e5d0430..bd4639f056f 100644
--- a/src/mongo/db/s/reshard_collection_coordinator.h
+++ b/src/mongo/db/s/reshard_collection_coordinator.h
@@ -29,25 +29,67 @@
#pragma once
+#include "mongo/db/s/reshard_collection_coordinator_document_gen.h"
#include "mongo/db/s/sharding_ddl_coordinator.h"
-#include "mongo/s/request_types/sharded_ddl_commands_gen.h"
#include "mongo/util/future.h"
namespace mongo {
-class ReshardCollectionCoordinator final
- : public ShardingDDLCoordinator_NORESILIENT,
- public std::enable_shared_from_this<ReshardCollectionCoordinator> {
+class ReshardCollectionCoordinator : public ShardingDDLCoordinator {
public:
- ReshardCollectionCoordinator(OperationContext* opCtx,
- const ShardsvrReshardCollection& reshardCollectionParams);
+ using StateDoc = ReshardCollectionCoordinatorDocument;
+ using Phase = ReshardCollectionCoordinatorPhaseEnum;
+
+ ReshardCollectionCoordinator(ShardingDDLCoordinatorService* service,
+ const BSONObj& initialState);
+
+ void checkIfOptionsConflict(const BSONObj& coorDoc) const override;
+
+ boost::optional<BSONObj> reportForCurrentOp(
+ MongoProcessInterface::CurrentOpConnectionsMode connMode,
+ MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept override;
+
+protected:
+ ReshardCollectionCoordinator(ShardingDDLCoordinatorService* service,
+ const BSONObj& initialState,
+ bool persistCoordinatorDocument);
private:
- SemiFuture<void> runImpl(std::shared_ptr<executor::TaskExecutor> executor) override;
+ ShardingDDLCoordinatorMetadata const& metadata() const override {
+ return _doc.getShardingDDLCoordinatorMetadata();
+ }
+
+ ExecutorFuture<void> _runImpl(std::shared_ptr<executor::ScopedTaskExecutor> executor,
+ const CancellationToken& token) noexcept override;
+
+ template <typename Func>
+ auto _executePhase(const Phase& newPhase, Func&& func) {
+ return [=] {
+ const auto& currPhase = _doc.getPhase();
- ServiceContext* _serviceContext;
- const OpMsgRequest _requestObj; // Owned object to guarantee the lifetime of the objects
- // referenced by the parsed request '_request'
- const ShardsvrReshardCollection _request;
- const NamespaceString& _nss;
+ if (currPhase > newPhase) {
+ // Do not execute this phase if we already reached a subsequent one.
+ return;
+ }
+ if (currPhase < newPhase) {
+ // Persist the new phase if this is the first time we are executing it.
+ _enterPhase(newPhase);
+ }
+ return func();
+ };
+ }
+
+ void _enterPhase(Phase newPhase);
+
+ const BSONObj _initialState;
+ ReshardCollectionCoordinatorDocument _doc;
+    const bool _persistCoordinatorDocument;  // TODO: SERVER-62338 remove this when 6.0 branches out
+};
+
+// TODO: SERVER-62338 remove this when 6.0 branches out
+class ReshardCollectionCoordinator_NORESILIENT : public ReshardCollectionCoordinator {
+public:
+ ReshardCollectionCoordinator_NORESILIENT(ShardingDDLCoordinatorService* service,
+ const BSONObj& initialState);
};
+
} // namespace mongo
diff --git a/src/mongo/db/s/reshard_collection_coordinator_document.idl b/src/mongo/db/s/reshard_collection_coordinator_document.idl
new file mode 100644
index 00000000000..2d3032957b6
--- /dev/null
+++ b/src/mongo/db/s/reshard_collection_coordinator_document.idl
@@ -0,0 +1,61 @@
+# Copyright (C) 2021-present MongoDB, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the Server Side Public License, version 1,
+# as published by MongoDB, Inc.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# Server Side Public License for more details.
+#
+# You should have received a copy of the Server Side Public License
+# along with this program. If not, see
+# <http://www.mongodb.com/licensing/server-side-public-license>.
+#
+# As a special exception, the copyright holders give permission to link the
+# code of portions of this program with the OpenSSL library under certain
+# conditions as described in each individual source file and distribute
+# linked combinations including the program with the OpenSSL library. You
+# must comply with the Server Side Public License in all respects for
+# all of the code used other than as permitted herein. If you modify file(s)
+# with this exception, you may extend this exception to your version of the
+# file(s), but you are not obligated to do so. If you do not wish to do so,
+# delete this exception statement from your version. If you delete this
+# exception statement from all source files in the program, then also delete
+# it in the license file.
+#
+
+# This file defines the format of documents stored in config.system.sharding_ddl_coordinators, used
+# by the shard coordinator to guarantee resilience in the event of stepdowns while resharding
+# collections.
+
+global:
+ cpp_namespace: "mongo"
+
+imports:
+ - "mongo/idl/basic_types.idl"
+ - "mongo/db/s/sharding_ddl_coordinator.idl"
+ - "mongo/s/request_types/sharded_ddl_commands.idl"
+
+enums:
+ ReshardCollectionCoordinatorPhase:
+ description: "Current reshard collection coordinator's operation state."
+ type: string
+ values:
+ kUnset: "unset"
+ kReshard: "reshard"
+
+structs:
+ ReshardCollectionCoordinatorDocument:
+ description: "Represents a reshard collection operation on the coordinator shard."
+ generate_comparison_operators: false
+ strict: false
+ chained_structs:
+ ShardingDDLCoordinatorMetadata: ShardingDDLCoordinatorMetadata
+ ReshardCollectionRequest: ReshardCollectionRequest
+ fields:
+ phase:
+ type: ReshardCollectionCoordinatorPhase
+ description: "Coordinator phase."
+ default: kUnset
diff --git a/src/mongo/db/s/sharding_ddl_coordinator.cpp b/src/mongo/db/s/sharding_ddl_coordinator.cpp
index afa7a2dfcca..d4ac81b9b24 100644
--- a/src/mongo/db/s/sharding_ddl_coordinator.cpp
+++ b/src/mongo/db/s/sharding_ddl_coordinator.cpp
@@ -326,22 +326,4 @@ void ShardingDDLCoordinator::_performNoopRetryableWriteOnAllShardsAndConfigsvr(
sharding_ddl_util::performNoopRetryableWriteOnShards(opCtx, shardsAndConfigsvr, osi, executor);
}
-ShardingDDLCoordinator_NORESILIENT::ShardingDDLCoordinator_NORESILIENT(OperationContext* opCtx,
- const NamespaceString& ns)
- : _nss(ns), _forwardableOpMetadata(opCtx) {}
-
-SemiFuture<void> ShardingDDLCoordinator_NORESILIENT::run(OperationContext* opCtx) {
- if (!_nss.isConfigDB()) {
- // Check that the operation context has a database version for this namespace
- const auto clientDbVersion = OperationShardingState::get(opCtx).getDbVersion(_nss.db());
- uassert(ErrorCodes::IllegalOperation,
- str::stream() << "Request sent without attaching database version",
- clientDbVersion);
-
- // Checks that this is the primary shard for the namespace's db
- DatabaseShardingState::checkIsPrimaryShardForDb(opCtx, _nss.db());
- }
- return runImpl(Grid::get(opCtx)->getExecutorPool()->getFixedExecutor());
-}
-
} // namespace mongo
diff --git a/src/mongo/db/s/sharding_ddl_coordinator.h b/src/mongo/db/s/sharding_ddl_coordinator.h
index 55e3cb9c559..a68d0aad35b 100644
--- a/src/mongo/db/s/sharding_ddl_coordinator.h
+++ b/src/mongo/db/s/sharding_ddl_coordinator.h
@@ -207,17 +207,4 @@ private:
std::stack<DistLockManager::ScopedLock> _scopedLocks;
};
-class ShardingDDLCoordinator_NORESILIENT {
-public:
- ShardingDDLCoordinator_NORESILIENT(OperationContext* opCtx, const NamespaceString& nss);
- SemiFuture<void> run(OperationContext* opCtx);
-
-protected:
- NamespaceString _nss;
- ForwardableOperationMetadata _forwardableOpMetadata;
-
-private:
- virtual SemiFuture<void> runImpl(std::shared_ptr<executor::TaskExecutor>) = 0;
-};
-
} // namespace mongo
diff --git a/src/mongo/db/s/sharding_ddl_coordinator.idl b/src/mongo/db/s/sharding_ddl_coordinator.idl
index 2b43a7b48f4..1a3e5610312 100644
--- a/src/mongo/db/s/sharding_ddl_coordinator.idl
+++ b/src/mongo/db/s/sharding_ddl_coordinator.idl
@@ -51,6 +51,8 @@ enums:
kRefineCollectionShardKey: "refineCollectionShardKey"
kSetAllowMigrations: "setAllowMigrations"
kCollMod: "collMod"
+ kReshardCollection: "reshardCollection"
+ kReshardCollectionNoResilient: "reshardCollectionNoResilient"
types:
ForwardableOperationMetadata:
diff --git a/src/mongo/db/s/sharding_ddl_coordinator_service.cpp b/src/mongo/db/s/sharding_ddl_coordinator_service.cpp
index 1ec794cc435..ba59bc0f971 100644
--- a/src/mongo/db/s/sharding_ddl_coordinator_service.cpp
+++ b/src/mongo/db/s/sharding_ddl_coordinator_service.cpp
@@ -49,6 +49,7 @@
#include "mongo/db/s/move_primary_coordinator.h"
#include "mongo/db/s/refine_collection_shard_key_coordinator.h"
#include "mongo/db/s/rename_collection_coordinator.h"
+#include "mongo/db/s/reshard_collection_coordinator.h"
#include "mongo/db/s/set_allow_migrations_coordinator.h"
namespace mongo {
@@ -85,6 +86,13 @@ std::shared_ptr<ShardingDDLCoordinator> constructShardingDDLCoordinatorInstance(
case DDLCoordinatorTypeEnum::kCollMod:
return std::make_shared<CollModCoordinator>(service, std::move(initialState));
break;
+ case DDLCoordinatorTypeEnum::kReshardCollection:
+ return std::make_shared<ReshardCollectionCoordinator>(service, std::move(initialState));
+ break;
+ case DDLCoordinatorTypeEnum::kReshardCollectionNoResilient:
+ return std::make_shared<ReshardCollectionCoordinator_NORESILIENT>(
+ service, std::move(initialState));
+ break;
default:
uasserted(ErrorCodes::BadValue,
str::stream()
diff --git a/src/mongo/db/s/shardsvr_reshard_collection_command.cpp b/src/mongo/db/s/shardsvr_reshard_collection_command.cpp
index 1427d27dcfd..46d068c12af 100644
--- a/src/mongo/db/s/shardsvr_reshard_collection_command.cpp
+++ b/src/mongo/db/s/shardsvr_reshard_collection_command.cpp
@@ -33,10 +33,11 @@
#include "mongo/db/auth/authorization_session.h"
#include "mongo/db/commands.h"
-#include "mongo/db/s/dist_lock_manager.h"
+#include "mongo/db/commands/feature_compatibility_version.h"
#include "mongo/db/s/reshard_collection_coordinator.h"
#include "mongo/db/s/sharding_state.h"
#include "mongo/s/request_types/sharded_ddl_commands_gen.h"
+#include "mongo/s/resharding/resharding_feature_flag_gen.h"
namespace mongo {
namespace {
@@ -82,16 +83,30 @@ public:
"Resharding is not supported for this version, please update the FCV to latest.",
!serverGlobalParams.featureCompatibility.isUpgradingOrDowngrading());
- DistLockManager::ScopedDistLock dbDistLock(
- uassertStatusOK(DistLockManager::get(opCtx)->lock(
- opCtx, ns().db(), "reshardCollection", DistLockManager::kDefaultLockTimeout)));
- DistLockManager::ScopedDistLock collDistLock(
- uassertStatusOK(DistLockManager::get(opCtx)->lock(
- opCtx, ns().ns(), "reshardCollection", DistLockManager::kDefaultLockTimeout)));
-
- auto reshardCollectionCoordinator =
- std::make_shared<ReshardCollectionCoordinator>(opCtx, request());
- reshardCollectionCoordinator->run(opCtx).get(opCtx);
+ const auto reshardCollectionCoordinatorCompletionFuture =
+ [&]() -> SharedSemiFuture<void> {
+ FixedFCVRegion fixedFcvRegion(opCtx);
+ const auto coordinatorType =
+ resharding::gFeatureFlagRecoverableShardsvrReshardCollectionCoordinator
+ .isEnabled(serverGlobalParams.featureCompatibility)
+ ? DDLCoordinatorTypeEnum::kReshardCollection
+ : DDLCoordinatorTypeEnum::kReshardCollectionNoResilient;
+
+ ReshardCollectionRequest reshardCollectionRequest =
+ request().getReshardCollectionRequest();
+
+ auto coordinatorDoc = ReshardCollectionCoordinatorDocument();
+ coordinatorDoc.setReshardCollectionRequest(std::move(reshardCollectionRequest));
+ coordinatorDoc.setShardingDDLCoordinatorMetadata({{ns(), coordinatorType}});
+
+ auto service = ShardingDDLCoordinatorService::getService(opCtx);
+ auto reshardCollectionCoordinator =
+ checked_pointer_cast<ReshardCollectionCoordinator>(
+ service->getOrCreateInstance(opCtx, coordinatorDoc.toBSON()));
+ return reshardCollectionCoordinator->getCompletionFuture();
+ }();
+
+ reshardCollectionCoordinatorCompletionFuture.get(opCtx);
}
private:
diff --git a/src/mongo/s/commands/cluster_reshard_collection_cmd.cpp b/src/mongo/s/commands/cluster_reshard_collection_cmd.cpp
index 17c0e36eec1..91a20f8fd70 100644
--- a/src/mongo/s/commands/cluster_reshard_collection_cmd.cpp
+++ b/src/mongo/s/commands/cluster_reshard_collection_cmd.cpp
@@ -55,14 +55,21 @@ public:
void typedRun(OperationContext* opCtx) {
const auto& nss = ns();
- ShardsvrReshardCollection shardsvrReshardCollection(nss, request().getKey());
+
+ ShardsvrReshardCollection shardsvrReshardCollection(nss);
shardsvrReshardCollection.setDbName(request().getDbName());
- shardsvrReshardCollection.setUnique(request().getUnique());
- shardsvrReshardCollection.setCollation(request().getCollation());
- shardsvrReshardCollection.set_presetReshardedChunks(
+
+ ReshardCollectionRequest reshardCollectionRequest;
+ reshardCollectionRequest.setKey(request().getKey());
+ reshardCollectionRequest.setUnique(request().getUnique());
+ reshardCollectionRequest.setCollation(request().getCollation());
+ reshardCollectionRequest.set_presetReshardedChunks(
request().get_presetReshardedChunks());
- shardsvrReshardCollection.setZones(request().getZones());
- shardsvrReshardCollection.setNumInitialChunks(request().getNumInitialChunks());
+ reshardCollectionRequest.setZones(request().getZones());
+ reshardCollectionRequest.setNumInitialChunks(request().getNumInitialChunks());
+
+ shardsvrReshardCollection.setReshardCollectionRequest(
+ std::move(reshardCollectionRequest));
auto catalogCache = Grid::get(opCtx)->catalogCache();
const auto dbInfo = uassertStatusOK(catalogCache->getDatabase(opCtx, nss.db()));
diff --git a/src/mongo/s/request_types/sharded_ddl_commands.idl b/src/mongo/s/request_types/sharded_ddl_commands.idl
index b0151f36182..564a5849800 100644
--- a/src/mongo/s/request_types/sharded_ddl_commands.idl
+++ b/src/mongo/s/request_types/sharded_ddl_commands.idl
@@ -150,6 +150,37 @@ structs:
description: "The index specification document to use as the new shard key."
optional: false
+ ReshardCollectionRequest:
+ description: "Parameters for the reshard collection command"
+ strict: false
+ fields:
+ key:
+ type: object
+ description: "The index specification document to use as the new shard key."
+ optional: false
+ unique:
+ type: bool
+ description: "Whether the shard key index should enforce a unique constraint."
+ optional: true
+ numInitialChunks:
+ type: safeInt64
+ description: "The number of chunks to create initially."
+ optional: true
+ collation:
+ type: object
+ description: "The collation to use for the shard key index."
+ optional: true
+ zones:
+ type: array<ReshardingZoneType>
+ description: "The zones associated with the new shard key."
+ optional: true
+ _presetReshardedChunks:
+ type: array<ReshardedChunk>
+ description: >-
+ "Mapping of chunk ranges to be used as the initial split output. This is only
+ for testing purposes."
+ optional: true
+
SetAllowMigrationsRequest:
description: "Parameters sent for the set allow migrations command"
strict: false
@@ -268,33 +299,8 @@ commands:
namespace: type
api_version: ""
type: namespacestring
- fields:
- key:
- type: object
- description: "The index specification document to use as the new shard key."
- optional: false
- unique:
- type: bool
- description: "Whether the shard key index should enforce a unique constraint."
- optional: true
- numInitialChunks:
- type: safeInt64
- description: "The number of chunks to create initially."
- optional: true
- collation:
- type: object
- description: "The collation to use for the shard key index."
- optional: true
- zones:
- type: array<ReshardingZoneType>
- description: "The zones associated with the new shard key."
- optional: true
- _presetReshardedChunks:
- type: array<ReshardedChunk>
- description: >-
- "Mapping of chunk ranges to be used as the initial split output. This is only for
- testing purposes."
- optional: true
+ chained_structs:
+ ReshardCollectionRequest: ReshardCollectionRequest
_shardsvrRefineCollectionShardKey:
description: "Parser for the _shardsvrRefineCollectionShardKey command"
diff --git a/src/mongo/s/resharding/resharding_feature_flag.idl b/src/mongo/s/resharding/resharding_feature_flag.idl
index e83de02e286..4179b4290f8 100644
--- a/src/mongo/s/resharding/resharding_feature_flag.idl
+++ b/src/mongo/s/resharding/resharding_feature_flag.idl
@@ -40,3 +40,10 @@ feature_flags:
cpp_varname: gFeatureFlagResharding
default: true
version: 5.0
+
+ featureFlagRecoverableShardsvrReshardCollectionCoordinator:
+ description: When enabled, the db primary shard will use a recoverable
+ ShardingDDLCoordinator that ensures proper serialization of resharding and other DDL
+ operations even in the event of primary shard stepdowns.
+ cpp_varname: gFeatureFlagRecoverableShardsvrReshardCollectionCoordinator
+ default: false