author     Tommaso Tocci <tommaso.tocci@mongodb.com>    2021-02-19 12:19:12 +0100
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>    2021-03-22 14:23:16 +0000
commit     7951ef4933ba29630e5407caf56e07569ab9f4ea (patch)
tree       30cff5e489d921da2de973b5abb707e14f4a8af2
parent     6fcbf78f7d94e6d7ace7deea808464f1e0bd7777 (diff)
SERVER-54945 Make drop database resilient to stepdowns on sharded cluster
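
The new DropDatabaseCoordinator is built on the resumable ShardingDDLCoordinator infrastructure: it persists a DropDatabaseCoordinatorDocument (the current phase plus the collection currently being dropped) with majority write concern, so a newly elected primary can recover the document and resume the drop instead of failing on stepdown. The sketch below is a minimal, standalone illustration of the phase-persistence idiom that _executePhase uses in this patch; the StateDoc/persistPhase names here are hypothetical stand-ins for the PersistentTaskStore writes in the real coordinator, not the server API.

    // Minimal, standalone sketch of the phase-persistence idiom
    // (hypothetical names; not the server implementation).
    #include <iostream>
    #include <utility>

    enum class Phase { kUnset = 0, kDrop = 1 };

    // Stand-in for the coordinator state document persisted in
    // config.system.sharding_ddl_coordinators.
    struct StateDoc {
        Phase phase = Phase::kUnset;
    };

    // Pretend durable storage; the real coordinator inserts/updates the
    // document with majority write concern so a new primary can recover it.
    StateDoc g_persisted;

    void persistPhase(StateDoc& doc, Phase newPhase) {
        doc.phase = newPhase;
        g_persisted = doc;
        std::cout << "persisted phase " << static_cast<int>(newPhase) << "\n";
    }

    // Same shape as DropDatabaseCoordinator::_executePhase: skip phases a
    // previous primary already moved past, persist the phase marker the first
    // time it is reached, then run the (idempotent) phase body.
    template <typename Func>
    auto executePhase(StateDoc& doc, Phase newPhase, Func&& func) {
        return [&doc, newPhase, func = std::forward<Func>(func)]() {
            if (doc.phase > newPhase) {
                return;                       // already past this phase
            }
            if (doc.phase < newPhase) {
                persistPhase(doc, newPhase);  // first execution of this phase
            }
            func();                           // re-run is safe: body is idempotent
        };
    }

    int main() {
        // First attempt on the original primary.
        StateDoc doc;
        executePhase(doc, Phase::kDrop, [] { std::cout << "dropping database\n"; })();

        // Simulated stepdown: the new primary reloads the persisted document
        // and replays the same chain; the drop phase runs again without
        // re-persisting its marker.
        StateDoc recovered = g_persisted;
        executePhase(recovered, Phase::kDrop, [] { std::cout << "resuming drop\n"; })();
    }

This mirrors how ShardingDDLCoordinatorService::constructInstance (changed below) rebuilds the coordinator from the recovered document after a failover, which is why the drop phase issues only retryable, idempotent participant commands.
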
-rw-r--r--  buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency.yml  1
-rw-r--r--  buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency_and_balancer.yml  1
-rw-r--r--  buildscripts/resmokeconfig/suites/concurrency_sharded_kill_primary_with_balancer.yml  1
-rw-r--r--  buildscripts/resmokeconfig/suites/concurrency_sharded_local_read_write_multi_stmt_txn.yml  1
-rw-r--r--  buildscripts/resmokeconfig/suites/concurrency_sharded_local_read_write_multi_stmt_txn_with_balancer.yml  1
-rw-r--r--  buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn.yml  1
-rw-r--r--  buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_kill_primary.yml  1
-rw-r--r--  buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_terminate_primary.yml  1
-rw-r--r--  buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_balancer.yml  1
-rw-r--r--  buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_stepdowns.yml  1
-rw-r--r--  buildscripts/resmokeconfig/suites/concurrency_sharded_replication.yml  1
-rw-r--r--  buildscripts/resmokeconfig/suites/concurrency_sharded_replication_with_balancer.yml  1
-rw-r--r--  buildscripts/resmokeconfig/suites/concurrency_sharded_terminate_primary_with_balancer.yml  1
-rw-r--r--  buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns.yml  1
-rw-r--r--  buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns_and_balancer.yml  1
-rw-r--r--  jstests/concurrency/fsm_workloads/drop_database.js  5
-rw-r--r--  jstests/sharding/drop_database.js  8
-rw-r--r--  src/mongo/db/s/SConscript  1
-rw-r--r--  src/mongo/db/s/drop_collection_coordinator.cpp  4
-rw-r--r--  src/mongo/db/s/drop_database_coordinator.cpp  302
-rw-r--r--  src/mongo/db/s/drop_database_coordinator.h  44
-rw-r--r--  src/mongo/db/s/drop_database_coordinator_document.idl  71
-rw-r--r--  src/mongo/db/s/sharding_ddl_coordinator.h  1
-rw-r--r--  src/mongo/db/s/sharding_ddl_coordinator.idl  1
-rw-r--r--  src/mongo/db/s/sharding_ddl_coordinator_service.cpp  7
-rw-r--r--  src/mongo/db/s/shardsvr_drop_database_command.cpp  23
26 files changed, 344 insertions, 138 deletions
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency.yml
index e3e2ec62bf2..e99f29d927a 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency.yml
@@ -11,7 +11,6 @@ selector:
# SERVER-17397 Drops of sharded namespaces may not fully succeed
- jstests/concurrency/fsm_workloads/create_database.js
- - jstests/concurrency/fsm_workloads/drop_database.js
# Disabled due to SERVER-33753, '.count() without a predicate can be wrong on sharded
# collections'. This bug is problematic for these workloads because they assert on count()
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency_and_balancer.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency_and_balancer.yml
index c37b9794b11..c7c14704f94 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency_and_balancer.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency_and_balancer.yml
@@ -11,7 +11,6 @@ selector:
# SERVER-17397 Drops of sharded namespaces may not fully succeed
- jstests/concurrency/fsm_workloads/create_database.js
- - jstests/concurrency/fsm_workloads/drop_database.js
# SERVER-14669 Multi-removes that use $where miscount removed documents
- jstests/concurrency/fsm_workloads/remove_where.js
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_kill_primary_with_balancer.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_kill_primary_with_balancer.yml
index 7f19a47b67d..069c00213b0 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_kill_primary_with_balancer.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_kill_primary_with_balancer.yml
@@ -11,7 +11,6 @@ selector:
# SERVER-17397 Drops of sharded namespaces may not fully succeed
- jstests/concurrency/fsm_workloads/create_database.js
- - jstests/concurrency/fsm_workloads/drop_database.js
# SERVER-14669 Multi-removes that use $where miscount removed documents
- jstests/concurrency/fsm_workloads/remove_where.js
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_local_read_write_multi_stmt_txn.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_local_read_write_multi_stmt_txn.yml
index ec7a573cb90..5a3aac40d27 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_local_read_write_multi_stmt_txn.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_local_read_write_multi_stmt_txn.yml
@@ -15,7 +15,6 @@ selector:
# SERVER-17397 Drops of sharded namespaces may not fully succeed
- jstests/concurrency/fsm_workloads/create_database.js
- - jstests/concurrency/fsm_workloads/drop_database.js
# Disabled due to SERVER-33753, '.count() without a predicate can be wrong on sharded
# collections'. This bug is problematic for these workloads because they assert on count()
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_local_read_write_multi_stmt_txn_with_balancer.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_local_read_write_multi_stmt_txn_with_balancer.yml
index 45ced264b6d..87d4ad67b37 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_local_read_write_multi_stmt_txn_with_balancer.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_local_read_write_multi_stmt_txn_with_balancer.yml
@@ -15,7 +15,6 @@ selector:
# SERVER-17397 Drops of sharded namespaces may not fully succeed
- jstests/concurrency/fsm_workloads/create_database.js
- - jstests/concurrency/fsm_workloads/drop_database.js
# Disabled due to SERVER-33753, '.count() without a predicate can be wrong on sharded
# collections'. This bug is problematic for these workloads because they assert on count()
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn.yml
index 032b9d8960b..8ef1b8ac9a0 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn.yml
@@ -15,7 +15,6 @@ selector:
# SERVER-17397 Drops of sharded namespaces may not fully succeed
- jstests/concurrency/fsm_workloads/create_database.js
- - jstests/concurrency/fsm_workloads/drop_database.js
# Disabled due to SERVER-33753, '.count() without a predicate can be wrong on sharded
# collections'. This bug is problematic for these workloads because they assert on count()
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_kill_primary.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_kill_primary.yml
index d57b46cfb98..13126caf23a 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_kill_primary.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_kill_primary.yml
@@ -20,7 +20,6 @@ selector:
# SERVER-17397 Drops of sharded namespaces may not fully succeed
- jstests/concurrency/fsm_workloads/create_database.js
- - jstests/concurrency/fsm_workloads/drop_database.js
# Disabled due to SERVER-33753, '.count() without a predicate can be wrong on sharded
# collections'. This bug is problematic for these workloads because they assert on count()
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_terminate_primary.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_terminate_primary.yml
index f1aa215809e..6efcc004a67 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_terminate_primary.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_terminate_primary.yml
@@ -20,7 +20,6 @@ selector:
# SERVER-17397 Drops of sharded namespaces may not fully succeed
- jstests/concurrency/fsm_workloads/create_database.js
- - jstests/concurrency/fsm_workloads/drop_database.js
# Disabled due to SERVER-33753, '.count() without a predicate can be wrong on sharded
# collections'. This bug is problematic for these workloads because they assert on count()
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_balancer.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_balancer.yml
index 517f1a46abf..fb435998e45 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_balancer.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_balancer.yml
@@ -15,7 +15,6 @@ selector:
# SERVER-17397 Drops of sharded namespaces may not fully succeed
- jstests/concurrency/fsm_workloads/create_database.js
- - jstests/concurrency/fsm_workloads/drop_database.js
# Disabled due to SERVER-33753, '.count() without a predicate can be wrong on sharded
# collections'. This bug is problematic for these workloads because they assert on count()
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_stepdowns.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_stepdowns.yml
index daf4e2cca3b..f64bdcaa343 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_stepdowns.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_multi_stmt_txn_with_stepdowns.yml
@@ -20,7 +20,6 @@ selector:
# SERVER-17397 Drops of sharded namespaces may not fully succeed
- jstests/concurrency/fsm_workloads/create_database.js
- - jstests/concurrency/fsm_workloads/drop_database.js
# Disabled due to SERVER-33753, '.count() without a predicate can be wrong on sharded
# collections'. This bug is problematic for these workloads because they assert on count()
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_replication.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_replication.yml
index 20c1b011136..6712c7f100c 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_replication.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_replication.yml
@@ -12,7 +12,6 @@ selector:
# SERVER-17397 Drops of sharded namespaces may not fully succeed
- jstests/concurrency/fsm_workloads/create_database.js
- - jstests/concurrency/fsm_workloads/drop_database.js
- jstests/concurrency/fsm_workloads/map_reduce_drop.js
# Disabled due to SERVER-33753, '.count() without a predicate can be wrong on sharded
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_replication_with_balancer.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_replication_with_balancer.yml
index 2d4e21c8de3..dd207dbb4b5 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_replication_with_balancer.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_replication_with_balancer.yml
@@ -11,7 +11,6 @@ selector:
# SERVER-17397 Drops of sharded namespaces may not fully succeed
- jstests/concurrency/fsm_workloads/create_database.js
- - jstests/concurrency/fsm_workloads/drop_database.js
- jstests/concurrency/fsm_workloads/map_reduce_drop.js
# SERVER-14669 Multi-removes that use $where miscount removed documents
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_terminate_primary_with_balancer.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_terminate_primary_with_balancer.yml
index abb020bb0af..a400794ff09 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_terminate_primary_with_balancer.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_terminate_primary_with_balancer.yml
@@ -11,7 +11,6 @@ selector:
# SERVER-17397 Drops of sharded namespaces may not fully succeed
- jstests/concurrency/fsm_workloads/create_database.js
- - jstests/concurrency/fsm_workloads/drop_database.js
# SERVER-14669 Multi-removes that use $where miscount removed documents
- jstests/concurrency/fsm_workloads/remove_where.js
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns.yml
index baa19a7c91c..238b9a3b51f 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns.yml
@@ -11,7 +11,6 @@ selector:
# SERVER-17397 Drops of sharded namespaces may not fully succeed
- jstests/concurrency/fsm_workloads/create_database.js
- - jstests/concurrency/fsm_workloads/drop_database.js
- jstests/concurrency/fsm_workloads/map_reduce_drop.js
# Disabled due to SERVER-33753, '.count() without a predicate can be wrong on sharded
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns_and_balancer.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns_and_balancer.yml
index 2649aedbbe7..e6669f6e3b5 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns_and_balancer.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns_and_balancer.yml
@@ -11,7 +11,6 @@ selector:
# SERVER-17397 Drops of sharded namespaces may not fully succeed
- jstests/concurrency/fsm_workloads/create_database.js
- - jstests/concurrency/fsm_workloads/drop_database.js
- jstests/concurrency/fsm_workloads/map_reduce_drop.js
# SERVER-14669 Multi-removes that use $where miscount removed documents
diff --git a/jstests/concurrency/fsm_workloads/drop_database.js b/jstests/concurrency/fsm_workloads/drop_database.js
index 247496e05df..3f750864908 100644
--- a/jstests/concurrency/fsm_workloads/drop_database.js
+++ b/jstests/concurrency/fsm_workloads/drop_database.js
@@ -4,6 +4,11 @@
* drop_database.js
*
* Repeatedly creates and drops a database.
+ *
+ * @tags: [
+ * # SERVER-54587 create collection does not support stepdowns
+ * does_not_support_stepdowns,
+ * ]
*/
var $config = (function() {
var states = {
diff --git a/jstests/sharding/drop_database.js b/jstests/sharding/drop_database.js
index 15c85098d22..2c693b98570 100644
--- a/jstests/sharding/drop_database.js
+++ b/jstests/sharding/drop_database.js
@@ -77,6 +77,10 @@ jsTest.log("Test dropping unsharded database");
// Drop the database
assert.commandWorked(db.dropDatabase());
assertDatabaseDropped(db.getName());
+
+ // Test drop database idempotency
+ assert.commandWorked(db.dropDatabase());
+ assertDatabaseDropped(db.getName());
}
jsTest.log("Test dropping unsharded database with multiple collections");
@@ -102,6 +106,10 @@ jsTest.log("Test dropping sharded database");
// Drop the database
assert.commandWorked(db.dropDatabase());
assertDatabaseDropped(db.getName());
+
+ // Test drop database idempotency
+ assert.commandWorked(db.dropDatabase());
+ assertDatabaseDropped(db.getName());
}
jsTest.log("Test dropping database that contains regex characters");
diff --git a/src/mongo/db/s/SConscript b/src/mongo/db/s/SConscript
index 888a90bd0d1..28c64b3c6a3 100644
--- a/src/mongo/db/s/SConscript
+++ b/src/mongo/db/s/SConscript
@@ -335,6 +335,7 @@ env.Library(
'drop_collection_coordinator.cpp',
'drop_collection_coordinator_document.idl',
'drop_database_coordinator.cpp',
+ 'drop_database_coordinator_document.idl',
'flush_database_cache_updates_command.cpp',
'flush_routing_table_cache_updates_command.cpp',
'get_database_version_command.cpp',
diff --git a/src/mongo/db/s/drop_collection_coordinator.cpp b/src/mongo/db/s/drop_collection_coordinator.cpp
index 316d10b78a8..c9fb4705341 100644
--- a/src/mongo/db/s/drop_collection_coordinator.cpp
+++ b/src/mongo/db/s/drop_collection_coordinator.cpp
@@ -54,7 +54,7 @@ boost::optional<BSONObj> DropCollectionCoordinator::reportForCurrentOp(
BSONObjBuilder cmdBob;
if (const auto& optComment = getForwardableOpMetadata().getComment()) {
- cmdBob.append("comment", optComment.get());
+ cmdBob.append(optComment.get().firstElement());
}
BSONObjBuilder bob;
bob.append("type", "op");
@@ -180,7 +180,7 @@ ExecutorFuture<void> DropCollectionCoordinator::_runImpl(
// We need to send the drop to all the shards because both movePrimary and
// moveChunk leave garbage behind for sharded collections.
auto participants = Grid::get(opCtx)->shardRegistry()->getAllShardIds(opCtx);
- // Remove prumary shard from participants
+ // Remove primary shard from participants
participants.erase(
std::remove(participants.begin(), participants.end(), primaryShardId),
participants.end());
diff --git a/src/mongo/db/s/drop_database_coordinator.cpp b/src/mongo/db/s/drop_database_coordinator.cpp
index 5e6309fc14f..20bb54af73b 100644
--- a/src/mongo/db/s/drop_database_coordinator.cpp
+++ b/src/mongo/db/s/drop_database_coordinator.cpp
@@ -32,46 +32,41 @@
#include "mongo/db/s/drop_database_coordinator.h"
#include "mongo/db/api_parameters.h"
-#include "mongo/db/catalog_raii.h"
-#include "mongo/db/concurrency/lock_manager_defs.h"
-#include "mongo/db/s/database_sharding_state.h"
-#include "mongo/db/s/dist_lock_manager.h"
-#include "mongo/db/s/drop_collection_coordinator.h"
+#include "mongo/db/persistent_task_store.h"
#include "mongo/db/s/sharding_ddl_util.h"
#include "mongo/db/s/sharding_state.h"
#include "mongo/logv2/log.h"
#include "mongo/s/catalog/sharding_catalog_client.h"
-#include "mongo/s/catalog/type_chunk.h"
-#include "mongo/s/catalog/type_shard.h"
-#include "mongo/s/catalog/type_tags.h"
#include "mongo/s/client/shard_registry.h"
#include "mongo/s/grid.h"
#include "mongo/s/request_types/sharded_ddl_commands_gen.h"
-#include "mongo/util/assert_util.h"
namespace mongo {
namespace {
-void sendCommandToAllShards(OperationContext* opCtx,
- StringData dbName,
- StringData cmdName,
- BSONObj cmd,
- const std::vector<ShardId>& participants) {
- auto* const shardRegistry = Grid::get(opCtx)->shardRegistry();
- for (const auto& shardId : participants) {
- const auto& shard = uassertStatusOK(shardRegistry->getShard(opCtx, shardId));
-
- const auto swDropResult = shard->runCommandWithFixedRetryAttempts(
- opCtx,
- ReadPreferenceSetting{ReadPreference::PrimaryOnly},
- dbName.toString(),
- CommandHelpers::appendMajorityWriteConcern(cmd),
- Shard::RetryPolicy::kIdempotent);
-
- uassertStatusOKWithContext(
- Shard::CommandResponse::getEffectiveStatus(std::move(swDropResult)),
- str::stream() << "Error processing " << cmdName << " on shard " << shardId);
- }
+void dropShardedCollection(OperationContext* opCtx,
+ const CollectionType& coll,
+ std::shared_ptr<executor::ScopedTaskExecutor> executor) {
+ sharding_ddl_util::removeCollMetadataFromConfig(opCtx, coll);
+
+ const auto primaryShardId = ShardingState::get(opCtx)->shardId();
+ const ShardsvrDropCollectionParticipant dropCollectionParticipant(coll.getNss());
+ const auto cmdObj =
+ CommandHelpers::appendMajorityWriteConcern(dropCollectionParticipant.toBSON({}));
+
+ // The collection needs to be dropped first on the db primary shard
+ // because otherwise changestreams won't receive the drop event.
+ sharding_ddl_util::sendAuthenticatedCommandToShards(
+ opCtx, coll.getNss().db(), cmdObj, {primaryShardId}, **executor);
+
+ // We need to send the drop to all the shards because both movePrimary and
+ // moveChunk leave garbage behind for sharded collections.
+ auto participants = Grid::get(opCtx)->shardRegistry()->getAllShardIds(opCtx);
+ // Remove primary shard from participants
+ participants.erase(std::remove(participants.begin(), participants.end(), primaryShardId),
+ participants.end());
+ sharding_ddl_util::sendAuthenticatedCommandToShards(
+ opCtx, coll.getNss().db(), cmdObj, participants, **executor);
}
void removeDatabaseMetadataFromConfig(OperationContext* opCtx, StringData dbName) {
@@ -96,85 +91,184 @@ void removeDatabaseMetadataFromConfig(OperationContext* opCtx, StringData dbName
} // namespace
-DropDatabaseCoordinator::DropDatabaseCoordinator(OperationContext* opCtx, StringData dbName)
- : ShardingDDLCoordinator_NORESILIENT(opCtx, {dbName, ""}),
- _serviceContext(opCtx->getServiceContext()) {}
-
-SemiFuture<void> DropDatabaseCoordinator::runImpl(
- std::shared_ptr<executor::TaskExecutor> executor) {
- return ExecutorFuture<void>(executor, Status::OK())
- .then([this, anchor = shared_from_this()]() {
- ThreadClient tc{"DropDatabaseCoordinator", _serviceContext};
- auto opCtxHolder = tc->makeOperationContext();
- auto* opCtx = opCtxHolder.get();
- _forwardableOpMetadata.setOn(opCtx);
-
- const auto dbName = _nss.db();
- auto distLockManager = DistLockManager::get(_serviceContext);
- const auto dbDistLock = uassertStatusOK(distLockManager->lock(
- opCtx, dbName, "DropDatabase", DistLockManager::kDefaultLockTimeout));
-
- // Drop all collections under this DB
- auto const catalogClient = Grid::get(opCtx)->catalogClient();
- const auto allCollectionsForDb = catalogClient->getCollections(
- opCtx, dbName, repl::ReadConcernLevel::kMajorityReadConcern);
-
- for (const auto& coll : allCollectionsForDb) {
- if (coll.getDropped()) {
- continue;
+DropDatabaseCoordinator::DropDatabaseCoordinator(const BSONObj& initialState)
+ : ShardingDDLCoordinator(initialState),
+ _doc(DropDatabaseCoordinatorDocument::parse(
+ IDLParserErrorContext("DropDatabaseCoordinatorDocument"), initialState)),
+ _dbName(nss().db()) {}
+
+boost::optional<BSONObj> DropDatabaseCoordinator::reportForCurrentOp(
+ MongoProcessInterface::CurrentOpConnectionsMode connMode,
+ MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept {
+ BSONObjBuilder cmdBob;
+ if (const auto& optComment = getForwardableOpMetadata().getComment()) {
+ cmdBob.append(optComment.get().firstElement());
+ }
+ BSONObjBuilder bob;
+ bob.append("type", "op");
+ bob.append("desc", "DropDatabaseCoordinator");
+ bob.append("op", "command");
+ bob.append("ns", nss().toString());
+ bob.append("command", cmdBob.obj());
+ bob.append("currentPhase", _doc.getPhase());
+ bob.append("active", true);
+ return bob.obj();
+}
+
+void DropDatabaseCoordinator::_insertStateDocument(OperationContext* opCtx, StateDoc&& doc) {
+ auto coorMetadata = doc.getShardingDDLCoordinatorMetadata();
+ coorMetadata.setRecoveredFromDisk(true);
+ doc.setShardingDDLCoordinatorMetadata(coorMetadata);
+
+ PersistentTaskStore<StateDoc> store(NamespaceString::kShardingDDLCoordinatorsNamespace);
+ store.add(opCtx, doc, WriteConcerns::kMajorityWriteConcern);
+ _doc = std::move(doc);
+}
+
+void DropDatabaseCoordinator::_updateStateDocument(OperationContext* opCtx, StateDoc&& newDoc) {
+ PersistentTaskStore<StateDoc> store(NamespaceString::kShardingDDLCoordinatorsNamespace);
+ store.update(opCtx,
+ BSON(StateDoc::kIdFieldName << _doc.getId().toBSON()),
+ newDoc.toBSON(),
+ WriteConcerns::kMajorityWriteConcern);
+
+ _doc = std::move(newDoc);
+}
+
+void DropDatabaseCoordinator::_enterPhase(Phase newPhase) {
+ StateDoc newDoc(_doc);
+ newDoc.setPhase(newPhase);
+
+ LOGV2_DEBUG(5494501,
+ 2,
+ "Drop database coordinator phase transition",
+ "namespace"_attr = nss(),
+ "newPhase"_attr = DropDatabaseCoordinatorPhase_serializer(newDoc.getPhase()),
+ "oldPhase"_attr = DropDatabaseCoordinatorPhase_serializer(_doc.getPhase()));
+
+ auto opCtx = cc().makeOperationContext();
+ if (_doc.getPhase() == Phase::kUnset) {
+ _insertStateDocument(opCtx.get(), std::move(newDoc));
+ return;
+ }
+ _updateStateDocument(opCtx.get(), std::move(newDoc));
+}
+
+void DropDatabaseCoordinator::_removeStateDocument() {
+ auto opCtx = cc().makeOperationContext();
+ PersistentTaskStore<StateDoc> store(NamespaceString::kShardingDDLCoordinatorsNamespace);
+ LOGV2_DEBUG(
+ 5549402, 2, "Removing state document for drop database coordinator", "db"_attr = _dbName);
+ store.remove(opCtx.get(),
+ BSON(StateDoc::kIdFieldName << _doc.getId().toBSON()),
+ WriteConcerns::kMajorityWriteConcern);
+
+ _doc = {};
+}
+
+ExecutorFuture<void> DropDatabaseCoordinator::_runImpl(
+ std::shared_ptr<executor::ScopedTaskExecutor> executor,
+ const CancelationToken& token) noexcept {
+ return ExecutorFuture<void>(**executor)
+ .then(_executePhase(
+ Phase::kDrop,
+ [this, executor = executor, anchor = shared_from_this()] {
+ auto opCtxHolder = cc().makeOperationContext();
+ auto* opCtx = opCtxHolder.get();
+ getForwardableOpMetadata().setOn(opCtx);
+
+ if (_doc.getCollInfo()) {
+ const auto& coll = _doc.getCollInfo().get();
+ LOGV2_DEBUG(5494504,
+ 2,
+ "Completing collection drop from previous primary",
+ "namespace"_attr = coll.getNss());
+ dropShardedCollection(opCtx, coll, executor);
+ }
+
+ // Drop all collections under this DB
+ auto const catalogClient = Grid::get(opCtx)->catalogClient();
+ const auto allCollectionsForDb = catalogClient->getCollections(
+ opCtx, _dbName, repl::ReadConcernLevel::kMajorityReadConcern);
+
+ for (const auto& coll : allCollectionsForDb) {
+ const auto& nss = coll.getNss();
+ LOGV2_DEBUG(5494505, 2, "Dropping collection", "namespace"_attr = nss);
+
+ sharding_ddl_util::stopMigrations(opCtx, nss);
+
+ auto newStateDoc = _doc;
+ newStateDoc.setCollInfo(coll);
+ _updateStateDocument(opCtx, std::move(newStateDoc));
+
+ dropShardedCollection(opCtx, coll, executor);
}
- const auto nss = coll.getNss();
-
- // TODO SERVER-53905 to support failovers here we need to store the
- // current namespace of this loop before to delete it from config server
- // so that on step-up we will remmeber to resume the drop collection for that
- // namespace.
- sharding_ddl_util::removeCollMetadataFromConfig(opCtx, coll);
- const auto dropCollParticipantCmd = ShardsvrDropCollectionParticipant(nss);
- auto* const shardRegistry = Grid::get(opCtx)->shardRegistry();
- sendCommandToAllShards(opCtx,
- dbName,
- ShardsvrDropCollectionParticipant::kCommandName,
- dropCollParticipantCmd.toBSON({}),
- shardRegistry->getAllShardIds(opCtx));
+ const auto primaryShardId = ShardingState::get(opCtx)->shardId();
+ auto dropDatabaseParticipantCmd = ShardsvrDropDatabaseParticipant();
+ dropDatabaseParticipantCmd.setDbName(_dbName);
+ const auto cmdObj = CommandHelpers::appendMajorityWriteConcern(
+ dropDatabaseParticipantCmd.toBSON({}));
+
+ // The database needs to be dropped first on the db primary shard
+ // because otherwise changestreams won't receive the drop event.
+ sharding_ddl_util::sendAuthenticatedCommandToShards(
+ opCtx, _dbName, cmdObj, {primaryShardId}, **executor);
+
+ const auto allShardIds = Grid::get(opCtx)->shardRegistry()->getAllShardIds(opCtx);
+ // Remove primary shard from participants
+ auto participants = allShardIds;
+ participants.erase(
+ std::remove(participants.begin(), participants.end(), primaryShardId),
+ participants.end());
+ // Drop DB on all other shards
+ sharding_ddl_util::sendAuthenticatedCommandToShards(
+ opCtx, _dbName, cmdObj, participants, **executor);
+
+ removeDatabaseMetadataFromConfig(opCtx, _dbName);
+
+ {
+ // Send _flushDatabaseCacheUpdates to all shards
+ IgnoreAPIParametersBlock ignoreApiParametersBlock{opCtx};
+ sharding_ddl_util::sendAuthenticatedCommandToShards(
+ opCtx,
+ "admin",
+ BSON("_flushDatabaseCacheUpdates" << _dbName),
+ allShardIds,
+ **executor);
+ }
+ }))
+ .onCompletion([this, anchor = shared_from_this()](const Status& status) {
+ if (!status.isOK() &&
+ (status.isA<ErrorCategory::NotPrimaryError>() ||
+ status.isA<ErrorCategory::ShutdownError>())) {
+ LOGV2_DEBUG(5494506,
+ 1,
+ "Drop database coordinator has been interrupted and "
+ " will continue on the next elected replicaset primary",
+ "db"_attr = _dbName,
+ "error"_attr = status);
+ return;
+ }
+
+ if (status.isOK()) {
+ LOGV2_DEBUG(5494507, 1, "Database dropped", "db"_attr = _dbName);
+ } else {
+ LOGV2_ERROR(5494508,
+ "Error running drop database",
+ "db"_attr = _dbName,
+ "error"_attr = redact(status));
+ }
+
+ try {
+ _removeStateDocument();
+ } catch (DBException& ex) {
+ ex.addContext("Failed to remove drop database coordinator state document");
+ throw;
}
- // Drop the DB itself.
- // The DistLockManager will prevent to re-create the database before each shard
- // have actually dropped it locally.
- removeDatabaseMetadataFromConfig(opCtx, dbName);
-
- auto dropDatabaseParticipantCmd = ShardsvrDropDatabaseParticipant();
- dropDatabaseParticipantCmd.setDbName(dbName);
- // Drop DB first on primary shard
- const auto primaryShardId = ShardingState::get(opCtx)->shardId();
- sendCommandToAllShards(opCtx,
- dbName,
- ShardsvrDropDatabaseParticipant::kCommandName,
- dropDatabaseParticipantCmd.toBSON({}),
- {primaryShardId});
-
- auto participants = Grid::get(opCtx)->shardRegistry()->getAllShardIds(opCtx);
- // Remove prumary shard from participants
- participants.erase(
- std::remove(participants.begin(), participants.end(), primaryShardId),
- participants.end());
- // Drop DB on all other shards
- sendCommandToAllShards(opCtx,
- dbName,
- ShardsvrDropDatabaseParticipant::kCommandName,
- dropDatabaseParticipantCmd.toBSON({}),
- participants);
- })
- .onError([this, anchor = shared_from_this()](const Status& status) {
- LOGV2_ERROR(5281131,
- "Error running drop database",
- "database"_attr = _nss.db(),
- "error"_attr = redact(status));
- return status;
- })
- .semi();
+ uassertStatusOK(status);
+ });
}
} // namespace mongo
diff --git a/src/mongo/db/s/drop_database_coordinator.h b/src/mongo/db/s/drop_database_coordinator.h
index d2a3765679a..1388e594f9e 100644
--- a/src/mongo/db/s/drop_database_coordinator.h
+++ b/src/mongo/db/s/drop_database_coordinator.h
@@ -29,19 +29,53 @@
#pragma once
+#include "mongo/db/s/drop_database_coordinator_document_gen.h"
#include "mongo/db/s/sharding_ddl_coordinator.h"
namespace mongo {
-class DropDatabaseCoordinator final : public ShardingDDLCoordinator_NORESILIENT,
- public std::enable_shared_from_this<DropDatabaseCoordinator> {
+class DropDatabaseCoordinator final : public ShardingDDLCoordinator {
public:
- DropDatabaseCoordinator(OperationContext* opCtx, StringData dbName);
+ using StateDoc = DropDatabaseCoordinatorDocument;
+ using Phase = DropDatabaseCoordinatorPhaseEnum;
+
+ DropDatabaseCoordinator(const BSONObj& initialState);
+ ~DropDatabaseCoordinator() = default;
+
+ void checkIfOptionsConflict(const BSONObj& doc) const override {}
+
+ boost::optional<BSONObj> reportForCurrentOp(
+ MongoProcessInterface::CurrentOpConnectionsMode connMode,
+ MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept override;
private:
- SemiFuture<void> runImpl(std::shared_ptr<executor::TaskExecutor> executor) override;
+ ExecutorFuture<void> _runImpl(std::shared_ptr<executor::ScopedTaskExecutor> executor,
+ const CancelationToken& token) noexcept override;
+
+ template <typename Func>
+ auto _executePhase(const Phase& newPhase, Func&& func) {
+ return [=] {
+ const auto& currPhase = _doc.getPhase();
+
+ if (currPhase > newPhase) {
+ // Do not execute this phase if we already reached a subsequent one.
+ return;
+ }
+ if (currPhase < newPhase) {
+ // Persist the new phase if this is the first time we are executing it.
+ _enterPhase(newPhase);
+ }
+ return func();
+ };
+ }
+
+ void _insertStateDocument(OperationContext* opCtx, StateDoc&& doc);
+ void _updateStateDocument(OperationContext* opCtx, StateDoc&& newStateDoc);
+ void _removeStateDocument();
+ void _enterPhase(Phase newPhase);
- ServiceContext* _serviceContext;
+ DropDatabaseCoordinatorDocument _doc;
+ StringData _dbName;
};
} // namespace mongo
diff --git a/src/mongo/db/s/drop_database_coordinator_document.idl b/src/mongo/db/s/drop_database_coordinator_document.idl
new file mode 100644
index 00000000000..1e099918524
--- /dev/null
+++ b/src/mongo/db/s/drop_database_coordinator_document.idl
@@ -0,0 +1,71 @@
+# Copyright (C) 2021-present MongoDB, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the Server Side Public License, version 1,
+# as published by MongoDB, Inc.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# Server Side Public License for more details.
+#
+# You should have received a copy of the Server Side Public License
+# along with this program. If not, see
+# <http://www.mongodb.com/licensing/server-side-public-license>.
+#
+# As a special exception, the copyright holders give permission to link the
+# code of portions of this program with the OpenSSL library under certain
+# conditions as described in each individual source file and distribute
+# linked combinations including the program with the OpenSSL library. You
+# must comply with the Server Side Public License in all respects for
+# all of the code used other than as permitted herein. If you modify file(s)
+# with this exception, you may extend this exception to your version of the
+# file(s), but you are not obligated to do so. If you do not wish to do so,
+# delete this exception statement from your version. If you delete this
+# exception statement from all source files in the program, then also delete
+# it in the license file.
+#
+
+# This file defines the format of drop database coordinator persisted documents.
+
+global:
+ cpp_namespace: "mongo"
+ cpp_includes:
+ - "mongo/s/catalog/type_collection.h"
+
+imports:
+ - "mongo/idl/basic_types.idl"
+ - "mongo/db/s/sharding_ddl_coordinator.idl"
+
+enums:
+ DropDatabaseCoordinatorPhase:
+ description: "The current phase of a drop database coordinator."
+ type: string
+ values:
+ kUnset: "unset"
+ kDrop: "drop"
+
+types:
+ CollectionInfo:
+ description: "Information of the collection to drop."
+ bson_serialization_type: object
+ cpp_type: CollectionType
+ serializer: "mongo::CollectionType::toBSON"
+ deserializer: "mongo::CollectionType"
+
+structs:
+ DropDatabaseCoordinatorDocument:
+ description: "Represents a drop database operation on the coordinator shard."
+ generate_comparison_operators: false
+ strict: true
+ chained_structs:
+ ShardingDDLCoordinatorMetadata: ShardingDDLCoordinatorMetadata
+ fields:
+ phase:
+ type: DropDatabaseCoordinatorPhase
+ default: kUnset
+ collInfo:
+ description: "Collection currently being dropped."
+ type: CollectionInfo
+ optional: true
+
diff --git a/src/mongo/db/s/sharding_ddl_coordinator.h b/src/mongo/db/s/sharding_ddl_coordinator.h
index 7304a612693..efa199af4d2 100644
--- a/src/mongo/db/s/sharding_ddl_coordinator.h
+++ b/src/mongo/db/s/sharding_ddl_coordinator.h
@@ -79,6 +79,7 @@ public:
SharedSemiFuture<void> getCompletionFuture() {
return _completionPromise.getFuture();
}
+
const NamespaceString& nss() const {
return _coorMetadata.getId().getNss();
}
diff --git a/src/mongo/db/s/sharding_ddl_coordinator.idl b/src/mongo/db/s/sharding_ddl_coordinator.idl
index 6dd43c89303..51298ce4f17 100644
--- a/src/mongo/db/s/sharding_ddl_coordinator.idl
+++ b/src/mongo/db/s/sharding_ddl_coordinator.idl
@@ -42,6 +42,7 @@ enums:
description: "Type of the sharding DDL Operation."
type: string
values:
+ kDropDatabase: "dropDatabase"
kDropCollection: "dropCollection"
types:
diff --git a/src/mongo/db/s/sharding_ddl_coordinator_service.cpp b/src/mongo/db/s/sharding_ddl_coordinator_service.cpp
index 05404da0309..ff5f2dca214 100644
--- a/src/mongo/db/s/sharding_ddl_coordinator_service.cpp
+++ b/src/mongo/db/s/sharding_ddl_coordinator_service.cpp
@@ -35,11 +35,13 @@
#include "mongo/base/checked_cast.h"
#include "mongo/db/s/database_sharding_state.h"
-#include "mongo/db/s/drop_collection_coordinator.h"
#include "mongo/db/s/operation_sharding_state.h"
#include "mongo/db/s/sharding_ddl_coordinator.h"
#include "mongo/logv2/log.h"
+#include "mongo/db/s/drop_collection_coordinator.h"
+#include "mongo/db/s/drop_database_coordinator.h"
+
namespace mongo {
ShardingDDLCoordinatorService* ShardingDDLCoordinatorService::getService(OperationContext* opCtx) {
@@ -56,6 +58,9 @@ ShardingDDLCoordinatorService::constructInstance(BSONObj initialState) const {
"Constructing new sharding DDL coordinator",
"coordinatorDoc"_attr = op.toBSON());
switch (op.getId().getOperationType()) {
+ case DDLCoordinatorTypeEnum::kDropDatabase:
+ return std::make_shared<DropDatabaseCoordinator>(std::move(initialState));
+ break;
case DDLCoordinatorTypeEnum::kDropCollection:
return std::make_shared<DropCollectionCoordinator>(std::move(initialState));
break;
diff --git a/src/mongo/db/s/shardsvr_drop_database_command.cpp b/src/mongo/db/s/shardsvr_drop_database_command.cpp
index 3a4088d01e8..67b9ca40d56 100644
--- a/src/mongo/db/s/shardsvr_drop_database_command.cpp
+++ b/src/mongo/db/s/shardsvr_drop_database_command.cpp
@@ -37,6 +37,7 @@
#include "mongo/db/curop.h"
#include "mongo/db/s/drop_database_coordinator.h"
#include "mongo/db/s/drop_database_legacy.h"
+#include "mongo/db/s/sharding_ddl_coordinator_service.h"
#include "mongo/db/s/sharding_state.h"
#include "mongo/logv2/log.h"
#include "mongo/s/grid.h"
@@ -78,30 +79,30 @@ public:
const auto dbName = request().getDbName();
- bool useNewPath = [&] {
- return feature_flags::gShardingFullDDLSupport.isEnabled(
- serverGlobalParams.featureCompatibility) &&
- !feature_flags::gDisableIncompleteShardingDDLSupport.isEnabled(
- serverGlobalParams.featureCompatibility);
- }();
+ const auto useNewPath = feature_flags::gShardingFullDDLSupport.isEnabled(
+ serverGlobalParams.featureCompatibility);
if (!useNewPath) {
LOGV2_DEBUG(
- 5281110, 1, "Running legacy drop database procedure", "database"_attr = dbName);
+ 5281110, 1, "Running legacy drop database procedure", "db"_attr = dbName);
dropDatabaseLegacy(opCtx, dbName);
return;
}
- LOGV2_DEBUG(
- 5281111, 1, "Running new drop database procedure", "database"_attr = dbName);
+ LOGV2_DEBUG(5281111, 1, "Running new drop database procedure", "db"_attr = dbName);
// Since this operation is not directly writing locally we need to force its db
// profile level increase in order to be logged in "<db>.system.profile"
CurOp::get(opCtx)->raiseDbProfileLevel(
CollectionCatalog::get(opCtx)->getDatabaseProfileLevel(dbName));
- auto dropDatabaseCoordinator = std::make_shared<DropDatabaseCoordinator>(opCtx, dbName);
- dropDatabaseCoordinator->run(opCtx).get();
+ auto coordinatorDoc = DropDatabaseCoordinatorDocument();
+ coordinatorDoc.setShardingDDLCoordinatorMetadata(
+ {{ns(), DDLCoordinatorTypeEnum::kDropDatabase}});
+ auto service = ShardingDDLCoordinatorService::getService(opCtx);
+ auto dropDatabaseCoordinator = checked_pointer_cast<DropDatabaseCoordinator>(
+ service->getOrCreateInstance(opCtx, coordinatorDoc.toBSON()));
+ dropDatabaseCoordinator->getCompletionFuture().get(opCtx);
}
private: