author      Jack Mulrow <jack.mulrow@mongodb.com>  2023-04-03 14:46:52 +0000
committer   Evergreen Agent <no-reply@evergreen.mongodb.com>  2023-04-03 17:29:38 +0000
commit      96ea1942d25bfc6b2ab30779590f1b8a8c6887b5 (patch)
tree        2924d48b8d292ef4d7c8d709d67c16fcd9886d64
parent      b29c35ee63c1eb1fead39db7293e751e9ae173d8 (diff)
download    mongo-96ea1942d25bfc6b2ab30779590f1b8a8c6887b5.tar.gz
SERVER-75096 Locally drop sharded collections in transitionToDedicatedConfigServer
-rw-r--r--   jstests/noPassthrough/catalog_shard.js                                 67
-rw-r--r--   src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp    23
2 files changed, 83 insertions, 7 deletions
diff --git a/jstests/noPassthrough/catalog_shard.js b/jstests/noPassthrough/catalog_shard.js
index 87ea281b958..54181b23dcd 100644
--- a/jstests/noPassthrough/catalog_shard.js
+++ b/jstests/noPassthrough/catalog_shard.js
@@ -20,8 +20,13 @@ load("jstests/libs/fail_point_util.js");
const dbName = "foo";
const collName = "bar";
const ns = dbName + "." + collName;
+const unshardedDbName = "unsharded_db";
+const unshardedNs = unshardedDbName + ".unsharded_coll";
+const indexedNs = "db_with_index.coll";
function basicCRUD(conn) {
+ assert.commandWorked(st.s.getCollection(unshardedNs).insert([{x: 1}, {x: -1}]));
+
assert.commandWorked(conn.getCollection(ns).insert({_id: 1, x: 1}));
assert.sameMembers(conn.getCollection(ns).find({x: 1}).toArray(), [{_id: 1, x: 1}]);
assert.commandWorked(conn.getCollection(ns).remove({x: 1}));
@@ -170,6 +175,13 @@ const newShardName =
//
// Remove the catalog shard.
//
+ let configPrimary = st.configRS.getPrimary();
+
+ // Shard a second collection to verify it gets dropped locally by the transition.
+ assert.commandWorked(st.s.adminCommand({shardCollection: indexedNs, key: {_id: 1}}));
+ assert.commandWorked(
+ st.s.adminCommand({moveChunk: indexedNs, find: {_id: 0}, to: configShardName}));
+ assert.commandWorked(st.s.getCollection(indexedNs).createIndex({oldKey: 1}));
let removeRes =
assert.commandWorked(st.s0.adminCommand({transitionToDedicatedConfigServer: 1}));
@@ -181,6 +193,8 @@ const newShardName =
assert.commandWorked(st.s.adminCommand({moveChunk: ns, find: {skey: -1}, to: newShardName}));
assert.commandWorked(
+ st.s.adminCommand({moveChunk: indexedNs, find: {_id: 0}, to: newShardName}));
+ assert.commandWorked(
st.s.adminCommand({moveChunk: "config.system.sessions", find: {_id: 0}, to: newShardName}));
// Still blocked until the db has been moved away.
@@ -188,17 +202,48 @@ const newShardName =
assert.eq("ongoing", removeRes.state);
assert.commandWorked(st.s.adminCommand({movePrimary: dbName, to: newShardName}));
+ assert.commandWorked(st.s.adminCommand({movePrimary: unshardedDbName, to: newShardName}));
+
+ // The draining sharded collections should not have been locally dropped yet.
+ assert(configPrimary.getCollection(ns).exists());
+ assert(configPrimary.getCollection(indexedNs).exists());
+ assert.sameMembers(configPrimary.getCollection(indexedNs).getIndexKeys(),
+ [{_id: 1}, {oldKey: 1}]);
+ assert(configPrimary.getCollection("config.system.sessions").exists());
+
+ // Start the final transition command. This will trigger locally dropping collections on the
+ // config server. Hang after removing one collection and trigger a failover to verify the final
+ // transition can be resumed on the new primary and the collection dropping is idempotent.
+ const hangRemoveFp = configureFailPoint(
+ st.configRS.getPrimary(), "hangAfterDroppingCollectionInTransitionToDedicatedConfigServer");
+ const finishRemoveThread = new Thread(function(mongosHost) {
+ const mongos = new Mongo(mongosHost);
+ return mongos.adminCommand({transitionToDedicatedConfigServer: 1});
+ }, st.s.host);
+ finishRemoveThread.start();
- removeRes = assert.commandWorked(st.s0.adminCommand({transitionToDedicatedConfigServer: 1}));
+ hangRemoveFp.wait();
+ st.configRS.stepUp(st.configRS.getSecondary());
+ hangRemoveFp.off();
+ configPrimary = st.configRS.getPrimary();
+
+ finishRemoveThread.join();
+ removeRes = assert.commandWorked(finishRemoveThread.returnData());
assert.eq("completed", removeRes.state);
+ // All sharded collections should have been dropped locally from the config server.
+ assert(!configPrimary.getCollection(ns).exists());
+ assert(!configPrimary.getCollection(indexedNs).exists());
+ assert.sameMembers(configPrimary.getCollection(indexedNs).getIndexKeys(), []);
+ assert(!configPrimary.getCollection("config.system.sessions").exists());
+
// Basic CRUD and sharded DDL work.
basicCRUD(st.s);
assert.commandWorked(st.s.adminCommand({split: ns, middle: {skey: 220}}));
basicCRUD(st.s);
// Flushing routing / db cache updates works.
- flushRoutingAndDBCacheUpdates(st.configRS.getPrimary());
+ flushRoutingAndDBCacheUpdates(configPrimary);
//
// A config server that isn't currently a shard can support changeStreamPreAndPostImages. Note
@@ -206,8 +251,8 @@ const newShardName =
// to the config server to create a collection on a different db.
//
const directConfigNS = "directDB.onConfig";
- assert.commandWorked(st.configRS.getPrimary().getCollection(directConfigNS).insert({x: 1}));
- assert.commandWorked(st.configRS.getPrimary().getDB("directDB").runCommand({
+ assert.commandWorked(configPrimary.getCollection(directConfigNS).insert({x: 1}));
+ assert.commandWorked(configPrimary.getDB("directDB").runCommand({
collMod: "onConfig",
changeStreamPreAndPostImages: {enabled: true}
}));
@@ -235,9 +280,10 @@ const newShardName =
// Add back the catalog shard.
//
- // movePrimary won't delete from the source, so drop the moved db directly to avoid a conflict
- // in addShard.
- assert.commandWorked(st.configRS.getPrimary().getDB(dbName).dropDatabase());
+ // Create an index while the collection is not on the config server to verify it clones the
+ // correct indexes when receiving its first chunk after the transition.
+ assert.commandWorked(st.s.getCollection(indexedNs).createIndex({newKey: 1}));
+
assert.commandWorked(st.s.adminCommand({transitionToCatalogShard: 1}));
// Basic CRUD and sharded DDL work.
@@ -245,6 +291,13 @@ const newShardName =
assert.commandWorked(st.s.adminCommand({moveChunk: ns, find: {skey: 0}, to: configShardName}));
assert.commandWorked(st.s.adminCommand({split: ns, middle: {skey: 5}}));
basicCRUD(st.s);
+
+ // Move a chunk for the indexed collection to the config server and it should create the correct
+ // index locally.
+ assert.commandWorked(
+ st.s.adminCommand({moveChunk: indexedNs, find: {_id: 0}, to: configShardName}));
+ assert.sameMembers(st.configRS.getPrimary().getCollection(indexedNs).getIndexKeys(),
+ [{_id: 1}, {oldKey: 1}, {newKey: 1}]);
}
st.stop();
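
Editorial aside (not part of the patch): the failover coverage added above follows a reusable jstests pattern — pause the transition at a failpoint, step up a config server secondary, then let the command finish and verify it still succeeds. A minimal sketch of that pattern, assuming an existing ShardingTest `st` whose config server is also a shard and whose data has already been drained, as in the test above; the helpers come from the standard jstests libraries:

    load("jstests/libs/fail_point_util.js");   // configureFailPoint()
    load("jstests/libs/parallelTester.js");    // Thread (may already be loaded by the suite)

    // Pause the transition right after it locally drops a collection on the config server.
    const hangFp = configureFailPoint(
        st.configRS.getPrimary(),
        "hangAfterDroppingCollectionInTransitionToDedicatedConfigServer");

    // Run the final transition from a parallel thread so this thread can drive a failover.
    const transitionThread = new Thread(function(mongosHost) {
        const mongos = new Mongo(mongosHost);
        return mongos.adminCommand({transitionToDedicatedConfigServer: 1});
    }, st.s.host);
    transitionThread.start();

    hangFp.wait();                                    // command is paused mid-drop
    st.configRS.stepUp(st.configRS.getSecondary());   // force a config server failover
    hangFp.off();

    transitionThread.join();
    // Dropping an already-dropped collection is a no-op, so the command is expected to
    // resume on the new primary and still report state "completed".
    const res = assert.commandWorked(transitionThread.returnData());
    assert.eq("completed", res.state);
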
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp
index 09b7d7485f7..89b6f1a76d9 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp
@@ -103,6 +103,7 @@ namespace mongo {
namespace {
MONGO_FAIL_POINT_DEFINE(hangBeforeNotifyingaddShardCommitted);
+MONGO_FAIL_POINT_DEFINE(hangAfterDroppingCollectionInTransitionToDedicatedConfigServer);
using CallbackHandle = executor::TaskExecutor::CallbackHandle;
using CallbackArgs = executor::TaskExecutor::CallbackArgs;
@@ -945,6 +946,28 @@ RemoveShardProgress ShardingCatalogManager::removeShard(OperationContext* opCtx,
LOGV2(
21949, "Going to remove shard: {shardId}", "Going to remove shard", "shardId"_attr = name);
+ if (shardId == ShardId::kConfigServerId) {
+ // Drop the drained collections locally so the config server can transition back to catalog
+ // shard mode in the future without requiring users to manually drop them.
+ LOGV2(7509600, "Locally dropping drained collections", "shardId"_attr = name);
+
+ auto shardedCollections = _localCatalogClient->getCollections(opCtx, {});
+ for (auto&& collection : shardedCollections) {
+ DBDirectClient client(opCtx);
+
+ BSONObj result;
+ if (!client.dropCollection(
+ collection.getNss(), ShardingCatalogClient::kLocalWriteConcern, &result)) {
+ // Note attempting to drop a non-existent collection does not return an error, so
+ // it's safe to assert the status is ok even if an earlier attempt was interrupted
+ // by a failover.
+ uassertStatusOK(getStatusFromCommandResult(result));
+ }
+
+ hangAfterDroppingCollectionInTransitionToDedicatedConfigServer.pauseWhileSet(opCtx);
+ }
+ }
+
// Synchronize the control shard selection, the shard's document removal, and the topology time
// update to exclude potential race conditions in case of concurrent add/remove shard
// operations.
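
Editorial aside (not part of the patch): from a client's perspective transitionToDedicatedConfigServer remains a polling-style, multi-call command, like removeShard; repeated calls report draining progress, and the call that completes the removal is the one that now also drops the drained sharded collections locally on the config server. A minimal shell-side sketch, assuming `db` is a connection to a mongos and that the remaining chunks and databases are eventually moved off the config shard by the operator or the balancer:

    // The first call starts the draining; subsequent calls report progress.
    let res = assert.commandWorked(db.adminCommand({transitionToDedicatedConfigServer: 1}));

    while (res.state !== "completed") {
        // While data remains on the config shard the command reports state
        // "started"/"ongoing" along with what still has to be moved.
        sleep(1000);
        res = assert.commandWorked(db.adminCommand({transitionToDedicatedConfigServer: 1}));
    }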