| field | value | timestamp |
|---|---|---|
| author | Jack Mulrow <jack.mulrow@mongodb.com> | 2023-04-03 14:46:52 +0000 |
| committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-04-03 17:29:38 +0000 |
| commit | 96ea1942d25bfc6b2ab30779590f1b8a8c6887b5 (patch) | |
| tree | 2924d48b8d292ef4d7c8d709d67c16fcd9886d64 | |
| parent | b29c35ee63c1eb1fead39db7293e751e9ae173d8 (diff) | |
| download | mongo-96ea1942d25bfc6b2ab30779590f1b8a8c6887b5.tar.gz | |
SERVER-75096 Locally drop sharded collections in transitionToDedicatedConfigServer
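
In short: when `transitionToDedicatedConfigServer` finishes draining the config shard, the config server now locally drops the sharded collections that were moved away, so a later `transitionToCatalogShard` does not conflict with stale local data. Below is a minimal mongosh sketch of the flow this enables; the namespace `foo.bar` and the shard name `shard1` are illustrative assumptions, while the command names and the `state` field match the test in this diff:

```javascript
// Hypothetical mongosh session against a cluster running in catalog shard
// mode. Namespaces and the destination shard name are illustrative.
let res = db.adminCommand({transitionToDedicatedConfigServer: 1});
assert.eq("started", res.state);  // draining begins

// The operator moves remaining chunks and database primaries off the
// config shard, e.g.:
// db.adminCommand({moveChunk: "foo.bar", find: {_id: 0}, to: "shard1"});
// db.adminCommand({movePrimary: "foo", to: "shard1"});

// The final call completes the transition; with this commit it also drops
// the drained sharded collections from the config server's local storage.
res = db.adminCommand({transitionToDedicatedConfigServer: 1});
assert.eq("completed", res.state);

// Transitioning back no longer requires manually dropping leftover data.
assert.commandWorked(db.adminCommand({transitionToCatalogShard: 1}));
```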
| mode | file | changes |
|---|---|---|
| -rw-r--r-- | jstests/noPassthrough/catalog_shard.js | 67 |
| -rw-r--r-- | src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp | 23 |

2 files changed, 83 insertions, 7 deletions
```diff
diff --git a/jstests/noPassthrough/catalog_shard.js b/jstests/noPassthrough/catalog_shard.js
index 87ea281b958..54181b23dcd 100644
--- a/jstests/noPassthrough/catalog_shard.js
+++ b/jstests/noPassthrough/catalog_shard.js
@@ -20,8 +20,13 @@ load("jstests/libs/fail_point_util.js");
 const dbName = "foo";
 const collName = "bar";
 const ns = dbName + "." + collName;
+const unshardedDbName = "unsharded_db";
+const unshardedNs = unshardedDbName + ".unsharded_coll";
+const indexedNs = "db_with_index.coll";
 
 function basicCRUD(conn) {
+    assert.commandWorked(st.s.getCollection(unshardedNs).insert([{x: 1}, {x: -1}]));
+
     assert.commandWorked(conn.getCollection(ns).insert({_id: 1, x: 1}));
     assert.sameMembers(conn.getCollection(ns).find({x: 1}).toArray(), [{_id: 1, x: 1}]);
     assert.commandWorked(conn.getCollection(ns).remove({x: 1}));
@@ -170,6 +175,13 @@ const newShardName =
     //
     // Remove the catalog shard.
     //
+    let configPrimary = st.configRS.getPrimary();
+
+    // Shard a second collection to verify it gets dropped locally by the transition.
+    assert.commandWorked(st.s.adminCommand({shardCollection: indexedNs, key: {_id: 1}}));
+    assert.commandWorked(
+        st.s.adminCommand({moveChunk: indexedNs, find: {_id: 0}, to: configShardName}));
+    assert.commandWorked(st.s.getCollection(indexedNs).createIndex({oldKey: 1}));
 
     let removeRes =
         assert.commandWorked(st.s0.adminCommand({transitionToDedicatedConfigServer: 1}));
@@ -181,6 +193,8 @@ const newShardName =
     assert.commandWorked(st.s.adminCommand({moveChunk: ns, find: {skey: -1}, to: newShardName}));
     assert.commandWorked(
+        st.s.adminCommand({moveChunk: indexedNs, find: {_id: 0}, to: newShardName}));
+    assert.commandWorked(
         st.s.adminCommand({moveChunk: "config.system.sessions", find: {_id: 0}, to: newShardName}));
 
     // Still blocked until the db has been moved away.
@@ -188,17 +202,48 @@ const newShardName =
     assert.eq("ongoing", removeRes.state);
 
     assert.commandWorked(st.s.adminCommand({movePrimary: dbName, to: newShardName}));
+    assert.commandWorked(st.s.adminCommand({movePrimary: unshardedDbName, to: newShardName}));
+
+    // The draining sharded collections should not have been locally dropped yet.
+    assert(configPrimary.getCollection(ns).exists());
+    assert(configPrimary.getCollection(indexedNs).exists());
+    assert.sameMembers(configPrimary.getCollection(indexedNs).getIndexKeys(),
+                       [{_id: 1}, {oldKey: 1}]);
+    assert(configPrimary.getCollection("config.system.sessions").exists());
+
+    // Start the final transition command. This will trigger locally dropping collections on the
+    // config server. Hang after removing one collection and trigger a failover to verify the final
+    // transition can be resumed on the new primary and the collection dropping is idempotent.
+    const hangRemoveFp = configureFailPoint(
+        st.configRS.getPrimary(), "hangAfterDroppingCollectionInTransitionToDedicatedConfigServer");
+    const finishRemoveThread = new Thread(function(mongosHost) {
+        const mongos = new Mongo(mongosHost);
+        return mongos.adminCommand({transitionToDedicatedConfigServer: 1});
+    }, st.s.host);
+    finishRemoveThread.start();
 
-    removeRes = assert.commandWorked(st.s0.adminCommand({transitionToDedicatedConfigServer: 1}));
+    hangRemoveFp.wait();
+    st.configRS.stepUp(st.configRS.getSecondary());
+    hangRemoveFp.off();
+    configPrimary = st.configRS.getPrimary();
+
+    finishRemoveThread.join();
+    removeRes = assert.commandWorked(finishRemoveThread.returnData());
     assert.eq("completed", removeRes.state);
 
+    // All sharded collections should have been dropped locally from the config server.
+    assert(!configPrimary.getCollection(ns).exists());
+    assert(!configPrimary.getCollection(indexedNs).exists());
+    assert.sameMembers(configPrimary.getCollection(indexedNs).getIndexKeys(), []);
+    assert(!configPrimary.getCollection("config.system.sessions").exists());
+
     // Basic CRUD and sharded DDL work.
     basicCRUD(st.s);
     assert.commandWorked(st.s.adminCommand({split: ns, middle: {skey: 220}}));
     basicCRUD(st.s);
 
     // Flushing routing / db cache updates works.
-    flushRoutingAndDBCacheUpdates(st.configRS.getPrimary());
+    flushRoutingAndDBCacheUpdates(configPrimary);
 
     //
     // A config server that isn't currently a shard can support changeStreamPreAndPostImages. Note
@@ -206,8 +251,8 @@ const newShardName =
     // to the config server to create a collection on a different db.
     //
     const directConfigNS = "directDB.onConfig";
-    assert.commandWorked(st.configRS.getPrimary().getCollection(directConfigNS).insert({x: 1}));
-    assert.commandWorked(st.configRS.getPrimary().getDB("directDB").runCommand({
+    assert.commandWorked(configPrimary.getCollection(directConfigNS).insert({x: 1}));
+    assert.commandWorked(configPrimary.getDB("directDB").runCommand({
         collMod: "onConfig",
         changeStreamPreAndPostImages: {enabled: true}
     }));
@@ -235,9 +280,10 @@ const newShardName =
     // Add back the catalog shard.
     //
 
-    // movePrimary won't delete from the source, so drop the moved db directly to avoid a conflict
-    // in addShard.
-    assert.commandWorked(st.configRS.getPrimary().getDB(dbName).dropDatabase());
+    // Create an index while the collection is not on the config server to verify it clones the
+    // correct indexes when receiving its first chunk after the transition.
+    assert.commandWorked(st.s.getCollection(indexedNs).createIndex({newKey: 1}));
+
     assert.commandWorked(st.s.adminCommand({transitionToCatalogShard: 1}));
 
     // Basic CRUD and sharded DDL work.
@@ -245,6 +291,13 @@ const newShardName =
     assert.commandWorked(st.s.adminCommand({moveChunk: ns, find: {skey: 0}, to: configShardName}));
     assert.commandWorked(st.s.adminCommand({split: ns, middle: {skey: 5}}));
     basicCRUD(st.s);
+
+    // Move a chunk for the indexed collection to the config server and it should create the
+    // correct index locally.
+    assert.commandWorked(
+        st.s.adminCommand({moveChunk: indexedNs, find: {_id: 0}, to: configShardName}));
+    assert.sameMembers(st.configRS.getPrimary().getCollection(indexedNs).getIndexKeys(),
+                       [{_id: 1}, {oldKey: 1}, {newKey: 1}]);
 }
 
 st.stop();
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp
index 09b7d7485f7..89b6f1a76d9 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp
@@ -103,6 +103,7 @@ namespace mongo {
 namespace {
 
 MONGO_FAIL_POINT_DEFINE(hangBeforeNotifyingaddShardCommitted);
+MONGO_FAIL_POINT_DEFINE(hangAfterDroppingCollectionInTransitionToDedicatedConfigServer);
 
 using CallbackHandle = executor::TaskExecutor::CallbackHandle;
 using CallbackArgs = executor::TaskExecutor::CallbackArgs;
@@ -945,6 +946,28 @@ RemoveShardProgress ShardingCatalogManager::removeShard(OperationContext* opCtx,
     LOGV2(
         21949, "Going to remove shard: {shardId}", "Going to remove shard", "shardId"_attr = name);
 
+    if (shardId == ShardId::kConfigServerId) {
+        // Drop the drained collections locally so the config server can transition back to catalog
+        // shard mode in the future without requiring users to manually drop them.
+        LOGV2(7509600, "Locally dropping drained collections", "shardId"_attr = name);
+
+        auto shardedCollections = _localCatalogClient->getCollections(opCtx, {});
+        for (auto&& collection : shardedCollections) {
+            DBDirectClient client(opCtx);
+
+            BSONObj result;
+            if (!client.dropCollection(
+                    collection.getNss(), ShardingCatalogClient::kLocalWriteConcern, &result)) {
+                // Note attempting to drop a non-existent collection does not return an error, so
+                // it's safe to assert the status is ok even if an earlier attempt was interrupted
+                // by a failover.
+                uassertStatusOK(getStatusFromCommandResult(result));
+            }
+
+            hangAfterDroppingCollectionInTransitionToDedicatedConfigServer.pauseWhileSet(opCtx);
+        }
+    }
+
     // Synchronize the control shard selection, the shard's document removal, and the topology time
     // update to exclude potential race conditions in case of concurrent add/remove shard
     // operations.
```
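
The failover coverage in the jstest above uses a failpoint pattern worth reading on its own: pause the config primary mid-drop, step up a secondary, and confirm the resumed transition still completes because the per-collection drop is idempotent. A condensed sketch of that pattern follows, assuming a `ShardingTest` named `st` and the standard jstest helpers `configureFailPoint` and `Thread`; only the failpoint name comes from this commit, the rest is generic jstest scaffolding:

```javascript
load("jstests/libs/fail_point_util.js");  // configureFailPoint
load("jstests/libs/parallelTester.js");   // Thread

// Pause the config primary right after it drops the first drained collection.
const fp = configureFailPoint(
    st.configRS.getPrimary(),
    "hangAfterDroppingCollectionInTransitionToDedicatedConfigServer");

// Drive the transition from a parallel thread so this shell can force a failover.
const thread = new Thread(function(host) {
    return new Mongo(host).adminCommand({transitionToDedicatedConfigServer: 1});
}, st.s.host);
thread.start();

fp.wait();                                       // transition is mid-drop
st.configRS.stepUp(st.configRS.getSecondary());  // failover to a secondary
fp.off();

thread.join();
// The resumed transition completes: per the commit's comment, re-dropping an
// already-dropped collection is not an error, so the drop loop is safe to
// repeat on the new primary.
assert.eq("completed", assert.commandWorked(thread.returnData()).state);
```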