| author | jannaerin <golden.janna@gmail.com> | 2018-01-30 18:09:10 -0500 |
| --- | --- | --- |
| committer | jannaerin <golden.janna@gmail.com> | 2018-02-09 16:56:58 -0500 |
| commit | 0c4008ac3510bc3ca55dd36632ea0570d28c6d6f (patch) | |
| tree | d180a3d76aa5ed09a968d304ede4457dc4a0889f | |
| parent | 9820063b4a88f685704385c13f4ee34aed31c1cd (diff) | |
| download | mongo-0c4008ac3510bc3ca55dd36632ea0570d28c6d6f.tar.gz | |
SERVER-31768 Don't create the first collection on a primary drained shard
4 files changed, 144 insertions, 46 deletions
diff --git a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
index 5346a05e3f8..a57ecd58d43 100644
--- a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
@@ -29,6 +29,7 @@ selector:
   - jstests/sharding/moveChunk_recipient_rejects_chunk_if_UUID_mismatch.js
   - jstests/sharding/write_cmd_auto_split.js
   - jstests/sharding/index_and_collection_option_propagation.js
+  - jstests/sharding/remove4.js
   # New feature in v3.6 mongos
   - jstests/sharding/logical_time_metadata.js
   # New feature in v3.6 mongos and mongod.
diff --git a/jstests/sharding/remove4.js b/jstests/sharding/remove4.js
new file mode 100644
index 00000000000..adae9442291
--- /dev/null
+++ b/jstests/sharding/remove4.js
@@ -0,0 +1,55 @@
+// Validates that after a primary shard is drained, a new sharded collection will not be created on
+// the primary shard
+(function() {
+    'use strict';
+
+    function removeShardAddNewColl(shardCollCmd) {
+        let st = new ShardingTest({name: "remove_shard4", shards: 2, mongos: 2});
+        assert.commandWorked(st.s0.adminCommand({enableSharding: 'TestDB'}));
+        let primaryShard = st.shard0.shardName;
+        st.ensurePrimaryShard('TestDB', primaryShard);
+
+        // Remove primary shard
+        var removeRes;
+        removeRes = assert.commandWorked(st.s0.adminCommand({removeShard: primaryShard}));
+        assert.eq('started', removeRes.state);
+
+        // Add a new sharded collection and check that its data is not on the primary drained shard
+        assert.commandWorked(st.s0.adminCommand(shardCollCmd));
+        st.s0.getDB('TestDB').Coll.insert({_id: -2, value: 'Negative value'});
+        st.s0.getDB('TestDB').Coll.insert({_id: 2, value: 'Positive value'});
+
+        let chunks = st.config.chunks.find({'ns': 'TestDB.Coll'}).toArray();
+        assert.neq(chunks.length, 0);
+
+        for (let i = 0; i < chunks.length; i++) {
+            assert.neq(chunks[i].shard,
+                       primaryShard,
+                       'New sharded collection should not have been created on primary shard');
+        }
+
+        removeRes = assert.commandWorked(st.s0.adminCommand({removeShard: primaryShard}));
+        assert.eq('ongoing', removeRes.state);
+
+        // Drop TestDB so can finish draining
+        assert.commandWorked(st.s0.getDB('TestDB').runCommand({dropDatabase: 1}));
+
+        // Move the config.system.sessions chunk off primary
+        assert.commandWorked(st.s0.adminCommand({
+            moveChunk: 'config.system.sessions',
+            find: {_id: 'config.system.sessions-_id_MinKey'},
+            to: st.shard1.shardName,
+            _waitForDelete: true
+        }));
+
+        // Remove shard must succeed now
+        removeRes = assert.commandWorked(st.s0.adminCommand({removeShard: primaryShard}));
+        assert.eq('completed', removeRes.state);
+
+        st.stop();
+    }
+
+    removeShardAddNewColl({shardCollection: 'TestDB.Coll', key: {_id: 1}});
+    removeShardAddNewColl(
+        {shardCollection: 'TestDB.Coll', key: {_id: "hashed"}, numInitialChunks: 2});
+})();
diff --git a/src/mongo/s/catalog/sharding_catalog_manager.h b/src/mongo/s/catalog/sharding_catalog_manager.h
index cd36a69cbfe..670c57fb678 100644
--- a/src/mongo/s/catalog/sharding_catalog_manager.h
+++ b/src/mongo/s/catalog/sharding_catalog_manager.h
@@ -413,6 +413,16 @@ private:
      */
     void _appendReadConcern(BSONObjBuilder* builder);
 
+    /**
+     * Creates the first chunks of a new sharded collection.
+     */
+    ChunkVersion _createFirstChunks(OperationContext* opCtx,
+                                    const NamespaceString& nss,
+                                    const ShardKeyPattern& shardKeyPattern,
+                                    const ShardId& primaryShardId,
+                                    const std::vector<BSONObj>& initPoints,
+                                    const bool distributeInitialChunks);
+
     // The owning service context
     ServiceContext* const _serviceContext;
diff --git a/src/mongo/s/catalog/sharding_catalog_manager_collection_operations.cpp b/src/mongo/s/catalog/sharding_catalog_manager_collection_operations.cpp
index 0ce93bfd345..612f4416185 100644
--- a/src/mongo/s/catalog/sharding_catalog_manager_collection_operations.cpp
+++ b/src/mongo/s/catalog/sharding_catalog_manager_collection_operations.cpp
@@ -77,22 +77,83 @@ const Seconds kDefaultFindHostMaxWaitTime(20);
 const ReadPreferenceSetting kConfigReadSelector(ReadPreference::Nearest, TagSet{});
 const WriteConcernOptions kNoWaitWriteConcern(1, WriteConcernOptions::SyncMode::UNSET, Seconds(0));
 
+void checkForExistingChunks(OperationContext* opCtx, const string& ns) {
+    BSONObjBuilder countBuilder;
+    countBuilder.append("count", NamespaceString(ChunkType::ConfigNS).coll());
+    countBuilder.append("query", BSON(ChunkType::ns(ns)));
+
+    // OK to use limit=1, since if any chunks exist, we will fail.
+    countBuilder.append("limit", 1);
+
+    // Use readConcern local to guarantee we see any chunks that have been written and may
+    // become committed; readConcern majority will not see the chunks if they have not made it
+    // to the majority snapshot.
+    repl::ReadConcernArgs readConcern(repl::ReadConcernLevel::kLocalReadConcern);
+    readConcern.appendInfo(&countBuilder);
+
+    auto cmdResponse = uassertStatusOK(
+        Grid::get(opCtx)->shardRegistry()->getConfigShard()->runCommandWithFixedRetryAttempts(
+            opCtx,
+            kConfigReadSelector,
+            NamespaceString(ChunkType::ConfigNS).db().toString(),
+            countBuilder.done(),
+            Shard::kDefaultConfigCommandTimeout,
+            Shard::RetryPolicy::kIdempotent));
+    uassertStatusOK(cmdResponse.commandStatus);
+
+    long long numChunks;
+    uassertStatusOK(bsonExtractIntegerField(cmdResponse.response, "n", &numChunks));
+    uassert(ErrorCodes::ManualInterventionRequired,
+            str::stream() << "A previous attempt to shard collection " << ns
+                          << " failed after writing some initial chunks to config.chunks. Please "
+                             "manually delete the partially written chunks for collection "
+                          << ns
+                          << " from config.chunks",
+            numChunks == 0);
+}
+
+}  // namespace
+
 /**
  * Creates and writes to the config server the first chunks for a newly sharded collection. Returns
  * the version generated for the collection.
  */
-ChunkVersion createFirstChunks(OperationContext* opCtx,
-                               const NamespaceString& nss,
-                               const ShardKeyPattern& shardKeyPattern,
-                               const ShardId& primaryShardId,
-                               const std::vector<BSONObj>& initPoints,
-                               const bool distributeInitialChunks) {
+ChunkVersion ShardingCatalogManager::_createFirstChunks(OperationContext* opCtx,
+                                                        const NamespaceString& nss,
+                                                        const ShardKeyPattern& shardKeyPattern,
+                                                        const ShardId& primaryShardId,
+                                                        const std::vector<BSONObj>& initPoints,
+                                                        const bool distributeInitialChunks) {
     const KeyPattern keyPattern = shardKeyPattern.getKeyPattern();
 
     vector<BSONObj> splitPoints;
     vector<ShardId> shardIds;
 
+    std::string primaryShardName = primaryShardId.toString();
+    auto drainingCount = uassertStatusOK(_runCountCommandOnConfig(
+        opCtx,
+        NamespaceString(ShardType::ConfigNS),
+        BSON(ShardType::name() << primaryShardName << ShardType::draining(true))));
+
+    const bool primaryDraining = (drainingCount > 0);
+    auto getPrimaryOrFirstNonDrainingShard =
+        [&opCtx, primaryShardId, &shardIds, primaryDraining]() {
+            if (primaryDraining) {
+                vector<ShardId> allShardIds;
+                Grid::get(opCtx)->shardRegistry()->getAllShardIds(&allShardIds);
+
+                auto dbShardId = allShardIds[0];
+                if (allShardIds[0] == primaryShardId && allShardIds.size() > 1) {
+                    dbShardId = allShardIds[1];
+                }
+
+                return dbShardId;
+            } else {
+                return primaryShardId;
+            }
+        };
+
     if (initPoints.empty()) {
         // If no split points were specified use the shard's data distribution to determine them
         auto primaryShard =
@@ -128,8 +189,12 @@ ChunkVersion createFirstChunks(OperationContext* opCtx,
         // otherwise defer to passed-in distribution option.
         if (numObjects == 0 && distributeInitialChunks) {
             Grid::get(opCtx)->shardRegistry()->getAllShardIds(&shardIds);
+            if (primaryDraining && shardIds.size() > 1) {
+                shardIds.erase(std::remove(shardIds.begin(), shardIds.end(), primaryShardId),
+                               shardIds.end());
+            }
         } else {
-            shardIds.push_back(primaryShardId);
+            shardIds.push_back(getPrimaryOrFirstNonDrainingShard());
         }
     } else {
         // Make sure points are unique and ordered
@@ -145,8 +210,12 @@ ChunkVersion createFirstChunks(OperationContext* opCtx,
 
         if (distributeInitialChunks) {
             Grid::get(opCtx)->shardRegistry()->getAllShardIds(&shardIds);
+            if (primaryDraining) {
+                shardIds.erase(std::remove(shardIds.begin(), shardIds.end(), primaryShardId),
+                               shardIds.end());
+            }
         } else {
-            shardIds.push_back(primaryShardId);
+            shardIds.push_back(getPrimaryOrFirstNonDrainingShard());
         }
     }
 
@@ -184,43 +253,6 @@ ChunkVersion createFirstChunks(OperationContext* opCtx,
     return version;
 }
 
-void checkForExistingChunks(OperationContext* opCtx, const string& ns) {
-    BSONObjBuilder countBuilder;
-    countBuilder.append("count", NamespaceString(ChunkType::ConfigNS).coll());
-    countBuilder.append("query", BSON(ChunkType::ns(ns)));
-
-    // OK to use limit=1, since if any chunks exist, we will fail.
-    countBuilder.append("limit", 1);
-
-    // Use readConcern local to guarantee we see any chunks that have been written and may
-    // become committed; readConcern majority will not see the chunks if they have not made it
-    // to the majority snapshot.
-    repl::ReadConcernArgs readConcern(repl::ReadConcernLevel::kLocalReadConcern);
-    readConcern.appendInfo(&countBuilder);
-
-    auto cmdResponse = uassertStatusOK(
-        Grid::get(opCtx)->shardRegistry()->getConfigShard()->runCommandWithFixedRetryAttempts(
-            opCtx,
-            kConfigReadSelector,
-            NamespaceString(ChunkType::ConfigNS).db().toString(),
-            countBuilder.done(),
-            Shard::kDefaultConfigCommandTimeout,
-            Shard::RetryPolicy::kIdempotent));
-    uassertStatusOK(cmdResponse.commandStatus);
-
-    long long numChunks;
-    uassertStatusOK(bsonExtractIntegerField(cmdResponse.response, "n", &numChunks));
-    uassert(ErrorCodes::ManualInterventionRequired,
-            str::stream() << "A previous attempt to shard collection " << ns
-                          << " failed after writing some initial chunks to config.chunks. Please "
-                             "manually delete the partially written chunks for collection "
-                          << ns
-                          << " from config.chunks",
-            numChunks == 0);
-}
-
-}  // namespace
-
 void ShardingCatalogManager::shardCollection(OperationContext* opCtx,
                                              const string& ns,
                                              const boost::optional<UUID> uuid,
@@ -266,7 +298,7 @@ void ShardingCatalogManager::shardCollection(OperationContext* opCtx,
             ->makeFromBSON(defaultCollation));
     }
 
-    const auto& collVersion = createFirstChunks(
+    const auto& collVersion = _createFirstChunks(
         opCtx, nss, fieldsAndOrder, dbPrimaryShardId, initPoints, distributeInitialChunks);
 
     {
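
For readers skimming the patch, the core of the change is easiest to see in isolation. Below is a minimal standalone sketch (plain C++, no server dependencies) of the selection rule `_createFirstChunks` now applies. The draining state, which the real code derives by counting `config.shards` entries for the primary with `draining: true`, is modeled here as a plain bool; `std::string` stands in for `ShardId`; and the helper names `pickShardForFirstChunk` and `candidateShards` are hypothetical, not part of the patch.

// Standalone model of the shard-selection rule introduced by this patch.
// Plain std::string stands in for ShardId; the registry lookup and the
// config.shards draining count are represented by simple parameters.
#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

// Hypothetical helper mirroring getPrimaryOrFirstNonDrainingShard: if the
// primary is draining, fall back to the first registered shard that is not
// the primary (when one exists). Assumes at least one registered shard,
// as the real code does.
std::string pickShardForFirstChunk(const std::string& primaryShardId,
                                   const std::vector<std::string>& allShardIds,
                                   bool primaryDraining) {
    if (!primaryDraining) {
        return primaryShardId;
    }
    std::string dbShardId = allShardIds[0];
    if (allShardIds[0] == primaryShardId && allShardIds.size() > 1) {
        dbShardId = allShardIds[1];
    }
    return dbShardId;
}

// Hypothetical helper mirroring the distributeInitialChunks branch for an
// empty collection: drop the draining primary from the candidate list, but
// never empty the list entirely.
std::vector<std::string> candidateShards(const std::string& primaryShardId,
                                         std::vector<std::string> allShardIds,
                                         bool primaryDraining) {
    if (primaryDraining && allShardIds.size() > 1) {
        allShardIds.erase(
            std::remove(allShardIds.begin(), allShardIds.end(), primaryShardId),
            allShardIds.end());
    }
    return allShardIds;
}

int main() {
    const std::vector<std::string> shards{"shard0000", "shard0001"};

    // Primary not draining: the first chunk stays on the primary, as before.
    assert(pickShardForFirstChunk("shard0000", shards, false) == "shard0000");

    // Primary draining: the first chunk lands on the next shard instead.
    assert(pickShardForFirstChunk("shard0000", shards, true) == "shard0001");

    // Initial-chunk distribution skips the draining primary.
    auto cands = candidateShards("shard0000", shards, true);
    assert(cands.size() == 1 && cands[0] == "shard0001");

    return 0;
}

Note the asymmetry preserved from the patch: the `numObjects == 0` path guards the erase with `shardIds.size() > 1` so the candidate list cannot become empty, while `getPrimaryOrFirstNonDrainingShard` handles the degenerate single-shard case by falling back to the draining primary itself.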