From 5a4fa1d1d7bb23939c104432bb8297778a69e7cc Mon Sep 17 00:00:00 2001 From: Kim Tao Date: Sun, 9 Dec 2018 17:38:45 -0500 Subject: SERVER-38392: remove assertion that we can't shard a non-empty collection associated with tags (cherry picked from commit 778f905b2905c00b6f394d8db6e7d12e87d7ad3d) --- ...harding_last_stable_mongos_and_mixed_shards.yml | 1 + .../sharding/shard_collection_existing_zones.js | 48 ++++++++ src/mongo/db/s/config/initial_split_policy.cpp | 93 ++++++++------- src/mongo/db/s/config/initial_split_policy.h | 4 +- .../db/s/config/initial_split_policy_test.cpp | 5 +- ...rding_catalog_manager_collection_operations.cpp | 14 ++- ...rding_catalog_manager_shard_collection_test.cpp | 125 +++++---------------- src/mongo/db/s/shardsvr_shard_collection.cpp | 15 +-- 8 files changed, 152 insertions(+), 153 deletions(-) diff --git a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml index 6648bac3175..5c89f344930 100644 --- a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml +++ b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml @@ -102,6 +102,7 @@ selector: - jstests/sharding/validate_collection.js # Enable if SERVER-36966 is backported to v3.6 - jstests/sharding/mr_output_sharded_validation.js + - jstests/sharding/shard_collection_existing_zones.js executor: config: diff --git a/jstests/sharding/shard_collection_existing_zones.js b/jstests/sharding/shard_collection_existing_zones.js index d6cb10c304e..8782e4e132b 100644 --- a/jstests/sharding/shard_collection_existing_zones.js +++ b/jstests/sharding/shard_collection_existing_zones.js @@ -128,6 +128,52 @@ assert.commandWorked(testDB.runCommand({drop: kCollName})); } + /** + * Tests that a non-empty collection associated with zones can be sharded. + */ + function testNonemptyZonedCollection() { + var shardKey = {x: 1}; + var shards = configDB.shards.find().toArray(); + var testColl = testDB.getCollection(kCollName); + var ranges = [ + {min: {x: 0}, max: {x: 10}}, + {min: {x: 10}, max: {x: 20}}, + {min: {x: 20}, max: {x: 40}} + ]; + + for (let i = 0; i < 40; i++) { + assert.writeOK(testColl.insert({x: i})); + } + + assert.commandWorked(testColl.createIndex(shardKey)); + + for (let i = 0; i < shards.length; i++) { + assert.commandWorked( + mongos.adminCommand({addShardToZone: shards[i]._id, zone: zoneName + i})); + assert.commandWorked(mongos.adminCommand({ + updateZoneKeyRange: ns, + min: ranges[i].min, + max: ranges[i].max, + zone: zoneName + i + })); + } + + assert.commandWorked(mongos.adminCommand({shardCollection: ns, key: shardKey})); + + // Check that there is initially 1 chunk. + assert.eq(1, configDB.chunks.count({ns: ns})); + + st.startBalancer(); + + // Check that the chunks were moved properly. + assert.soon(() => { + let res = configDB.chunks.count({ns: ns}); + return res === 5; + }, 'balancer never ran', 10 * 60 * 1000, 1000); + + assert.commandWorked(testDB.runCommand({drop: kCollName})); + } + // test that shardCollection checks that a zone is associated with a shard. 
testShardZoneAssociationValidation({x: 1}, false, false); @@ -152,5 +198,7 @@ testChunkSplits(false); testChunkSplits(true); + testNonemptyZonedCollection(); + st.stop(); })(); diff --git a/src/mongo/db/s/config/initial_split_policy.cpp b/src/mongo/db/s/config/initial_split_policy.cpp index b48701bfd7a..5d6a4b2b5bc 100644 --- a/src/mongo/db/s/config/initial_split_policy.cpp +++ b/src/mongo/db/s/config/initial_split_policy.cpp @@ -207,7 +207,8 @@ InitialSplitPolicy::generateShardCollectionInitialZonedChunks( const Timestamp& validAfter, const std::vector& tags, const StringMap>& tagToShards, - const std::vector& allShardIds) { + const std::vector& allShardIds, + const bool isEmpty) { invariant(!allShardIds.empty()); invariant(!tags.empty()); @@ -218,18 +219,31 @@ InitialSplitPolicy::generateShardCollectionInitialZonedChunks( std::vector chunks; - for (const auto& tag : tags) { - if (tag.getMinKey().woCompare(lastChunkMax) > 0) { - // create a chunk for the hole between zones - const ShardId shardId = allShardIds[indx++ % allShardIds.size()]; - appendChunk(nss, lastChunkMax, tag.getMinKey(), &version, validAfter, shardId, &chunks); - } - - // check that this tag is associated with a shard and if so create a chunk for the zone. - const auto it = tagToShards.find(tag.getTag()); - invariant(it != tagToShards.end()); - const auto& shardIdsForChunk = it->second; - uassert(50973, + if (!isEmpty) { + // For a non-empty collection, create one chunk on the primary shard and leave it to the + // balancer to do the final zone partitioning/rebalancing. + appendChunk(nss, + keyPattern.globalMin(), + keyPattern.globalMax(), + &version, + validAfter, + allShardIds[0], + &chunks); + } else { + for (const auto& tag : tags) { + if (tag.getMinKey().woCompare(lastChunkMax) > 0) { + // create a chunk for the hole between zones + const ShardId shardId = allShardIds[indx++ % allShardIds.size()]; + appendChunk( + nss, lastChunkMax, tag.getMinKey(), &version, validAfter, shardId, &chunks); + } + + // check that this tag is associated with a shard and if so create a chunk for the zone. + const auto it = tagToShards.find(tag.getTag()); + invariant(it != tagToShards.end()); + const auto& shardIdsForChunk = it->second; + uassert( + 50973, str::stream() << "cannot shard collection " << nss.ns() @@ -238,21 +252,22 @@ InitialSplitPolicy::generateShardCollectionInitialZonedChunks( << " which is not associated with a shard. 
please add this zone to a shard.", !shardIdsForChunk.empty()); - appendChunk(nss, - tag.getMinKey(), - tag.getMaxKey(), - &version, - validAfter, - shardIdsForChunk[0], - &chunks); - lastChunkMax = tag.getMaxKey(); - } + appendChunk(nss, + tag.getMinKey(), + tag.getMaxKey(), + &version, + validAfter, + shardIdsForChunk[0], + &chunks); + lastChunkMax = tag.getMaxKey(); + } - if (lastChunkMax.woCompare(keyPattern.globalMax()) < 0) { - // existing zones do not span to $maxKey so create a chunk for that - const ShardId shardId = allShardIds[indx++ % allShardIds.size()]; - appendChunk( - nss, lastChunkMax, keyPattern.globalMax(), &version, validAfter, shardId, &chunks); + if (lastChunkMax.woCompare(keyPattern.globalMax()) < 0) { + // existing zones do not span to $maxKey so create a chunk for that + const ShardId shardId = allShardIds[indx++ % allShardIds.size()]; + appendChunk( + nss, lastChunkMax, keyPattern.globalMax(), &version, validAfter, shardId, &chunks); + } } log() << "Created " << chunks.size() << " chunk(s) for: " << nss << " using new epoch " @@ -269,6 +284,7 @@ InitialSplitPolicy::ShardCollectionConfig InitialSplitPolicy::createFirstChunks( const std::vector& splitPoints, const std::vector& tags, const bool distributeInitialChunks, + const bool isEmpty, const int numContiguousChunksPerShard) { const auto& keyPattern = shardKeyPattern.getKeyPattern(); @@ -281,22 +297,11 @@ InitialSplitPolicy::ShardCollectionConfig InitialSplitPolicy::createFirstChunks( auto primaryShard = uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard(opCtx, primaryShardId)); - auto result = uassertStatusOK(primaryShard->runCommandWithFixedRetryAttempts( - opCtx, - ReadPreferenceSetting{ReadPreference::PrimaryPreferred}, - nss.db().toString(), - BSON("count" << nss.coll()), - Shard::RetryPolicy::kIdempotent)); - - long long numObjects = 0; - uassertStatusOK(result.commandStatus); - uassertStatusOK(bsonExtractIntegerField(result.response, "n", &numObjects)); - // Refresh the balancer settings to ensure the chunk size setting, which is sent as part of // the splitVector command and affects the number of chunks returned, has been loaded. uassertStatusOK(Grid::get(opCtx)->getBalancerConfiguration()->refreshAndCheck(opCtx)); - if (numObjects > 0) { + if (!isEmpty) { finalSplitPoints = uassertStatusOK(shardutil::selectChunkSplitPoints( opCtx, primaryShardId, @@ -309,7 +314,7 @@ InitialSplitPolicy::ShardCollectionConfig InitialSplitPolicy::createFirstChunks( // If docs already exist for the collection, must use primary shard, // otherwise defer to passed-in distribution option. 
- if (numObjects == 0 && distributeInitialChunks) { + if (isEmpty && distributeInitialChunks) { Grid::get(opCtx)->shardRegistry()->getAllShardIdsNoReload(&shardIds); } else { shardIds.push_back(primaryShardId); @@ -348,7 +353,13 @@ InitialSplitPolicy::ShardCollectionConfig InitialSplitPolicy::createFirstChunks( shardIds, numContiguousChunksPerShard) : InitialSplitPolicy::generateShardCollectionInitialZonedChunks( - nss, shardKeyPattern, validAfter, tags, getTagToShardIds(opCtx, tags), shardIds); + nss, + shardKeyPattern, + validAfter, + tags, + getTagToShardIds(opCtx, tags), + shardIds, + isEmpty); return initialChunks; } diff --git a/src/mongo/db/s/config/initial_split_policy.h b/src/mongo/db/s/config/initial_split_policy.h index 6a2ca2d2a31..16cb2e82b24 100644 --- a/src/mongo/db/s/config/initial_split_policy.h +++ b/src/mongo/db/s/config/initial_split_policy.h @@ -105,7 +105,8 @@ public: const Timestamp& validAfter, const std::vector& tags, const StringMap>& tagToShards, - const std::vector& allShardIds); + const std::vector& allShardIds, + const bool isEmpty); /** * Creates the first chunks for a newly sharded collection. @@ -118,6 +119,7 @@ public: const std::vector& splitPoints, const std::vector& tags, const bool distributeInitialChunks, + const bool isEmpty, const int numContiguousChunksPerShard = 1); /** diff --git a/src/mongo/db/s/config/initial_split_policy_test.cpp b/src/mongo/db/s/config/initial_split_policy_test.cpp index 7ff8e44fef2..049f5b16892 100644 --- a/src/mongo/db/s/config/initial_split_policy_test.cpp +++ b/src/mongo/db/s/config/initial_split_policy_test.cpp @@ -281,7 +281,8 @@ public: timeStamp(), tags, makeTagToShards(numShards), - makeShardIds(numShards)); + makeShardIds(numShards), + true); const std::vector expectedChunks = makeChunks(expectedChunkRanges, expectedShardIds); assertChunkVectorsAreEqual(expectedChunks, shardCollectionConfig.chunks); @@ -431,7 +432,7 @@ TEST_F(GenerateShardCollectionInitialZonedChunksTest, ZoneNotAssociatedWithAnySh ASSERT_THROWS_CODE( InitialSplitPolicy::generateShardCollectionInitialZonedChunks( - nss(), shardKeyPattern(), timeStamp(), tags, tagToShards, makeShardIds(1)), + nss(), shardKeyPattern(), timeStamp(), tags, tagToShards, makeShardIds(1), true), AssertionException, 50973); } diff --git a/src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp index a8d0bc99b2e..71106b3a396 100644 --- a/src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp +++ b/src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp @@ -401,8 +401,18 @@ void ShardingCatalogManager::shardCollection(OperationContext* opCtx, } std::vector tags; - const auto initialChunks = InitialSplitPolicy::createFirstChunks( - opCtx, nss, fieldsAndOrder, dbPrimaryShardId, splitPoints, tags, distributeInitialChunks); + // Since this code runs on the config server, we cannot guarantee that the collection is still + // empty by the time the metadata is written so always assume we are sharding a non-empty + // collection. 
+ bool isEmpty = false; + const auto initialChunks = InitialSplitPolicy::createFirstChunks(opCtx, + nss, + fieldsAndOrder, + dbPrimaryShardId, + splitPoints, + tags, + distributeInitialChunks, + isEmpty); InitialSplitPolicy::writeFirstChunksToConfig(opCtx, initialChunks); diff --git a/src/mongo/db/s/config/sharding_catalog_manager_shard_collection_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_shard_collection_test.cpp index 62b24c06268..4b625547aa6 100644 --- a/src/mongo/db/s/config/sharding_catalog_manager_shard_collection_test.cpp +++ b/src/mongo/db/s/config/sharding_catalog_manager_shard_collection_test.cpp @@ -79,33 +79,29 @@ const NamespaceString kNamespace("db1.foo"); class ShardCollectionTest : public ConfigServerTestFixture { public: - void expectCount(const HostAndPort& receivingHost, - const NamespaceString& expectedNss, - const BSONObj& expectedQuery, - const StatusWith& response) { + void expectSplitVector(const HostAndPort& shardHost, + const ShardKeyPattern& keyPattern, + const BSONObj& splitPoints) { onCommand([&](const RemoteCommandRequest& request) { - ASSERT_EQUALS(receivingHost, request.target); + ASSERT_EQUALS(shardHost, request.target); string cmdName = request.cmdObj.firstElement().fieldName(); - - ASSERT_EQUALS("count", cmdName); - - const NamespaceString nss(request.dbname, request.cmdObj.firstElement().String()); - ASSERT_EQUALS(expectedNss, nss); - - if (expectedQuery.isEmpty()) { - auto queryElem = request.cmdObj["query"]; - ASSERT_TRUE(queryElem.eoo() || queryElem.Obj().isEmpty()); - } else { - ASSERT_BSONOBJ_EQ(expectedQuery, request.cmdObj["query"].Obj()); - } - - if (response.isOK()) { - return BSON("ok" << 1 << "n" << response.getValue()); - } - - BSONObjBuilder responseBuilder; - CommandHelpers::appendCommandStatusNoThrow(responseBuilder, response.getStatus()); - return responseBuilder.obj(); + ASSERT_EQUALS("splitVector", cmdName); + ASSERT_EQUALS(kNamespace.ns(), + request.cmdObj["splitVector"].String()); // splitVector uses full ns + + ASSERT_BSONOBJ_EQ(keyPattern.toBSON(), request.cmdObj["keyPattern"].Obj()); + ASSERT_BSONOBJ_EQ(keyPattern.getKeyPattern().globalMin(), request.cmdObj["min"].Obj()); + ASSERT_BSONOBJ_EQ(keyPattern.getKeyPattern().globalMax(), request.cmdObj["max"].Obj()); + ASSERT_EQUALS(64 * 1024 * 1024ULL, + static_cast(request.cmdObj["maxChunkSizeBytes"].numberLong())); + ASSERT_EQUALS(0, request.cmdObj["maxSplitPoints"].numberLong()); + ASSERT_EQUALS(0, request.cmdObj["maxChunkObjects"].numberLong()); + + ASSERT_BSONOBJ_EQ( + ReadPreferenceSetting(ReadPreference::PrimaryPreferred).toContainingBSON(), + rpc::TrackingMetadata::removeTrackingData(request.metadata)); + + return BSON("ok" << 1 << "splitKeys" << splitPoints); }); } @@ -187,8 +183,8 @@ TEST_F(ShardCollectionTest, noInitialChunksOrData) { testPrimaryShard); }); - // Report that no documents exist for the given collection on the primary shard - expectCount(shardHost, kNamespace, BSONObj(), 0); + // Respond to the splitVector command sent to the shard to figure out initial split points. + expectSplitVector(shardHost, shardKeyPattern, BSONObj()); // Expect the set shard version for that namespace. 
// We do not check for a specific ChunkVersion, because we cannot easily know the OID that was @@ -343,50 +339,6 @@ TEST_F(ShardCollectionTest, withInitialData) { BSONObj splitPoint2 = BSON("_id" << 200); BSONObj splitPoint3 = BSON("_id" << 300); - ChunkVersion expectedVersion(1, 0, OID::gen()); - - ChunkType expectedChunk0; - expectedChunk0.setNS(kNamespace); - expectedChunk0.setShard(shard.getName()); - expectedChunk0.setMin(keyPattern.getKeyPattern().globalMin()); - expectedChunk0.setMax(splitPoint0); - expectedChunk0.setVersion(expectedVersion); - expectedVersion.incMinor(); - - ChunkType expectedChunk1; - expectedChunk1.setNS(kNamespace); - expectedChunk1.setShard(shard.getName()); - expectedChunk1.setMin(splitPoint0); - expectedChunk1.setMax(splitPoint1); - expectedChunk1.setVersion(expectedVersion); - expectedVersion.incMinor(); - - ChunkType expectedChunk2; - expectedChunk2.setNS(kNamespace); - expectedChunk2.setShard(shard.getName()); - expectedChunk2.setMin(splitPoint1); - expectedChunk2.setMax(splitPoint2); - expectedChunk2.setVersion(expectedVersion); - expectedVersion.incMinor(); - - ChunkType expectedChunk3; - expectedChunk3.setNS(kNamespace); - expectedChunk3.setShard(shard.getName()); - expectedChunk3.setMin(splitPoint2); - expectedChunk3.setMax(splitPoint3); - expectedChunk3.setVersion(expectedVersion); - expectedVersion.incMinor(); - - ChunkType expectedChunk4; - expectedChunk4.setNS(kNamespace); - expectedChunk4.setShard(shard.getName()); - expectedChunk4.setMin(splitPoint3); - expectedChunk4.setMax(keyPattern.getKeyPattern().globalMax()); - expectedChunk4.setVersion(expectedVersion); - - vector expectedChunks{ - expectedChunk0, expectedChunk1, expectedChunk2, expectedChunk3, expectedChunk4}; - BSONObj defaultCollation; // Now start actually sharding the collection. @@ -406,33 +358,10 @@ TEST_F(ShardCollectionTest, withInitialData) { testPrimaryShard); }); - // Report that documents exist for the given collection on the primary shard, so that calling - // splitVector is required for calculating the initial split points. - expectCount(shardHost, kNamespace, BSONObj(), 1000); - - // Respond to the splitVector command sent to the shard to figure out initial split points - onCommand([&](const RemoteCommandRequest& request) { - ASSERT_EQUALS(shardHost, request.target); - string cmdName = request.cmdObj.firstElement().fieldName(); - ASSERT_EQUALS("splitVector", cmdName); - ASSERT_EQUALS(kNamespace.ns(), - request.cmdObj["splitVector"].String()); // splitVector uses full ns - - ASSERT_BSONOBJ_EQ(keyPattern.toBSON(), request.cmdObj["keyPattern"].Obj()); - ASSERT_BSONOBJ_EQ(keyPattern.getKeyPattern().globalMin(), request.cmdObj["min"].Obj()); - ASSERT_BSONOBJ_EQ(keyPattern.getKeyPattern().globalMax(), request.cmdObj["max"].Obj()); - ASSERT_EQUALS(64 * 1024 * 1024ULL, - static_cast(request.cmdObj["maxChunkSizeBytes"].numberLong())); - ASSERT_EQUALS(0, request.cmdObj["maxSplitPoints"].numberLong()); - ASSERT_EQUALS(0, request.cmdObj["maxChunkObjects"].numberLong()); - - ASSERT_BSONOBJ_EQ( - ReadPreferenceSetting(ReadPreference::PrimaryPreferred).toContainingBSON(), - rpc::TrackingMetadata::removeTrackingData(request.metadata)); - - return BSON("ok" << 1 << "splitKeys" - << BSON_ARRAY(splitPoint0 << splitPoint1 << splitPoint2 << splitPoint3)); - }); + // Respond to the splitVector command sent to the shard to figure out initial split points. 
+ expectSplitVector(shardHost, + keyPattern, + BSON_ARRAY(splitPoint0 << splitPoint1 << splitPoint2 << splitPoint3)); // Expect the set shard version for that namespace // We do not check for a specific ChunkVersion, because we cannot easily know the OID that was diff --git a/src/mongo/db/s/shardsvr_shard_collection.cpp b/src/mongo/db/s/shardsvr_shard_collection.cpp index d4fc29f23ed..829ffe23499 100644 --- a/src/mongo/db/s/shardsvr_shard_collection.cpp +++ b/src/mongo/db/s/shardsvr_shard_collection.cpp @@ -407,7 +407,8 @@ void shardCollection(OperationContext* opCtx, const std::vector& tags, const bool fromMapReduce, const ShardId& dbPrimaryShardId, - const int numContiguousChunksPerShard) { + const int numContiguousChunksPerShard, + const bool isEmpty) { const auto catalogClient = Grid::get(opCtx)->catalogClient(); const auto shardRegistry = Grid::get(opCtx)->shardRegistry(); @@ -449,6 +450,7 @@ void shardCollection(OperationContext* opCtx, splitPoints, tags, distributeChunks, + isEmpty, numContiguousChunksPerShard); // Create collections on all shards that will receive chunks. We need to do this after we mark @@ -644,13 +646,7 @@ public: if (request.getInitialSplitPoints()) { finalSplitPoints = std::move(*request.getInitialSplitPoints()); - } else if (!tags.empty()) { - // no need to find split points since we will create chunks based on - // the existing zones - uassert(ErrorCodes::InvalidOptions, - str::stream() << "found existing zones but the collection is not empty", - isEmpty); - } else { + } else if (tags.empty()) { InitialSplitPolicy::calculateHashedSplitPointsForEmptyCollection( shardKeyPattern, isEmpty, @@ -691,7 +687,8 @@ public: tags, fromMapReduce, ShardingState::get(opCtx)->shardId(), - numContiguousChunksPerShard); + numContiguousChunksPerShard, + isEmpty); status = Status::OK(); } catch (const DBException& e) { -- cgit v1.2.1