-rw-r--r-- buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml | 1
-rw-r--r-- jstests/sharding/shard_collection_existing_zones.js | 48
-rw-r--r-- src/mongo/db/s/config/initial_split_policy.cpp | 93
-rw-r--r-- src/mongo/db/s/config/initial_split_policy.h | 4
-rw-r--r-- src/mongo/db/s/config/initial_split_policy_test.cpp | 5
-rw-r--r-- src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp | 14
-rw-r--r-- src/mongo/db/s/config/sharding_catalog_manager_shard_collection_test.cpp | 125
-rw-r--r-- src/mongo/db/s/shardsvr_shard_collection.cpp | 15
8 files changed, 152 insertions(+), 153 deletions(-)
diff --git a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
index 6648bac3175..5c89f344930 100644
--- a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
@@ -102,6 +102,7 @@ selector:
- jstests/sharding/validate_collection.js
# Enable if SERVER-36966 is backported to v3.6
- jstests/sharding/mr_output_sharded_validation.js
+ - jstests/sharding/shard_collection_existing_zones.js
executor:
config:
diff --git a/jstests/sharding/shard_collection_existing_zones.js b/jstests/sharding/shard_collection_existing_zones.js
index d6cb10c304e..8782e4e132b 100644
--- a/jstests/sharding/shard_collection_existing_zones.js
+++ b/jstests/sharding/shard_collection_existing_zones.js
@@ -128,6 +128,52 @@
assert.commandWorked(testDB.runCommand({drop: kCollName}));
}
+ /**
+ * Tests that a non-empty collection associated with zones can be sharded.
+ */
+ function testNonemptyZonedCollection() {
+ var shardKey = {x: 1};
+ var shards = configDB.shards.find().toArray();
+ var testColl = testDB.getCollection(kCollName);
+ var ranges = [
+ {min: {x: 0}, max: {x: 10}},
+ {min: {x: 10}, max: {x: 20}},
+ {min: {x: 20}, max: {x: 40}}
+ ];
+
+ for (let i = 0; i < 40; i++) {
+ assert.writeOK(testColl.insert({x: i}));
+ }
+
+ assert.commandWorked(testColl.createIndex(shardKey));
+
+ for (let i = 0; i < shards.length; i++) {
+ assert.commandWorked(
+ mongos.adminCommand({addShardToZone: shards[i]._id, zone: zoneName + i}));
+ assert.commandWorked(mongos.adminCommand({
+ updateZoneKeyRange: ns,
+ min: ranges[i].min,
+ max: ranges[i].max,
+ zone: zoneName + i
+ }));
+ }
+
+ assert.commandWorked(mongos.adminCommand({shardCollection: ns, key: shardKey}));
+
+ // Check that there is initially 1 chunk.
+ assert.eq(1, configDB.chunks.count({ns: ns}));
+
+ st.startBalancer();
+
+ // Check that the chunks were moved properly.
+ assert.soon(() => {
+ let res = configDB.chunks.count({ns: ns});
+ return res === 5;
+ }, 'balancer never ran', 10 * 60 * 1000, 1000);
+
+ assert.commandWorked(testDB.runCommand({drop: kCollName}));
+ }
+
// test that shardCollection checks that a zone is associated with a shard.
testShardZoneAssociationValidation({x: 1}, false, false);
@@ -152,5 +198,7 @@
testChunkSplits(false);
testChunkSplits(true);
+ testNonemptyZonedCollection();
+
st.stop();
})();
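
The assert.soon above expects the balancer to turn the single initial chunk into 5 chunks: the three zone ranges plus the two uncovered holes, [MinKey, 0) and [40, MaxKey). A minimal standalone C++ sketch of that count (integer sentinels stand in for MinKey/MaxKey; this is not MongoDB code):

#include <iostream>
#include <limits>
#include <vector>

struct Range {
    long long min;
    long long max;  // exclusive upper bound
};

int main() {
    const long long kGlobalMin = std::numeric_limits<long long>::min();  // stands in for MinKey
    const long long kGlobalMax = std::numeric_limits<long long>::max();  // stands in for MaxKey

    // Zone ranges from the test: {x: 0}-{x: 10}, {x: 10}-{x: 20}, {x: 20}-{x: 40}.
    const std::vector<Range> zones = {{0, 10}, {10, 20}, {20, 40}};

    int expectedChunks = 0;
    long long lastMax = kGlobalMin;
    for (const auto& zone : zones) {
        if (zone.min > lastMax)
            ++expectedChunks;  // hole between the previous chunk and this zone
        ++expectedChunks;      // the zone range itself
        lastMax = zone.max;
    }
    if (lastMax < kGlobalMax)
        ++expectedChunks;  // hole between the last zone and MaxKey

    std::cout << expectedChunks << "\n";  // prints 5
    return 0;
}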
diff --git a/src/mongo/db/s/config/initial_split_policy.cpp b/src/mongo/db/s/config/initial_split_policy.cpp
index b48701bfd7a..5d6a4b2b5bc 100644
--- a/src/mongo/db/s/config/initial_split_policy.cpp
+++ b/src/mongo/db/s/config/initial_split_policy.cpp
@@ -207,7 +207,8 @@ InitialSplitPolicy::generateShardCollectionInitialZonedChunks(
const Timestamp& validAfter,
const std::vector<TagsType>& tags,
const StringMap<std::vector<ShardId>>& tagToShards,
- const std::vector<ShardId>& allShardIds) {
+ const std::vector<ShardId>& allShardIds,
+ const bool isEmpty) {
invariant(!allShardIds.empty());
invariant(!tags.empty());
@@ -218,18 +219,31 @@ InitialSplitPolicy::generateShardCollectionInitialZonedChunks(
std::vector<ChunkType> chunks;
- for (const auto& tag : tags) {
- if (tag.getMinKey().woCompare(lastChunkMax) > 0) {
- // create a chunk for the hole between zones
- const ShardId shardId = allShardIds[indx++ % allShardIds.size()];
- appendChunk(nss, lastChunkMax, tag.getMinKey(), &version, validAfter, shardId, &chunks);
- }
-
- // check that this tag is associated with a shard and if so create a chunk for the zone.
- const auto it = tagToShards.find(tag.getTag());
- invariant(it != tagToShards.end());
- const auto& shardIdsForChunk = it->second;
- uassert(50973,
+ if (!isEmpty) {
+ // For a non-empty collection, create one chunk on the primary shard and leave it to the
+ // balancer to do the final zone partitioning/rebalancing.
+ appendChunk(nss,
+ keyPattern.globalMin(),
+ keyPattern.globalMax(),
+ &version,
+ validAfter,
+ allShardIds[0],
+ &chunks);
+ } else {
+ for (const auto& tag : tags) {
+ if (tag.getMinKey().woCompare(lastChunkMax) > 0) {
+ // create a chunk for the hole between zones
+ const ShardId shardId = allShardIds[indx++ % allShardIds.size()];
+ appendChunk(
+ nss, lastChunkMax, tag.getMinKey(), &version, validAfter, shardId, &chunks);
+ }
+
+ // check that this tag is associated with a shard and if so create a chunk for the zone.
+ const auto it = tagToShards.find(tag.getTag());
+ invariant(it != tagToShards.end());
+ const auto& shardIdsForChunk = it->second;
+ uassert(
+ 50973,
str::stream()
<< "cannot shard collection "
<< nss.ns()
@@ -238,21 +252,22 @@ InitialSplitPolicy::generateShardCollectionInitialZonedChunks(
<< " which is not associated with a shard. please add this zone to a shard.",
!shardIdsForChunk.empty());
- appendChunk(nss,
- tag.getMinKey(),
- tag.getMaxKey(),
- &version,
- validAfter,
- shardIdsForChunk[0],
- &chunks);
- lastChunkMax = tag.getMaxKey();
- }
+ appendChunk(nss,
+ tag.getMinKey(),
+ tag.getMaxKey(),
+ &version,
+ validAfter,
+ shardIdsForChunk[0],
+ &chunks);
+ lastChunkMax = tag.getMaxKey();
+ }
- if (lastChunkMax.woCompare(keyPattern.globalMax()) < 0) {
- // existing zones do not span to $maxKey so create a chunk for that
- const ShardId shardId = allShardIds[indx++ % allShardIds.size()];
- appendChunk(
- nss, lastChunkMax, keyPattern.globalMax(), &version, validAfter, shardId, &chunks);
+ if (lastChunkMax.woCompare(keyPattern.globalMax()) < 0) {
+ // existing zones do not span to $maxKey so create a chunk for that
+ const ShardId shardId = allShardIds[indx++ % allShardIds.size()];
+ appendChunk(
+ nss, lastChunkMax, keyPattern.globalMax(), &version, validAfter, shardId, &chunks);
+ }
}
log() << "Created " << chunks.size() << " chunk(s) for: " << nss << " using new epoch "
@@ -269,6 +284,7 @@ InitialSplitPolicy::ShardCollectionConfig InitialSplitPolicy::createFirstChunks(
const std::vector<BSONObj>& splitPoints,
const std::vector<TagsType>& tags,
const bool distributeInitialChunks,
+ const bool isEmpty,
const int numContiguousChunksPerShard) {
const auto& keyPattern = shardKeyPattern.getKeyPattern();
@@ -281,22 +297,11 @@ InitialSplitPolicy::ShardCollectionConfig InitialSplitPolicy::createFirstChunks(
auto primaryShard =
uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard(opCtx, primaryShardId));
- auto result = uassertStatusOK(primaryShard->runCommandWithFixedRetryAttempts(
- opCtx,
- ReadPreferenceSetting{ReadPreference::PrimaryPreferred},
- nss.db().toString(),
- BSON("count" << nss.coll()),
- Shard::RetryPolicy::kIdempotent));
-
- long long numObjects = 0;
- uassertStatusOK(result.commandStatus);
- uassertStatusOK(bsonExtractIntegerField(result.response, "n", &numObjects));
-
// Refresh the balancer settings to ensure the chunk size setting, which is sent as part of
// the splitVector command and affects the number of chunks returned, has been loaded.
uassertStatusOK(Grid::get(opCtx)->getBalancerConfiguration()->refreshAndCheck(opCtx));
- if (numObjects > 0) {
+ if (!isEmpty) {
finalSplitPoints = uassertStatusOK(shardutil::selectChunkSplitPoints(
opCtx,
primaryShardId,
@@ -309,7 +314,7 @@ InitialSplitPolicy::ShardCollectionConfig InitialSplitPolicy::createFirstChunks(
// If docs already exist for the collection, must use primary shard,
// otherwise defer to passed-in distribution option.
- if (numObjects == 0 && distributeInitialChunks) {
+ if (isEmpty && distributeInitialChunks) {
Grid::get(opCtx)->shardRegistry()->getAllShardIdsNoReload(&shardIds);
} else {
shardIds.push_back(primaryShardId);
@@ -348,7 +353,13 @@ InitialSplitPolicy::ShardCollectionConfig InitialSplitPolicy::createFirstChunks(
shardIds,
numContiguousChunksPerShard)
: InitialSplitPolicy::generateShardCollectionInitialZonedChunks(
- nss, shardKeyPattern, validAfter, tags, getTagToShardIds(opCtx, tags), shardIds);
+ nss,
+ shardKeyPattern,
+ validAfter,
+ tags,
+ getTagToShardIds(opCtx, tags),
+ shardIds,
+ isEmpty);
return initialChunks;
}
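
The core change in this file: for a collection that already has data, the zoned path now creates a single [MinKey, MaxKey) chunk on the primary shard and leaves zone placement to the balancer; the per-zone chunk cutting is kept only for empty collections. A rough, self-contained sketch of that branch (placeholder string keys and stub types, not the real ChunkType/KeyPattern code):

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

struct Chunk {
    std::string min;
    std::string max;
    std::string shard;
};

struct Zone {
    std::string min;
    std::string max;
    std::string shard;  // the shard already associated with this zone's tag
};

std::vector<Chunk> makeInitialChunks(bool isEmpty,
                                     const std::vector<Zone>& zones,
                                     const std::vector<std::string>& allShards) {
    std::vector<Chunk> chunks;
    if (!isEmpty) {
        // Non-empty collection: one big chunk on the primary shard (allShards[0] here);
        // the balancer later splits and moves it to satisfy the zones.
        chunks.push_back({"MinKey", "MaxKey", allShards[0]});
        return chunks;
    }
    // Empty collection: keep the previous behaviour -- one chunk per zone plus chunks
    // for the holes between zones, with hole chunks round-robined across all shards.
    std::size_t indx = 0;
    std::string lastMax = "MinKey";
    for (const auto& zone : zones) {
        if (zone.min != lastMax)  // hole before this zone
            chunks.push_back({lastMax, zone.min, allShards[indx++ % allShards.size()]});
        chunks.push_back({zone.min, zone.max, zone.shard});
        lastMax = zone.max;
    }
    if (lastMax != "MaxKey")  // hole after the last zone
        chunks.push_back({lastMax, "MaxKey", allShards[indx % allShards.size()]});
    return chunks;
}

int main() {
    const std::vector<Zone> zones = {{"0", "10", "shard1"}, {"10", "20", "shard2"}};
    const auto chunks = makeInitialChunks(/*isEmpty=*/false, zones, {"shard0", "shard1", "shard2"});
    std::cout << chunks.size() << "\n";  // 1: the whole key space stays on shard0
    return 0;
}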
diff --git a/src/mongo/db/s/config/initial_split_policy.h b/src/mongo/db/s/config/initial_split_policy.h
index 6a2ca2d2a31..16cb2e82b24 100644
--- a/src/mongo/db/s/config/initial_split_policy.h
+++ b/src/mongo/db/s/config/initial_split_policy.h
@@ -105,7 +105,8 @@ public:
const Timestamp& validAfter,
const std::vector<TagsType>& tags,
const StringMap<std::vector<ShardId>>& tagToShards,
- const std::vector<ShardId>& allShardIds);
+ const std::vector<ShardId>& allShardIds,
+ const bool isEmpty);
/**
* Creates the first chunks for a newly sharded collection.
@@ -118,6 +119,7 @@ public:
const std::vector<BSONObj>& splitPoints,
const std::vector<TagsType>& tags,
const bool distributeInitialChunks,
+ const bool isEmpty,
const int numContiguousChunksPerShard = 1);
/**
diff --git a/src/mongo/db/s/config/initial_split_policy_test.cpp b/src/mongo/db/s/config/initial_split_policy_test.cpp
index 7ff8e44fef2..049f5b16892 100644
--- a/src/mongo/db/s/config/initial_split_policy_test.cpp
+++ b/src/mongo/db/s/config/initial_split_policy_test.cpp
@@ -281,7 +281,8 @@ public:
timeStamp(),
tags,
makeTagToShards(numShards),
- makeShardIds(numShards));
+ makeShardIds(numShards),
+ true);
const std::vector<ChunkType> expectedChunks =
makeChunks(expectedChunkRanges, expectedShardIds);
assertChunkVectorsAreEqual(expectedChunks, shardCollectionConfig.chunks);
@@ -431,7 +432,7 @@ TEST_F(GenerateShardCollectionInitialZonedChunksTest, ZoneNotAssociatedWithAnySh
ASSERT_THROWS_CODE(
InitialSplitPolicy::generateShardCollectionInitialZonedChunks(
- nss(), shardKeyPattern(), timeStamp(), tags, tagToShards, makeShardIds(1)),
+ nss(), shardKeyPattern(), timeStamp(), tags, tagToShards, makeShardIds(1), true),
AssertionException,
50973);
}
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp
index a8d0bc99b2e..71106b3a396 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp
@@ -401,8 +401,18 @@ void ShardingCatalogManager::shardCollection(OperationContext* opCtx,
}
std::vector<TagsType> tags;
- const auto initialChunks = InitialSplitPolicy::createFirstChunks(
- opCtx, nss, fieldsAndOrder, dbPrimaryShardId, splitPoints, tags, distributeInitialChunks);
+ // Since this code runs on the config server, we cannot guarantee that the collection is still
+ // empty by the time the metadata is written so always assume we are sharding a non-empty
+ // collection.
+ bool isEmpty = false;
+ const auto initialChunks = InitialSplitPolicy::createFirstChunks(opCtx,
+ nss,
+ fieldsAndOrder,
+ dbPrimaryShardId,
+ splitPoints,
+ tags,
+ distributeInitialChunks,
+ isEmpty);
InitialSplitPolicy::writeFirstChunksToConfig(opCtx, initialChunks);
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_shard_collection_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_shard_collection_test.cpp
index 62b24c06268..4b625547aa6 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_shard_collection_test.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_shard_collection_test.cpp
@@ -79,33 +79,29 @@ const NamespaceString kNamespace("db1.foo");
class ShardCollectionTest : public ConfigServerTestFixture {
public:
- void expectCount(const HostAndPort& receivingHost,
- const NamespaceString& expectedNss,
- const BSONObj& expectedQuery,
- const StatusWith<long long>& response) {
+ void expectSplitVector(const HostAndPort& shardHost,
+ const ShardKeyPattern& keyPattern,
+ const BSONObj& splitPoints) {
onCommand([&](const RemoteCommandRequest& request) {
- ASSERT_EQUALS(receivingHost, request.target);
+ ASSERT_EQUALS(shardHost, request.target);
string cmdName = request.cmdObj.firstElement().fieldName();
-
- ASSERT_EQUALS("count", cmdName);
-
- const NamespaceString nss(request.dbname, request.cmdObj.firstElement().String());
- ASSERT_EQUALS(expectedNss, nss);
-
- if (expectedQuery.isEmpty()) {
- auto queryElem = request.cmdObj["query"];
- ASSERT_TRUE(queryElem.eoo() || queryElem.Obj().isEmpty());
- } else {
- ASSERT_BSONOBJ_EQ(expectedQuery, request.cmdObj["query"].Obj());
- }
-
- if (response.isOK()) {
- return BSON("ok" << 1 << "n" << response.getValue());
- }
-
- BSONObjBuilder responseBuilder;
- CommandHelpers::appendCommandStatusNoThrow(responseBuilder, response.getStatus());
- return responseBuilder.obj();
+ ASSERT_EQUALS("splitVector", cmdName);
+ ASSERT_EQUALS(kNamespace.ns(),
+ request.cmdObj["splitVector"].String()); // splitVector uses full ns
+
+ ASSERT_BSONOBJ_EQ(keyPattern.toBSON(), request.cmdObj["keyPattern"].Obj());
+ ASSERT_BSONOBJ_EQ(keyPattern.getKeyPattern().globalMin(), request.cmdObj["min"].Obj());
+ ASSERT_BSONOBJ_EQ(keyPattern.getKeyPattern().globalMax(), request.cmdObj["max"].Obj());
+ ASSERT_EQUALS(64 * 1024 * 1024ULL,
+ static_cast<uint64_t>(request.cmdObj["maxChunkSizeBytes"].numberLong()));
+ ASSERT_EQUALS(0, request.cmdObj["maxSplitPoints"].numberLong());
+ ASSERT_EQUALS(0, request.cmdObj["maxChunkObjects"].numberLong());
+
+ ASSERT_BSONOBJ_EQ(
+ ReadPreferenceSetting(ReadPreference::PrimaryPreferred).toContainingBSON(),
+ rpc::TrackingMetadata::removeTrackingData(request.metadata));
+
+ return BSON("ok" << 1 << "splitKeys" << splitPoints);
});
}
@@ -187,8 +183,8 @@ TEST_F(ShardCollectionTest, noInitialChunksOrData) {
testPrimaryShard);
});
- // Report that no documents exist for the given collection on the primary shard
- expectCount(shardHost, kNamespace, BSONObj(), 0);
+ // Respond to the splitVector command sent to the shard to figure out initial split points.
+ expectSplitVector(shardHost, shardKeyPattern, BSONObj());
// Expect the set shard version for that namespace.
// We do not check for a specific ChunkVersion, because we cannot easily know the OID that was
@@ -343,50 +339,6 @@ TEST_F(ShardCollectionTest, withInitialData) {
BSONObj splitPoint2 = BSON("_id" << 200);
BSONObj splitPoint3 = BSON("_id" << 300);
- ChunkVersion expectedVersion(1, 0, OID::gen());
-
- ChunkType expectedChunk0;
- expectedChunk0.setNS(kNamespace);
- expectedChunk0.setShard(shard.getName());
- expectedChunk0.setMin(keyPattern.getKeyPattern().globalMin());
- expectedChunk0.setMax(splitPoint0);
- expectedChunk0.setVersion(expectedVersion);
- expectedVersion.incMinor();
-
- ChunkType expectedChunk1;
- expectedChunk1.setNS(kNamespace);
- expectedChunk1.setShard(shard.getName());
- expectedChunk1.setMin(splitPoint0);
- expectedChunk1.setMax(splitPoint1);
- expectedChunk1.setVersion(expectedVersion);
- expectedVersion.incMinor();
-
- ChunkType expectedChunk2;
- expectedChunk2.setNS(kNamespace);
- expectedChunk2.setShard(shard.getName());
- expectedChunk2.setMin(splitPoint1);
- expectedChunk2.setMax(splitPoint2);
- expectedChunk2.setVersion(expectedVersion);
- expectedVersion.incMinor();
-
- ChunkType expectedChunk3;
- expectedChunk3.setNS(kNamespace);
- expectedChunk3.setShard(shard.getName());
- expectedChunk3.setMin(splitPoint2);
- expectedChunk3.setMax(splitPoint3);
- expectedChunk3.setVersion(expectedVersion);
- expectedVersion.incMinor();
-
- ChunkType expectedChunk4;
- expectedChunk4.setNS(kNamespace);
- expectedChunk4.setShard(shard.getName());
- expectedChunk4.setMin(splitPoint3);
- expectedChunk4.setMax(keyPattern.getKeyPattern().globalMax());
- expectedChunk4.setVersion(expectedVersion);
-
- vector<ChunkType> expectedChunks{
- expectedChunk0, expectedChunk1, expectedChunk2, expectedChunk3, expectedChunk4};
-
BSONObj defaultCollation;
// Now start actually sharding the collection.
@@ -406,33 +358,10 @@ TEST_F(ShardCollectionTest, withInitialData) {
testPrimaryShard);
});
- // Report that documents exist for the given collection on the primary shard, so that calling
- // splitVector is required for calculating the initial split points.
- expectCount(shardHost, kNamespace, BSONObj(), 1000);
-
- // Respond to the splitVector command sent to the shard to figure out initial split points
- onCommand([&](const RemoteCommandRequest& request) {
- ASSERT_EQUALS(shardHost, request.target);
- string cmdName = request.cmdObj.firstElement().fieldName();
- ASSERT_EQUALS("splitVector", cmdName);
- ASSERT_EQUALS(kNamespace.ns(),
- request.cmdObj["splitVector"].String()); // splitVector uses full ns
-
- ASSERT_BSONOBJ_EQ(keyPattern.toBSON(), request.cmdObj["keyPattern"].Obj());
- ASSERT_BSONOBJ_EQ(keyPattern.getKeyPattern().globalMin(), request.cmdObj["min"].Obj());
- ASSERT_BSONOBJ_EQ(keyPattern.getKeyPattern().globalMax(), request.cmdObj["max"].Obj());
- ASSERT_EQUALS(64 * 1024 * 1024ULL,
- static_cast<uint64_t>(request.cmdObj["maxChunkSizeBytes"].numberLong()));
- ASSERT_EQUALS(0, request.cmdObj["maxSplitPoints"].numberLong());
- ASSERT_EQUALS(0, request.cmdObj["maxChunkObjects"].numberLong());
-
- ASSERT_BSONOBJ_EQ(
- ReadPreferenceSetting(ReadPreference::PrimaryPreferred).toContainingBSON(),
- rpc::TrackingMetadata::removeTrackingData(request.metadata));
-
- return BSON("ok" << 1 << "splitKeys"
- << BSON_ARRAY(splitPoint0 << splitPoint1 << splitPoint2 << splitPoint3));
- });
+ // Respond to the splitVector command sent to the shard to figure out initial split points.
+ expectSplitVector(shardHost,
+ keyPattern,
+ BSON_ARRAY(splitPoint0 << splitPoint1 << splitPoint2 << splitPoint3));
// Expect the set shard version for that namespace
// We do not check for a specific ChunkVersion, because we cannot easily know the OID that was
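
For reference, the splitVector request shape that expectSplitVector() now asserts in one place, written out as JSON (the namespace comes from this test's fixture; the shard key and the split keys in the mocked reply are illustrative only):

#include <iostream>

int main() {
    const char* request = R"({
    "splitVector": "db1.foo",
    "keyPattern": { "_id": 1 },
    "min": { "_id": { "$minKey": 1 } },
    "max": { "_id": { "$maxKey": 1 } },
    "maxChunkSizeBytes": 67108864,
    "maxSplitPoints": 0,
    "maxChunkObjects": 0
})";

    // Mocked reply returned by the helper; splitKeys values are example placeholders.
    const char* reply = R"({ "ok": 1, "splitKeys": [ { "_id": 100 }, { "_id": 200 } ] })";

    std::cout << request << "\n" << reply << "\n";
    return 0;
}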
diff --git a/src/mongo/db/s/shardsvr_shard_collection.cpp b/src/mongo/db/s/shardsvr_shard_collection.cpp
index d4fc29f23ed..829ffe23499 100644
--- a/src/mongo/db/s/shardsvr_shard_collection.cpp
+++ b/src/mongo/db/s/shardsvr_shard_collection.cpp
@@ -407,7 +407,8 @@ void shardCollection(OperationContext* opCtx,
const std::vector<TagsType>& tags,
const bool fromMapReduce,
const ShardId& dbPrimaryShardId,
- const int numContiguousChunksPerShard) {
+ const int numContiguousChunksPerShard,
+ const bool isEmpty) {
const auto catalogClient = Grid::get(opCtx)->catalogClient();
const auto shardRegistry = Grid::get(opCtx)->shardRegistry();
@@ -449,6 +450,7 @@ void shardCollection(OperationContext* opCtx,
splitPoints,
tags,
distributeChunks,
+ isEmpty,
numContiguousChunksPerShard);
// Create collections on all shards that will receive chunks. We need to do this after we mark
@@ -644,13 +646,7 @@ public:
if (request.getInitialSplitPoints()) {
finalSplitPoints = std::move(*request.getInitialSplitPoints());
- } else if (!tags.empty()) {
- // no need to find split points since we will create chunks based on
- // the existing zones
- uassert(ErrorCodes::InvalidOptions,
- str::stream() << "found existing zones but the collection is not empty",
- isEmpty);
- } else {
+ } else if (tags.empty()) {
InitialSplitPolicy::calculateHashedSplitPointsForEmptyCollection(
shardKeyPattern,
isEmpty,
@@ -691,7 +687,8 @@ public:
tags,
fromMapReduce,
ShardingState::get(opCtx)->shardId(),
- numContiguousChunksPerShard);
+ numContiguousChunksPerShard,
+ isEmpty);
status = Status::OK();
} catch (const DBException& e) {
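
The command-side change: instead of refusing to shard a non-empty collection that already has zones, shardsvrShardCollection now skips up-front split-point calculation whenever zones exist and threads isEmpty down to shardCollection()/createFirstChunks(). A rough, self-contained sketch of that decision (stub types; the real code operates on BSONObj and TagsType):

#include <vector>

struct BSONObjStub {};  // stand-in for BSONObj

// requestedSplitPoints: the command's optional initialSplitPoints (e.g. from mapReduce).
std::vector<BSONObjStub> chooseSplitPoints(const std::vector<BSONObjStub>* requestedSplitPoints,
                                           bool hasZones) {
    if (requestedSplitPoints) {
        return *requestedSplitPoints;  // caller supplied them explicitly
    }
    std::vector<BSONObjStub> splitPoints;
    if (!hasZones) {
        // Here the real command calls
        // InitialSplitPolicy::calculateHashedSplitPointsForEmptyCollection(...), which
        // presumably only produces split points for an empty, hashed-key collection.
    }
    // If zones exist, no split points are computed up front any more: the old uassert
    // ("found existing zones but the collection is not empty") is gone, and isEmpty is
    // simply passed through to shardCollection()/createFirstChunks().
    return splitPoints;
}

int main() {
    BSONObjStub p;
    std::vector<BSONObjStub> supplied = {p};
    auto a = chooseSplitPoints(&supplied, /*hasZones=*/false);  // uses the supplied points
    auto b = chooseSplitPoints(nullptr, /*hasZones=*/true);     // empty: zones handled later
    return (a.size() + b.size()) == 1 ? 0 : 1;
}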