author     Matthew Saltz <matthew.saltz@mongodb.com>          2020-03-16 15:56:11 -0400
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>   2020-03-26 22:51:39 +0000
commit     c5eea7753b2fe3082d853ff9400117c85ac42dab (patch)
tree       3a0b10e16e5f02b64341783299ee9ccfe67eb9b9
parent     491ab3f67681e83f4184f4ffce07c6c53d9441d9 (diff)
SERVER-46370 Maintain _receivingChunks list correctly after shard key refine
-rw-r--r--  buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml      1
-rw-r--r--  jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js  441
-rw-r--r--  src/mongo/db/s/metadata_manager.cpp                                            21
-rw-r--r--  src/mongo/db/s/metadata_manager.h                                               7
-rw-r--r--  src/mongo/db/s/metadata_manager_test.cpp                                       80
-rw-r--r--  src/mongo/db/s/migration_destination_manager.cpp                               17
-rw-r--r--  src/mongo/db/s/migration_destination_manager.h                                  1
7 files changed, 427 insertions, 141 deletions
diff --git a/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml b/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml
index e23481be51c..4be1df1d525 100644
--- a/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml
@@ -50,6 +50,7 @@ selector:
- jstests/sharding/prefix_shard_key.js
- jstests/sharding/presplit.js
- jstests/sharding/query_config.js
+ - jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js
- jstests/sharding/remove1.js
- jstests/sharding/rename_across_mongos.js
- jstests/sharding/shard1.js
diff --git a/jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js b/jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js
index d5ed8b095ef..5c3c22a52c9 100644
--- a/jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js
+++ b/jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js
@@ -13,7 +13,8 @@ load('jstests/libs/parallel_shell_helpers.js');
TestData.skipCheckingUUIDsConsistentAcrossCluster = true;
const dbName = "test";
-const ns = dbName + ".foo";
+const collName = "foo";
+const ns = dbName + "." + collName;
const originalShardKey = {
x: 1
@@ -33,9 +34,7 @@ const refinedShardKeyValueInChunk = {
y: 1
};
-const st = new ShardingTest({shards: {rs0: {nodes: 3}, rs1: {nodes: 3}}});
-
-function setUp() {
+function setUp(st) {
// Create a sharded collection with two chunks on shard0, split at key {x: -1}.
assert.commandWorked(st.s.adminCommand({enableSharding: dbName}));
assert.commandWorked(st.s.adminCommand({movePrimary: dbName, to: st.shard0.shardName}));
@@ -48,7 +47,7 @@ function setUp() {
}
}
-function tearDown() {
+function tearDown(st) {
st.s.getCollection(ns).drop();
}
@@ -56,165 +55,369 @@ function tearDown() {
* Generic function to run a test. 'description' is a description of the test for logging
* purposes and 'testBody' is the test function.
*/
-function test(description, testBody) {
+function test(st, description, testBody) {
jsTest.log(`Running Test Setup: ${description}`);
- setUp();
+ setUp(st);
jsTest.log(`Running Test Body: ${description}`);
testBody();
jsTest.log(`Running Test Tear-Down: ${description}`);
- tearDown();
+ tearDown(st);
jsTest.log(`Finished Running Test: ${description}`);
}
-test("Refining the shard key does not prevent removal of orphaned documents", () => {
- // Enable failpoint which will cause range deletion to hang indefinitely.
- let suspendRangeDeletionFailpoint =
- configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion");
+// Tests with resumable range deleter enabled.
+(() => {
+ const st = new ShardingTest({shards: {rs0: {nodes: 3}, rs1: {nodes: 3}}});
+ test(st,
+ "Refining the shard key does not prevent removal of orphaned documents on a donor" +
+ " shard after a successful migration",
+ () => {
+ // Enable failpoint which will cause range deletion to hang indefinitely.
+ let suspendRangeDeletionFailpoint =
+ configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion");
+
+ // Note that _waitForDelete has to be absent/false since we're suspending range
+ // deletion.
+ assert.commandWorked(st.s.adminCommand(
+ {moveChunk: ns, find: shardKeyValueInChunk, to: st.shard1.shardName}));
+
+ jsTestLog("Waiting for the suspendRangeDeletion failpoint to be hit");
- // Note that _waitForDelete has to be absent/false since we're suspending range deletion.
- assert.commandWorked(
- st.s.adminCommand({moveChunk: ns, find: shardKeyValueInChunk, to: st.shard1.shardName}));
+ suspendRangeDeletionFailpoint.wait();
- jsTestLog("Waiting for the suspendRangeDeletion failpoint to be hit");
+ jsTestLog("Refining the shard key");
- suspendRangeDeletionFailpoint.wait();
+ // Create an index on the refined shard key.
+ assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey));
- jsTestLog("Refining the shard key");
+ // Refine the shard key from just the field 'x' to 'x' and 'y'.
+ assert.commandWorked(
+ st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey}));
- // Create an index on the refined shard key.
- assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey));
+ // The index on the original shard key shouldn't be required anymore.
+ assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey));
- // Refine the shard key from just the field 'x' to 'x' and 'y'.
- assert.commandWorked(st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey}));
+ // Allow range deletion to continue.
+ suspendRangeDeletionFailpoint.off();
- // The index on the original shard key shouldn't be required anymore.
- assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey));
+ jsTestLog("Waiting for orphans to be removed from shard 0");
+
+ // The range deletion should eventually succeed in the background.
+ assert.soon(() => {
+ return st.rs0.getPrimary().getCollection(ns).find().itcount() == 0;
+ });
+ });
- // Allow range deletion to continue.
- suspendRangeDeletionFailpoint.off();
+ test(st,
+ "Chunks with a refined shard key cannot migrate back onto a shard with " +
+ "an orphaned range created with the prior shard key",
+ () => {
+ // Enable failpoint which will cause range deletion to hang indefinitely.
+ let suspendRangeDeletionFailpoint =
+ configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion");
+
+ // Note that _waitForDelete has to be absent/false since we're suspending range
+ // deletion.
+ assert.commandWorked(st.s.adminCommand(
+ {moveChunk: ns, find: shardKeyValueInChunk, to: st.shard1.shardName}));
+
+ jsTestLog("Waiting for the suspendRangeDeletion failpoint to be hit");
+
+ suspendRangeDeletionFailpoint.wait();
+
+ jsTestLog("Refining the shard key");
+
+ // Create an index on the refined shard key.
+ assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey));
+
+ // Refine the shard key from just the field 'x' to 'x' and 'y'.
+ assert.commandWorked(
+ st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey}));
+
+ // The index on the original shard key shouldn't be required anymore.
+ assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey));
+
+ // We will use this to wait until the following migration has completed, since we
+ // expect the client to time out. Waiting for this failpoint technically just waits for
+ // the recipient side of the migration to complete, but it's expected that if the
+ // migration can get to that point, then it should be able to succeed overall.
+ let hangDonorAtEndOfMigration =
+ configureFailPoint(st.rs1.getPrimary(), "moveChunkHangAtStep6");
+
+ jsTestLog("Attempting to move the chunk back to shard 0");
+ // Attempt to move the chunk back to shard 0, sending it with maxTimeMS. Since there
+ // will be orphaned documents still on shard 0 (because range deletion is paused), we
+            // expect this command to time out.
+ assert.commandFailedWithCode(st.s.adminCommand({
+ moveChunk: ns,
+ find: refinedShardKeyValueInChunk,
+ to: st.shard0.shardName,
+ maxTimeMS: 1000
+ }),
+ ErrorCodes.MaxTimeMSExpired);
+
+ // Hang after waiting for orphan cleanup so that in the test we can check for orphans
+ // on disk before documents begin migrating.
+ let hangRecipient =
+ configureFailPoint(st.rs0.getPrimary(), "migrateThreadHangAtStep1");
+
+ // Allow range deletion to continue.
+ suspendRangeDeletionFailpoint.off();
+
+ jsTestLog("Waiting for orphans to be removed from shard 0");
+
+ // The range deletion should eventually succeed in the background.
+ assert.soon(() => {
+ return st.rs0.getPrimary().getCollection(ns).find().itcount() == 0;
+ });
+
+ hangRecipient.off();
+
+ // Wait for the previous migration to complete before continuing.
+ hangDonorAtEndOfMigration.wait();
+ hangDonorAtEndOfMigration.off();
+ });
- jsTestLog("Waiting for orphans to be removed from shard 0");
+ // This test was created to reproduce a specific bug, which is why it may sound like an odd
+ // thing to test. See SERVER-46386 for more details.
+ test(st,
+ "Range deletion tasks created prior to refining the shard key do not " +
+ "conflict with non-overlapping ranges once the shard key is refined",
+ () => {
+ // Enable failpoint which will cause range deletion to hang indefinitely.
+ let suspendRangeDeletionFailpoint =
+ configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion");
- // The range deletion should eventually succeed in the background.
- assert.soon(() => {
- return st.rs0.getPrimary().getCollection(ns).find().itcount() == 0;
- });
-});
+ // Note that _waitForDelete has to be absent/false since we're suspending range
+ // deletion.
+ assert.commandWorked(st.s.adminCommand(
+ {moveChunk: ns, find: shardKeyValueInChunk, to: st.shard1.shardName}));
-test("Chunks with a refined shard key cannot migrate back onto a shard with " +
- "orphaned documents created with the prior shard key",
- () => {
- // Enable failpoint which will cause range deletion to hang indefinitely.
- let suspendRangeDeletionFailpoint =
- configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion");
+ jsTestLog("Waiting for the suspendRangeDeletion failpoint to be hit");
- // Note that _waitForDelete has to be absent/false since we're suspending range
- // deletion.
- assert.commandWorked(st.s.adminCommand(
- {moveChunk: ns, find: shardKeyValueInChunk, to: st.shard1.shardName}));
+ suspendRangeDeletionFailpoint.wait();
- jsTestLog("Waiting for the suspendRangeDeletion failpoint to be hit");
+ jsTestLog("Refining the shard key");
- suspendRangeDeletionFailpoint.wait();
+ // Create an index on the refined shard key.
+ assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey));
- jsTestLog("Refining the shard key");
+ // Refine the shard key from just the field 'x' to 'x' and 'y'.
+ assert.commandWorked(
+ st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey}));
- // Create an index on the refined shard key.
- assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey));
+ // The index on the original shard key shouldn't be required anymore.
+ assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey));
- // Refine the shard key from just the field 'x' to 'x' and 'y'.
- assert.commandWorked(
- st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey}));
+ // Step down current primary.
+ assert.commandWorked(st.rs0.getPrimary().adminCommand(
+ {replSetStepDown: ReplSetTest.kForeverSecs, force: 1}));
- // The index on the original shard key shouldn't be required anymore.
- assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey));
+ // Allow range deletion to continue on old node. This isn't required for this test to
+ // proceed since we only care about the new primary, but it's worth cleaning up.
+ suspendRangeDeletionFailpoint.off();
- jsTestLog("Attempting to move the chunk back to shard 0");
- // Attempt to move the chunk back to shard 0, sending it with maxTimeMS. Since there
- // will be orphaned documents still on shard 0 (because range deletion is paused), we
- // expected this command to time out.
- const awaitResult = startParallelShell(
- funWithArgs(function(ns, refinedShardKeyValueInChunk, toShardName) {
- assert.commandFailedWithCode(db.adminCommand({
- moveChunk: ns,
- find: refinedShardKeyValueInChunk,
- to: toShardName,
- maxTimeMS: 1000
- }),
- ErrorCodes.MaxTimeMSExpired);
- }, ns, refinedShardKeyValueInChunk, st.shard0.shardName), st.s.port);
- awaitResult();
+ jsTestLog("Waiting for orphans to be removed from shard 0");
- // Allow range deletion to continue.
- suspendRangeDeletionFailpoint.off();
+ // The range deletion should eventually succeed in the background on the new primary.
+ assert.soon(() => {
+ return st.rs0.getPrimary().getCollection(ns).find().itcount() == 0;
+ });
- jsTestLog("Waiting for orphans to be removed from shard 0");
+ // Wait for the donor to learn about the new primary on the recipient.
+ awaitRSClientHosts(
+ st.rs1.getPrimary(), st.rs0.getPrimary(), {ok: true, ismaster: true});
- // The range deletion should eventually succeed in the background.
- assert.soon(() => {
- return st.rs0.getPrimary().getCollection(ns).find().itcount() == 0;
+ // We should be able to move the chunk back to shard 0 now that orphans are gone.
+ assert.commandWorked(st.s.adminCommand({
+ moveChunk: ns,
+ find: refinedShardKeyValueInChunk,
+ to: st.shard0.shardName,
+ _waitForDelete: true
+ }));
});
- // Moving the chunk back to shard 0 should now succeed.
- assert.commandWorked(st.s.adminCommand(
- {moveChunk: ns, find: refinedShardKeyValueInChunk, to: st.shard0.shardName}));
- });
+ st.stop();
+})();
-// This test was created to reproduce a specific bug, which is why it may sound like an odd thing to
-// test. See SERVER-46386 for more details.
-test("Range deletion tasks created prior to refining the shard key do not " +
- "conflict with non-overlapping ranges once the shard key is refined",
- () => {
- // Enable failpoint which will cause range deletion to hang indefinitely.
- let suspendRangeDeletionFailpoint =
- configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion");
+// Tests with resumable range deleter disabled.
+(() => {
+ const st = new ShardingTest(
+ {shards: 2, shardOptions: {setParameter: {"disableResumableRangeDeleter": true}}});
- // Note that _waitForDelete has to be absent/false since we're suspending range deletion.
- assert.commandWorked(st.s.adminCommand(
- {moveChunk: ns, find: shardKeyValueInChunk, to: st.shard1.shardName}));
+ test(st,
+ "Refining the shard key does not prevent removal of orphaned documents on a donor" +
+ " shard after a successful migration",
+ () => {
+ // Enable failpoint which will cause range deletion to hang indefinitely.
+ let suspendRangeDeletionFailpoint =
+ configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion");
- jsTestLog("Waiting for the suspendRangeDeletion failpoint to be hit");
+ // Note that _waitForDelete has to be absent/false since we're suspending range
+ // deletion.
+ assert.commandWorked(st.s.adminCommand(
+ {moveChunk: ns, find: shardKeyValueInChunk, to: st.shard1.shardName}));
- suspendRangeDeletionFailpoint.wait();
+ jsTestLog("Waiting for the suspendRangeDeletion failpoint to be hit");
- jsTestLog("Refining the shard key");
+ suspendRangeDeletionFailpoint.wait();
- // Create an index on the refined shard key.
- assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey));
+ jsTestLog("Refining the shard key");
- // Refine the shard key from just the field 'x' to 'x' and 'y'.
- assert.commandWorked(
- st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey}));
+ // Create an index on the refined shard key.
+ assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey));
- // The index on the original shard key shouldn't be required anymore.
- assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey));
+ // Refine the shard key from just the field 'x' to 'x' and 'y'.
+ assert.commandWorked(
+ st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey}));
- // Step down current primary.
- assert.commandWorked(st.rs0.getPrimary().adminCommand(
- {replSetStepDown: ReplSetTest.kForeverSecs, force: 1}));
+ // The index on the original shard key shouldn't be required anymore.
+ assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey));
- // Allow range deletion to continue on old node. This isn't required for this test to
- // proceed since we only care about the new primary, but it's worth cleaning up.
- suspendRangeDeletionFailpoint.off();
+ // Allow range deletion to continue.
+ suspendRangeDeletionFailpoint.off();
- jsTestLog("Waiting for orphans to be removed from shard 0");
+ jsTestLog("Waiting for orphans to be removed from shard 0");
- // The range deletion should eventually succeed in the background on the new primary.
- assert.soon(() => {
- return st.rs0.getPrimary().getCollection(ns).find().itcount() == 0;
+ // The range deletion should eventually succeed in the background.
+ assert.soon(() => {
+ return st.rs0.getPrimary().getCollection(ns).find().itcount() == 0;
+ });
});
- // Wait for the donor to learn about the new primary on the recipient.
- awaitRSClientHosts(st.rs1.getPrimary(), st.rs0.getPrimary(), {ok: true, ismaster: true});
+ test(st,
+ "Chunks with a refined shard key cannot migrate back onto a shard with " +
+ "an orphaned range created with the prior shard key",
+ () => {
+ // Enable failpoint which will cause range deletion to hang indefinitely.
+ let suspendRangeDeletionFailpoint =
+ configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion");
+
+ // Note that _waitForDelete has to be absent/false since we're suspending range
+ // deletion.
+ assert.commandWorked(st.s.adminCommand(
+ {moveChunk: ns, find: shardKeyValueInChunk, to: st.shard1.shardName}));
+
+ jsTestLog("Waiting for the suspendRangeDeletion failpoint to be hit");
+
+ suspendRangeDeletionFailpoint.wait();
+
+ jsTestLog("Refining the shard key");
+
+ // Create an index on the refined shard key.
+ assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey));
+
+ // Refine the shard key from just the field 'x' to 'x' and 'y'.
+ assert.commandWorked(
+ st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey}));
+
+ // The index on the original shard key shouldn't be required anymore.
+ assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey));
+
+ // We will use this to wait until the following migration has completed, since we
+ // expect the client to time out. Waiting for this failpoint technically just waits for
+ // the recipient side of the migration to complete, but it's expected that if the
+ // migration can get to that point, then it should be able to succeed overall.
+ let hangDonorAtEndOfMigration =
+ configureFailPoint(st.rs1.getPrimary(), "moveChunkHangAtStep6");
+
+ jsTestLog("Attempting to move the chunk back to shard 0");
+
+ // Attempt to move the chunk back to shard 0, sending it with maxTimeMS. Since there
+ // will be orphaned documents still on shard 0 (because range deletion is paused), we
+            // expect this command to time out. This will NOT fail the migration, however, since
+ // that occurs in a background OperationContext.
+ assert.commandFailedWithCode(st.s.adminCommand({
+ moveChunk: ns,
+ find: refinedShardKeyValueInChunk,
+ to: st.shard0.shardName,
+ maxTimeMS: 1000
+ }),
+ ErrorCodes.MaxTimeMSExpired);
+
+ // Hang after waiting for orphan cleanup so that in the test we can check for orphans
+ // on disk before documents begin migrating.
+ let hangRecipient =
+ configureFailPoint(st.rs0.getPrimary(), "migrateThreadHangAtStep1");
+
+ // Allow range deletion to continue.
+ suspendRangeDeletionFailpoint.off();
+
+ jsTestLog("Waiting for orphans to be removed from shard 0");
+
+ // The range deletion should eventually succeed in the background.
+ assert.soon(() => {
+ return st.rs0.getPrimary().getCollection(ns).find().itcount() == 0;
+ });
+
+ hangRecipient.off();
+
+ // Wait for the previous migration to complete before continuing.
+ hangDonorAtEndOfMigration.wait();
+ hangDonorAtEndOfMigration.off();
+
+ // TODO (SERVER-47003): There will be a left-over entry in config.migrations after the
+ // previous moveChunk fails with MaxTimeMSExpired, so we drop the collection. Otherwise
+ // future migrations would receive a DuplicateKeyError when trying to update
+ // config.migrations.
+ st.config.getSiblingDB('config').migrations.drop();
+ });
- // We should be able to move the chunk back to shard 0 now that orphans are gone.
- assert.commandWorked(st.s.adminCommand({
- moveChunk: ns,
- find: refinedShardKeyValueInChunk,
- to: st.shard0.shardName,
- _waitForDelete: true
- }));
- });
+ test(st,
+ "Refining the shard key does not prevent removal of orphaned documents on a recipient" +
+ " shard after a failed migration",
+ () => {
+ let hangRecipientAfterCloningDocuments =
+ configureFailPoint(st.rs1.getPrimary(), "migrateThreadHangAtStep3");
+
+ // Attempt to move the chunk to shard 1. This will clone all documents from shard 0 to
+ // shard 1 and then block behind the hangRecipientAfterCloningDocuments failpoint.
+ // Then, when the index is created on the refined shard key, the migration will be
+ // interrupted, causing it to fail with error code Interrupted.
+ const awaitResult =
+ startParallelShell(funWithArgs(function(ns, shardKeyValueInChunk, toShardName) {
+ assert.commandFailedWithCode(db.adminCommand({
+ moveChunk: ns,
+ find: shardKeyValueInChunk,
+ to: toShardName,
+ _waitForDelete: true
+ }),
+ ErrorCodes.Interrupted);
+ jsTestLog("Recipient failed in parallel shell");
+ }, ns, shardKeyValueInChunk, st.shard1.shardName), st.s.port);
+
+ jsTestLog("Waiting for recipient to finish cloning documents");
+
+ hangRecipientAfterCloningDocuments.wait();
+
+ jsTestLog("Refining the shard key");
+
+ // Create an index on the refined shard key.
+ assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey));
+
+ // Refine the shard key from just the field 'x' to 'x' and 'y'.
+ assert.commandWorked(
+ st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey}));
+
+ // The index on the original shard key shouldn't be required anymore.
+ assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey));
+
+ // Turn off failpoint and wait for recipient to fail.
+ hangRecipientAfterCloningDocuments.off();
+ awaitResult();
+
+ // TODO (SERVER-47025): Without creating this index, the range deleter will hang
+ // indefinitely looking for a shard key index.
+ assert.commandWorked(st.shard1.getCollection(ns).createIndex(refinedShardKey));
+
+ jsTestLog("Waiting for orphans to be removed from shard 1");
+
+ // The range deletion should eventually succeed in the background on the recipient.
+ assert.soon(() => {
+ return st.rs1.getPrimary().getCollection(ns).find().itcount() == 0;
+ });
+ });
-st.stop();
+ st.stop();
+})();
})();
diff --git a/src/mongo/db/s/metadata_manager.cpp b/src/mongo/db/s/metadata_manager.cpp
index dc5adf82c97..73736707a67 100644
--- a/src/mongo/db/s/metadata_manager.cpp
+++ b/src/mongo/db/s/metadata_manager.cpp
@@ -57,13 +57,8 @@ using CallbackArgs = TaskExecutor::CallbackArgs;
* Returns whether the given metadata object has a chunk owned by this shard that overlaps the
* input range.
*/
-bool metadataOverlapsRange(const boost::optional<CollectionMetadata>& metadata,
- const ChunkRange& range) {
- if (!metadata) {
- return false;
- }
-
- auto metadataShardKeyPattern = KeyPattern(metadata->getKeyPattern());
+bool metadataOverlapsRange(const CollectionMetadata& metadata, const ChunkRange& range) {
+ auto metadataShardKeyPattern = KeyPattern(metadata.getKeyPattern());
// If the input range is shorter than the range in the ChunkManager inside
// 'metadata', we must extend its bounds to get a correct comparison. If the input
@@ -102,7 +97,15 @@ bool metadataOverlapsRange(const boost::optional<CollectionMetadata>& metadata,
}
}();
- return metadata->rangeOverlapsChunk(chunkRangeToCompareToMetadata);
+ return metadata.rangeOverlapsChunk(chunkRangeToCompareToMetadata);
+}
+
+bool metadataOverlapsRange(const boost::optional<CollectionMetadata>& metadata,
+ const ChunkRange& range) {
+ if (!metadata) {
+ return false;
+ }
+ return metadataOverlapsRange(metadata.get(), range);
}
} // namespace
@@ -240,7 +243,7 @@ void MetadataManager::setFilteringMetadata(CollectionMetadata remoteMetadata) {
for (auto it = _receivingChunks.begin(); it != _receivingChunks.end();) {
const ChunkRange receivingRange(it->first, it->second);
- if (!remoteMetadata.rangeOverlapsChunk(receivingRange)) {
+ if (!metadataOverlapsRange(remoteMetadata, receivingRange)) {
++it;
continue;
}
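The metadata_manager.cpp change above splits metadataOverlapsRange into an overload taking the CollectionMetadata directly and switches setFilteringMetadata to use it when pruning _receivingChunks. The point is that a receiving range recorded under the pre-refine shard key is shorter than the refined key, so its bounds must be padded with MinKey for the newly added fields before the overlap check. Below is a minimal, self-contained sketch of that bound-extension step; Key, Range, and extendToRefinedKey are simplified illustrative stand-ins, not MongoDB's actual ChunkRange/KeyPattern types.

// refine_overlap_sketch.cpp -- illustrative only; the types here are
// simplified stand-ins for MongoDB's BSONObj-based ChunkRange, not real APIs.
#include <cassert>
#include <climits>
#include <string>
#include <utility>
#include <vector>

constexpr int kMinKey = INT_MIN;  // models BSON MinKey
using Key = std::vector<std::pair<std::string, int>>;  // ordered (field, value) pairs
struct Range { Key min, max; };

// A range recorded under the old, shorter shard key (e.g. {x: 1}) is padded
// with MinKey for the fields added by the refine (e.g. 'y') so it can be
// compared against chunks expressed in the refined key {x: 1, y: 1}. This
// mirrors the bound extension performed inside metadataOverlapsRange.
Range extendToRefinedKey(const Range& r, const std::vector<std::string>& refinedFields) {
    Range out = r;
    for (size_t i = r.min.size(); i < refinedFields.size(); ++i) {
        out.min.emplace_back(refinedFields[i], kMinKey);
        out.max.emplace_back(refinedFields[i], kMinKey);
    }
    return out;
}

int main() {
    // Receiving range recorded before the refine: {x: 0} -> {x: 10}.
    Range receiving{{{"x", 0}}, {{"x", 10}}};
    // After refineCollectionShardKey the metadata's key is {x: 1, y: 1}.
    Range extended = extendToRefinedKey(receiving, {"x", "y"});
    assert(extended.min.size() == 2 && extended.min[1].second == kMinKey);
    assert(extended.max.size() == 2 && extended.max[1].second == kMinKey);
    // Only the extended range can be meaningfully checked for overlap against
    // the refined-key chunk map when setFilteringMetadata prunes _receivingChunks.
    return 0;
}

With that extension applied, a pre-refine receiving range such as {x: 0} -> {x: 10} matches the refined-key chunk that now covers it, so the stale _receivingChunks entry is removed rather than left behind.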
diff --git a/src/mongo/db/s/metadata_manager.h b/src/mongo/db/s/metadata_manager.h
index bd2c0189742..27049e16490 100644
--- a/src/mongo/db/s/metadata_manager.h
+++ b/src/mongo/db/s/metadata_manager.h
@@ -107,6 +107,13 @@ public:
void toBSONPending(BSONArrayBuilder& bb) const;
/**
+ * Returns the number of items in the _receivingChunks list. Useful for unit tests.
+ */
+ size_t numberOfReceivingChunks() {
+ return _receivingChunks.size();
+ }
+
+ /**
* Appends information on all the chunk ranges in rangesToClean to builder.
*/
void append(BSONObjBuilder* builder) const;
diff --git a/src/mongo/db/s/metadata_manager_test.cpp b/src/mongo/db/s/metadata_manager_test.cpp
index 58c663c4a4f..57947948fcf 100644
--- a/src/mongo/db/s/metadata_manager_test.cpp
+++ b/src/mongo/db/s/metadata_manager_test.cpp
@@ -73,20 +73,20 @@ protected:
/**
* Returns an instance of CollectionMetadata which has no chunks owned by 'thisShard'.
*/
- static CollectionMetadata makeEmptyMetadata() {
+ static CollectionMetadata makeEmptyMetadata(
+ const KeyPattern& shardKeyPattern = kShardKeyPattern,
+ const ChunkRange& range = ChunkRange{BSON(kPattern << MINKEY), BSON(kPattern << MAXKEY)},
+ UUID uuid = UUID::gen()) {
const OID epoch = OID::gen();
auto rt = RoutingTableHistory::makeNew(
kNss,
- UUID::gen(),
- kShardKeyPattern,
+ uuid,
+ shardKeyPattern,
nullptr,
false,
epoch,
- {ChunkType{kNss,
- ChunkRange{BSON(kPattern << MINKEY), BSON(kPattern << MAXKEY)},
- ChunkVersion(1, 0, epoch),
- kOtherShard}});
+ {ChunkType{kNss, range, ChunkVersion(1, 0, epoch), kOtherShard}});
std::shared_ptr<ChunkManager> cm = std::make_shared<ChunkManager>(rt, boost::none);
@@ -103,11 +103,16 @@ protected:
*/
static CollectionMetadata cloneMetadataPlusChunk(const ScopedCollectionDescription& collDesc,
const ChunkRange& range) {
+ return cloneMetadataPlusChunk(collDesc.get(), range);
+ }
+
+ static CollectionMetadata cloneMetadataPlusChunk(const CollectionMetadata& collMetadata,
+ const ChunkRange& range) {
const BSONObj& minKey = range.getMin();
const BSONObj& maxKey = range.getMax();
- ASSERT(!rangeMapOverlaps(collDesc->getChunks(), minKey, maxKey));
+ ASSERT(!rangeMapOverlaps(collMetadata.getChunks(), minKey, maxKey));
- auto cm = collDesc->getChunkManager();
+ auto cm = collMetadata.getChunkManager();
const auto chunkToSplit = cm->findIntersectingChunkWithSimpleCollation(minKey);
ASSERT_BSONOBJ_GTE(minKey, chunkToSplit.getMin());
@@ -179,6 +184,63 @@ TEST_F(MetadataManagerTest, CleanUpForMigrateIn) {
ASSERT_EQ(0UL, _manager->numberOfRangesToCleanStillInUse());
}
+TEST_F(MetadataManagerTest,
+ ChunkInReceivingChunksListIsRemovedAfterShardKeyRefineIfMigrationSucceeded) {
+ _manager->setFilteringMetadata(makeEmptyMetadata());
+
+ // Simulate receiving a range. This will add an item to _receivingChunks.
+ ChunkRange range(BSON("key" << 0), BSON("key" << 10));
+ auto notif1 = _manager->beginReceive(range);
+
+ ASSERT_EQ(_manager->numberOfReceivingChunks(), 1);
+
+ // Simulate a situation in which the migration completes, and then the shard key is refined,
+ // before this shard discovers the updated metadata.
+ auto uuid = _manager->getActiveMetadata(boost::none)->getChunkManager()->getUUID().get();
+ ChunkRange refinedRange(BSON("key" << 0 << "other" << MINKEY),
+ BSON("key" << 10 << "other" << MINKEY));
+ auto refinedMetadata = makeEmptyMetadata(BSON(kPattern << 1 << "other" << 1),
+ ChunkRange(BSON("key" << MINKEY << "other" << MINKEY),
+ BSON("key" << MAXKEY << "other" << MAXKEY)),
+ uuid);
+
+ // Set the updated chunk map on the MetadataManager.
+ _manager->setFilteringMetadata(cloneMetadataPlusChunk(refinedMetadata, refinedRange));
+ // Because the refined range overlaps with the received range (pre-refine), this should remove
+ // the item in _receivingChunks.
+ ASSERT_EQ(_manager->numberOfReceivingChunks(), 0);
+}
+
+TEST_F(MetadataManagerTest,
+ ChunkInReceivingChunksListIsNotRemovedAfterShardKeyRefineIfNonOverlappingRangeIsReceived) {
+ _manager->setFilteringMetadata(makeEmptyMetadata());
+
+ // Simulate receiving a range. This will add an item to _receivingChunks.
+ ChunkRange range(BSON("key" << 0), BSON("key" << 10));
+ auto notif1 = _manager->beginReceive(range);
+ ASSERT_EQ(_manager->numberOfReceivingChunks(), 1);
+
+ // Simulate a situation in which the shard key is refined and this shard discovers
+ // updated metadata where it owns some range that does not overlap with the range being migrated
+ // in.
+ auto uuid = _manager->getActiveMetadata(boost::none)->getChunkManager()->getUUID().get();
+ ChunkRange refinedNonOverlappingRange(BSON("key" << -10 << "other" << MINKEY),
+ BSON("key" << 0 << "other" << MINKEY));
+
+ auto refinedMetadata = makeEmptyMetadata(BSON(kPattern << 1 << "other" << 1),
+ ChunkRange(BSON("key" << MINKEY << "other" << MINKEY),
+ BSON("key" << MAXKEY << "other" << MAXKEY)),
+ uuid);
+
+ // Set the updated chunk map on the MetadataManager.
+ _manager->setFilteringMetadata(
+ cloneMetadataPlusChunk(refinedMetadata, refinedNonOverlappingRange));
+
+ // Because the refined range does not overlap with the received range (pre-refine), this should
+ // NOT remove the item in _receivingChunks.
+ ASSERT_EQ(_manager->numberOfReceivingChunks(), 1);
+}
+
TEST_F(MetadataManagerTest, TrackOrphanedDataCleanupBlocksOnScheduledRangeDeletions) {
ChunkRange cr1(BSON("key" << 0), BSON("key" << 10));
diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp
index bea8ac3aa30..19de4193563 100644
--- a/src/mongo/db/s/migration_destination_manager.cpp
+++ b/src/mongo/db/s/migration_destination_manager.cpp
@@ -809,6 +809,7 @@ void MigrationDestinationManager::_migrateThread() {
stdx::lock_guard<Latch> lk(_mutex);
_sessionId.reset();
+ _collUuid.reset();
_scopedReceiveChunk.reset();
_isActiveCV.notify_all();
}
@@ -892,6 +893,9 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) {
// Synchronously delete any data which might have been left orphaned in the range
// being moved, and wait for completion
+ // Needed for _forgetPending to make sure the collection has the same UUID at the end of
+ // an aborted migration as at the beginning. Must be set before calling _notePending.
+ _collUuid = donorCollectionOptionsAndIndexes.uuid;
auto cleanupCompleteFuture = _notePending(outerOpCtx, range);
auto cleanupStatus = cleanupCompleteFuture.getNoThrow(outerOpCtx);
// Wait for the range deletion to report back. Swallow
@@ -1356,8 +1360,9 @@ SharedSemiFuture<void> MigrationDestinationManager::_notePending(OperationContex
auto* const css = CollectionShardingRuntime::get(opCtx, _nss);
const auto optMetadata = css->getCurrentMetadataIfKnown();
- // This can currently happen because drops aren't synchronized with in-migrations. The idea for
- // checking this here is that in the future we shouldn't have this problem.
+ // This can currently happen because drops and shard key refine operations aren't guaranteed to
+ // be synchronized with in-migrations. The idea for checking this here is that in the future we
+ // shouldn't have this problem.
if (!optMetadata || !(*optMetadata)->isSharded() ||
(*optMetadata)->getCollVersion().epoch() != _epoch) {
return Status{ErrorCodes::StaleShardVersion,
@@ -1388,10 +1393,14 @@ void MigrationDestinationManager::_forgetPending(OperationContext* opCtx, ChunkR
// This can currently happen because drops aren't synchronized with in-migrations. The idea for
// checking this here is that in the future we shouldn't have this problem.
+ //
+ // _collUuid will always be set if _notePending was called, so if it is not set, there is no
+ // need to do anything. If it is set, we use it to ensure that the collection UUID has not
+ // changed since the beginning of migration.
if (!optMetadata || !(*optMetadata)->isSharded() ||
- (*optMetadata)->getCollVersion().epoch() != _epoch) {
+ (_collUuid && !(*optMetadata)->uuidMatches(*_collUuid))) {
LOGV2(22009,
- "No need to forget pending chunk {range} because the epoch for {nss_ns} changed",
+ "No need to forget pending chunk {range} because the uuid for {nss_ns} changed",
"range"_attr = redact(range.toString()),
"nss_ns"_attr = _nss.ns());
return;
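The migration_destination_manager.cpp hunks address the other half of the problem: _forgetPending used to skip cleanup whenever the collection epoch changed, but refineCollectionShardKey bumps the epoch while the collection (and its UUID) stays the same, so an aborted migration could leave its entry in _receivingChunks forever. The patch records the collection UUID before calling _notePending and keys the skip decision on a UUID mismatch instead. A minimal sketch of that guard follows; Metadata and the two helper functions are simplified stand-ins, not the real CollectionMetadata interface.

// forget_pending_guard_sketch.cpp -- illustrative only; Metadata is a
// simplified stand-in, not MongoDB's CollectionMetadata.
#include <cassert>
#include <optional>
#include <string>

struct Metadata {
    std::string epoch;  // changes when the shard key is refined
    std::string uuid;   // stable for the lifetime of the collection
};

// Old guard: skip forgetting the pending range whenever the epoch changed.
// A shard key refine changes the epoch, so the stale entry was never removed.
bool skipCleanupByEpoch(const Metadata& current, const std::string& epochAtMigrationStart) {
    return current.epoch != epochAtMigrationStart;
}

// New guard: skip only if the collection itself was replaced, i.e. the UUID
// captured at the start of the migration no longer matches.
bool skipCleanupByUuid(const Metadata& current,
                       const std::optional<std::string>& uuidAtMigrationStart) {
    return uuidAtMigrationStart && current.uuid != *uuidAtMigrationStart;
}

int main() {
    Metadata atStart{"epoch-1", "uuid-A"};
    Metadata afterRefine{"epoch-2", "uuid-A"};  // refine bumps the epoch, keeps the UUID

    assert(skipCleanupByEpoch(afterRefine, atStart.epoch));  // old behaviour: cleanup skipped (the bug)
    assert(!skipCleanupByUuid(afterRefine, atStart.uuid));   // new behaviour: cleanup proceeds
    return 0;
}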
diff --git a/src/mongo/db/s/migration_destination_manager.h b/src/mongo/db/s/migration_destination_manager.h
index ea322655509..403800427fd 100644
--- a/src/mongo/db/s/migration_destination_manager.h
+++ b/src/mongo/db/s/migration_destination_manager.h
@@ -210,6 +210,7 @@ private:
LogicalSessionId _lsid;
TxnNumber _txnNumber;
NamespaceString _nss;
+ boost::optional<UUID> _collUuid;
ConnectionString _fromShardConnString;
ShardId _fromShard;
ShardId _toShard;