commit c5eea7753b2fe3082d853ff9400117c85ac42dab
tree   3a0b10e16e5f02b64341783299ee9ccfe67eb9b9
parent 491ab3f67681e83f4184f4ffce07c6c53d9441d9
author    Matthew Saltz <matthew.saltz@mongodb.com>  2020-03-16 15:56:11 -0400
committer Evergreen Agent <no-reply@evergreen.mongodb.com>  2020-03-26 22:51:39 +0000
SERVER-46370 Maintain _receivingChunks list correctly after shard key refine
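The underlying issue: refineCollectionShardKey bumps a collection's epoch but leaves documents and pending range bookkeeping expressed in terms of the old, shorter shard key. Before this change, a range recorded in the _receivingChunks list could fail to match the post-refine metadata, leaving the list stale and orphaned documents behind. The fix compares pre-refine ranges against refined metadata by extending their bounds, and keys the recipient's cleanup decision off the collection UUID rather than the epoch (a refine changes the epoch but preserves the UUID; see _forgetPending in migration_destination_manager.cpp below).

The donor-side scenario the new test drives, condensed into a minimal sketch (distilled from range_deleter_interacts_correctly_with_refine_shard_key.js below, assuming the helpers that file loads; this is an illustration, not an additional test):

    load('jstests/libs/fail_point_util.js');         // configureFailPoint
    load('jstests/libs/parallel_shell_helpers.js');  // funWithArgs (used in a later sketch)

    const st = new ShardingTest({shards: 2});
    const ns = "test.foo";
    assert.commandWorked(st.s.adminCommand({enableSharding: "test"}));
    assert.commandWorked(st.s.adminCommand({shardCollection: ns, key: {x: 1}}));
    assert.commandWorked(st.s.getCollection(ns).insert({x: 1}));

    // Pause range deletion on the donor, then migrate the chunk away so the donor
    // is left holding an orphaned document for the migrated range.
    const suspended = configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion");
    assert.commandWorked(
        st.s.adminCommand({moveChunk: ns, find: {x: 1}, to: st.shard1.shardName}));
    suspended.wait();

    // Refine the shard key while the deletion task is still pending; the recorded
    // range now has fewer fields than the shard key {x: 1, y: 1}.
    assert.commandWorked(st.s.getCollection(ns).createIndex({x: 1, y: 1}));
    assert.commandWorked(st.s.adminCommand({refineCollectionShardKey: ns, key: {x: 1, y: 1}}));

    // With the fix, the pre-refine range still matches the refined metadata, so
    // the orphan is removed once deletion resumes.
    suspended.off();
    assert.soon(() => st.rs0.getPrimary().getCollection(ns).find().itcount() === 0);
    st.stop();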
 buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml   |   1
 jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js | 441
 src/mongo/db/s/metadata_manager.cpp                                          |  21
 src/mongo/db/s/metadata_manager.h                                            |   7
 src/mongo/db/s/metadata_manager_test.cpp                                     |  80
 src/mongo/db/s/migration_destination_manager.cpp                             |  17
 src/mongo/db/s/migration_destination_manager.h                               |   1
 7 files changed, 427 insertions(+), 141 deletions(-)
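One behavior worth calling out before the diffs: a chunk addressed by the refined key cannot migrate back onto a shard that still holds an orphaned range recorded under the old key; the migration blocks behind that range's deletion. Roughly, picking up the sketch above just before suspended.off() (same st and ns; a simplified distillation, not the verbatim test, which also coordinates extra failpoints):

    // While deletion of the pre-refine range is still suspended on shard 0,
    // moving the refined chunk back blocks on orphan cleanup, so a short
    // maxTimeMS times out (the migration itself continues in the background).
    assert.commandFailedWithCode(st.s.adminCommand({
        moveChunk: ns,
        find: {x: 1, y: 1},
        to: st.shard0.shardName,
        maxTimeMS: 1000
    }),
                                 ErrorCodes.MaxTimeMSExpired);

    // Once deletion resumes and the orphans are gone, a migration back to
    // shard 0 can succeed.
    suspended.off();
    assert.soon(() => st.rs0.getPrimary().getCollection(ns).find().itcount() === 0);
    assert.commandWorked(st.s.adminCommand(
        {moveChunk: ns, find: {x: 1, y: 1}, to: st.shard0.shardName, _waitForDelete: true}));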
diff --git a/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml b/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml index e23481be51c..4be1df1d525 100644 --- a/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml +++ b/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml @@ -50,6 +50,7 @@ selector: - jstests/sharding/prefix_shard_key.js - jstests/sharding/presplit.js - jstests/sharding/query_config.js + - jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js - jstests/sharding/remove1.js - jstests/sharding/rename_across_mongos.js - jstests/sharding/shard1.js diff --git a/jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js b/jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js index d5ed8b095ef..5c3c22a52c9 100644 --- a/jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js +++ b/jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js @@ -13,7 +13,8 @@ load('jstests/libs/parallel_shell_helpers.js'); TestData.skipCheckingUUIDsConsistentAcrossCluster = true; const dbName = "test"; -const ns = dbName + ".foo"; +const collName = "foo"; +const ns = dbName + "." + collName; const originalShardKey = { x: 1 @@ -33,9 +34,7 @@ const refinedShardKeyValueInChunk = { y: 1 }; -const st = new ShardingTest({shards: {rs0: {nodes: 3}, rs1: {nodes: 3}}}); - -function setUp() { +function setUp(st) { // Create a sharded collection with two chunk on shard0, split at key {x: -1}. assert.commandWorked(st.s.adminCommand({enableSharding: dbName})); assert.commandWorked(st.s.adminCommand({movePrimary: dbName, to: st.shard0.shardName})); @@ -48,7 +47,7 @@ function setUp() { } } -function tearDown() { +function tearDown(st) { st.s.getCollection(ns).drop(); } @@ -56,165 +55,369 @@ function tearDown() { * Generic function to run a test. 'description' is a description of the test for logging * purposes and 'testBody' is the test function. */ -function test(description, testBody) { +function test(st, description, testBody) { jsTest.log(`Running Test Setup: ${description}`); - setUp(); + setUp(st); jsTest.log(`Running Test Body: ${description}`); testBody(); jsTest.log(`Running Test Tear-Down: ${description}`); - tearDown(); + tearDown(st); jsTest.log(`Finished Running Test: ${description}`); } -test("Refining the shard key does not prevent removal of orphaned documents", () => { - // Enable failpoint which will cause range deletion to hang indefinitely. - let suspendRangeDeletionFailpoint = - configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion"); +// Tests with resumable range deleter enabled. +(() => { + const st = new ShardingTest({shards: {rs0: {nodes: 3}, rs1: {nodes: 3}}}); + test(st, + "Refining the shard key does not prevent removal of orphaned documents on a donor" + + " shard after a successful migration", + () => { + // Enable failpoint which will cause range deletion to hang indefinitely. + let suspendRangeDeletionFailpoint = + configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion"); + + // Note that _waitForDelete has to be absent/false since we're suspending range + // deletion. + assert.commandWorked(st.s.adminCommand( + {moveChunk: ns, find: shardKeyValueInChunk, to: st.shard1.shardName})); + + jsTestLog("Waiting for the suspendRangeDeletion failpoint to be hit"); - // Note that _waitForDelete has to be absent/false since we're suspending range deletion. 
- assert.commandWorked( - st.s.adminCommand({moveChunk: ns, find: shardKeyValueInChunk, to: st.shard1.shardName})); + suspendRangeDeletionFailpoint.wait(); - jsTestLog("Waiting for the suspendRangeDeletion failpoint to be hit"); + jsTestLog("Refining the shard key"); - suspendRangeDeletionFailpoint.wait(); + // Create an index on the refined shard key. + assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey)); - jsTestLog("Refining the shard key"); + // Refine the shard key from just the field 'x' to 'x' and 'y'. + assert.commandWorked( + st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey})); - // Create an index on the refined shard key. - assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey)); + // The index on the original shard key shouldn't be required anymore. + assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey)); - // Refine the shard key from just the field 'x' to 'x' and 'y'. - assert.commandWorked(st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey})); + // Allow range deletion to continue. + suspendRangeDeletionFailpoint.off(); - // The index on the original shard key shouldn't be required anymore. - assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey)); + jsTestLog("Waiting for orphans to be removed from shard 0"); + + // The range deletion should eventually succeed in the background. + assert.soon(() => { + return st.rs0.getPrimary().getCollection(ns).find().itcount() == 0; + }); + }); - // Allow range deletion to continue. - suspendRangeDeletionFailpoint.off(); + test(st, + "Chunks with a refined shard key cannot migrate back onto a shard with " + + "an orphaned range created with the prior shard key", + () => { + // Enable failpoint which will cause range deletion to hang indefinitely. + let suspendRangeDeletionFailpoint = + configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion"); + + // Note that _waitForDelete has to be absent/false since we're suspending range + // deletion. + assert.commandWorked(st.s.adminCommand( + {moveChunk: ns, find: shardKeyValueInChunk, to: st.shard1.shardName})); + + jsTestLog("Waiting for the suspendRangeDeletion failpoint to be hit"); + + suspendRangeDeletionFailpoint.wait(); + + jsTestLog("Refining the shard key"); + + // Create an index on the refined shard key. + assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey)); + + // Refine the shard key from just the field 'x' to 'x' and 'y'. + assert.commandWorked( + st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey})); + + // The index on the original shard key shouldn't be required anymore. + assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey)); + + // We will use this to wait until the following migration has completed, since we + // expect the client to time out. Waiting for this failpoint technically just waits for + // the recipient side of the migration to complete, but it's expected that if the + // migration can get to that point, then it should be able to succeed overall. + let hangDonorAtEndOfMigration = + configureFailPoint(st.rs1.getPrimary(), "moveChunkHangAtStep6"); + + jsTestLog("Attempting to move the chunk back to shard 0"); + // Attempt to move the chunk back to shard 0, sending it with maxTimeMS. Since there + // will be orphaned documents still on shard 0 (because range deletion is paused), we + // expected this command to time out. 
+ assert.commandFailedWithCode(st.s.adminCommand({ + moveChunk: ns, + find: refinedShardKeyValueInChunk, + to: st.shard0.shardName, + maxTimeMS: 1000 + }), + ErrorCodes.MaxTimeMSExpired); + + // Hang after waiting for orphan cleanup so that in the test we can check for orphans + // on disk before documents begin migrating. + let hangRecipient = + configureFailPoint(st.rs0.getPrimary(), "migrateThreadHangAtStep1"); + + // Allow range deletion to continue. + suspendRangeDeletionFailpoint.off(); + + jsTestLog("Waiting for orphans to be removed from shard 0"); + + // The range deletion should eventually succeed in the background. + assert.soon(() => { + return st.rs0.getPrimary().getCollection(ns).find().itcount() == 0; + }); + + hangRecipient.off(); + + // Wait for the previous migration to complete before continuing. + hangDonorAtEndOfMigration.wait(); + hangDonorAtEndOfMigration.off(); + }); - jsTestLog("Waiting for orphans to be removed from shard 0"); + // This test was created to reproduce a specific bug, which is why it may sound like an odd + // thing to test. See SERVER-46386 for more details. + test(st, + "Range deletion tasks created prior to refining the shard key do not " + + "conflict with non-overlapping ranges once the shard key is refined", + () => { + // Enable failpoint which will cause range deletion to hang indefinitely. + let suspendRangeDeletionFailpoint = + configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion"); - // The range deletion should eventually succeed in the background. - assert.soon(() => { - return st.rs0.getPrimary().getCollection(ns).find().itcount() == 0; - }); -}); + // Note that _waitForDelete has to be absent/false since we're suspending range + // deletion. + assert.commandWorked(st.s.adminCommand( + {moveChunk: ns, find: shardKeyValueInChunk, to: st.shard1.shardName})); -test("Chunks with a refined shard key cannot migrate back onto a shard with " + - "orphaned documents created with the prior shard key", - () => { - // Enable failpoint which will cause range deletion to hang indefinitely. - let suspendRangeDeletionFailpoint = - configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion"); + jsTestLog("Waiting for the suspendRangeDeletion failpoint to be hit"); - // Note that _waitForDelete has to be absent/false since we're suspending range - // deletion. - assert.commandWorked(st.s.adminCommand( - {moveChunk: ns, find: shardKeyValueInChunk, to: st.shard1.shardName})); + suspendRangeDeletionFailpoint.wait(); - jsTestLog("Waiting for the suspendRangeDeletion failpoint to be hit"); + jsTestLog("Refining the shard key"); - suspendRangeDeletionFailpoint.wait(); + // Create an index on the refined shard key. + assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey)); - jsTestLog("Refining the shard key"); + // Refine the shard key from just the field 'x' to 'x' and 'y'. + assert.commandWorked( + st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey})); - // Create an index on the refined shard key. - assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey)); + // The index on the original shard key shouldn't be required anymore. + assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey)); - // Refine the shard key from just the field 'x' to 'x' and 'y'. - assert.commandWorked( - st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey})); + // Step down current primary. 
+ assert.commandWorked(st.rs0.getPrimary().adminCommand( + {replSetStepDown: ReplSetTest.kForeverSecs, force: 1})); - // The index on the original shard key shouldn't be required anymore. - assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey)); + // Allow range deletion to continue on old node. This isn't required for this test to + // proceed since we only care about the new primary, but it's worth cleaning up. + suspendRangeDeletionFailpoint.off(); - jsTestLog("Attempting to move the chunk back to shard 0"); - // Attempt to move the chunk back to shard 0, sending it with maxTimeMS. Since there - // will be orphaned documents still on shard 0 (because range deletion is paused), we - // expected this command to time out. - const awaitResult = startParallelShell( - funWithArgs(function(ns, refinedShardKeyValueInChunk, toShardName) { - assert.commandFailedWithCode(db.adminCommand({ - moveChunk: ns, - find: refinedShardKeyValueInChunk, - to: toShardName, - maxTimeMS: 1000 - }), - ErrorCodes.MaxTimeMSExpired); - }, ns, refinedShardKeyValueInChunk, st.shard0.shardName), st.s.port); - awaitResult(); + jsTestLog("Waiting for orphans to be removed from shard 0"); - // Allow range deletion to continue. - suspendRangeDeletionFailpoint.off(); + // The range deletion should eventually succeed in the background on the new primary. + assert.soon(() => { + return st.rs0.getPrimary().getCollection(ns).find().itcount() == 0; + }); - jsTestLog("Waiting for orphans to be removed from shard 0"); + // Wait for the donor to learn about the new primary on the recipient. + awaitRSClientHosts( + st.rs1.getPrimary(), st.rs0.getPrimary(), {ok: true, ismaster: true}); - // The range deletion should eventually succeed in the background. - assert.soon(() => { - return st.rs0.getPrimary().getCollection(ns).find().itcount() == 0; + // We should be able to move the chunk back to shard 0 now that orphans are gone. + assert.commandWorked(st.s.adminCommand({ + moveChunk: ns, + find: refinedShardKeyValueInChunk, + to: st.shard0.shardName, + _waitForDelete: true + })); }); - // Moving the chunk back to shard 0 should now succeed. - assert.commandWorked(st.s.adminCommand( - {moveChunk: ns, find: refinedShardKeyValueInChunk, to: st.shard0.shardName})); - }); + st.stop(); +})(); -// This test was created to reproduce a specific bug, which is why it may sound like an odd thing to -// test. See SERVER-46386 for more details. -test("Range deletion tasks created prior to refining the shard key do not " + - "conflict with non-overlapping ranges once the shard key is refined", - () => { - // Enable failpoint which will cause range deletion to hang indefinitely. - let suspendRangeDeletionFailpoint = - configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion"); +// Tests with resumable range deleter disabled. +(() => { + const st = new ShardingTest( + {shards: 2, shardOptions: {setParameter: {"disableResumableRangeDeleter": true}}}); - // Note that _waitForDelete has to be absent/false since we're suspending range deletion. - assert.commandWorked(st.s.adminCommand( - {moveChunk: ns, find: shardKeyValueInChunk, to: st.shard1.shardName})); + test(st, + "Refining the shard key does not prevent removal of orphaned documents on a donor" + + " shard after a successful migration", + () => { + // Enable failpoint which will cause range deletion to hang indefinitely. 
+ let suspendRangeDeletionFailpoint = + configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion"); - jsTestLog("Waiting for the suspendRangeDeletion failpoint to be hit"); + // Note that _waitForDelete has to be absent/false since we're suspending range + // deletion. + assert.commandWorked(st.s.adminCommand( + {moveChunk: ns, find: shardKeyValueInChunk, to: st.shard1.shardName})); - suspendRangeDeletionFailpoint.wait(); + jsTestLog("Waiting for the suspendRangeDeletion failpoint to be hit"); - jsTestLog("Refining the shard key"); + suspendRangeDeletionFailpoint.wait(); - // Create an index on the refined shard key. - assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey)); + jsTestLog("Refining the shard key"); - // Refine the shard key from just the field 'x' to 'x' and 'y'. - assert.commandWorked( - st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey})); + // Create an index on the refined shard key. + assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey)); - // The index on the original shard key shouldn't be required anymore. - assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey)); + // Refine the shard key from just the field 'x' to 'x' and 'y'. + assert.commandWorked( + st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey})); - // Step down current primary. - assert.commandWorked(st.rs0.getPrimary().adminCommand( - {replSetStepDown: ReplSetTest.kForeverSecs, force: 1})); + // The index on the original shard key shouldn't be required anymore. + assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey)); - // Allow range deletion to continue on old node. This isn't required for this test to - // proceed since we only care about the new primary, but it's worth cleaning up. - suspendRangeDeletionFailpoint.off(); + // Allow range deletion to continue. + suspendRangeDeletionFailpoint.off(); - jsTestLog("Waiting for orphans to be removed from shard 0"); + jsTestLog("Waiting for orphans to be removed from shard 0"); - // The range deletion should eventually succeed in the background on the new primary. - assert.soon(() => { - return st.rs0.getPrimary().getCollection(ns).find().itcount() == 0; + // The range deletion should eventually succeed in the background. + assert.soon(() => { + return st.rs0.getPrimary().getCollection(ns).find().itcount() == 0; + }); }); - // Wait for the donor to learn about the new primary on the recipient. - awaitRSClientHosts(st.rs1.getPrimary(), st.rs0.getPrimary(), {ok: true, ismaster: true}); + test(st, + "Chunks with a refined shard key cannot migrate back onto a shard with " + + "an orphaned range created with the prior shard key", + () => { + // Enable failpoint which will cause range deletion to hang indefinitely. + let suspendRangeDeletionFailpoint = + configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion"); + + // Note that _waitForDelete has to be absent/false since we're suspending range + // deletion. + assert.commandWorked(st.s.adminCommand( + {moveChunk: ns, find: shardKeyValueInChunk, to: st.shard1.shardName})); + + jsTestLog("Waiting for the suspendRangeDeletion failpoint to be hit"); + + suspendRangeDeletionFailpoint.wait(); + + jsTestLog("Refining the shard key"); + + // Create an index on the refined shard key. + assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey)); + + // Refine the shard key from just the field 'x' to 'x' and 'y'. 
+ assert.commandWorked( + st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey})); + + // The index on the original shard key shouldn't be required anymore. + assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey)); + + // We will use this to wait until the following migration has completed, since we + // expect the client to time out. Waiting for this failpoint technically just waits for + // the recipient side of the migration to complete, but it's expected that if the + // migration can get to that point, then it should be able to succeed overall. + let hangDonorAtEndOfMigration = + configureFailPoint(st.rs1.getPrimary(), "moveChunkHangAtStep6"); + + jsTestLog("Attempting to move the chunk back to shard 0"); + + // Attempt to move the chunk back to shard 0, sending it with maxTimeMS. Since there + // will be orphaned documents still on shard 0 (because range deletion is paused), we + // expected this command to time out. This will NOT fail the migration, however, since + // that occurs in a background OperationContext. + assert.commandFailedWithCode(st.s.adminCommand({ + moveChunk: ns, + find: refinedShardKeyValueInChunk, + to: st.shard0.shardName, + maxTimeMS: 1000 + }), + ErrorCodes.MaxTimeMSExpired); + + // Hang after waiting for orphan cleanup so that in the test we can check for orphans + // on disk before documents begin migrating. + let hangRecipient = + configureFailPoint(st.rs0.getPrimary(), "migrateThreadHangAtStep1"); + + // Allow range deletion to continue. + suspendRangeDeletionFailpoint.off(); + + jsTestLog("Waiting for orphans to be removed from shard 0"); + + // The range deletion should eventually succeed in the background. + assert.soon(() => { + return st.rs0.getPrimary().getCollection(ns).find().itcount() == 0; + }); + + hangRecipient.off(); + + // Wait for the previous migration to complete before continuing. + hangDonorAtEndOfMigration.wait(); + hangDonorAtEndOfMigration.off(); + + // TODO (SERVER-47003): There will be a left-over entry in config.migrations after the + // previous moveChunk fails with MaxTimeMSExpired, so we drop the collection. Otherwise + // future migrations would receive a DuplicateKeyError when trying to update + // config.migrations. + st.config.getSiblingDB('config').migrations.drop(); + }); - // We should be able to move the chunk back to shard 0 now that orphans are gone. - assert.commandWorked(st.s.adminCommand({ - moveChunk: ns, - find: refinedShardKeyValueInChunk, - to: st.shard0.shardName, - _waitForDelete: true - })); - }); + test(st, + "Refining the shard key does not prevent removal of orphaned documents on a recipient" + + " shard after a failed migration", + () => { + let hangRecipientAfterCloningDocuments = + configureFailPoint(st.rs1.getPrimary(), "migrateThreadHangAtStep3"); + + // Attempt to move the chunk to shard 1. This will clone all documents from shard 0 to + // shard 1 and then block behind the hangRecipientAfterCloningDocuments failpoint. + // Then, when the index is created on the refined shard key, the migration will be + // interrupted, causing it to fail with error code Interrupted. 
+ const awaitResult = + startParallelShell(funWithArgs(function(ns, shardKeyValueInChunk, toShardName) { + assert.commandFailedWithCode(db.adminCommand({ + moveChunk: ns, + find: shardKeyValueInChunk, + to: toShardName, + _waitForDelete: true + }), + ErrorCodes.Interrupted); + jsTestLog("Recipient failed in parallel shell"); + }, ns, shardKeyValueInChunk, st.shard1.shardName), st.s.port); + + jsTestLog("Waiting for recipient to finish cloning documents"); + + hangRecipientAfterCloningDocuments.wait(); + + jsTestLog("Refining the shard key"); + + // Create an index on the refined shard key. + assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey)); + + // Refine the shard key from just the field 'x' to 'x' and 'y'. + assert.commandWorked( + st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey})); + + // The index on the original shard key shouldn't be required anymore. + assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey)); + + // Turn off failpoint and wait for recipient to fail. + hangRecipientAfterCloningDocuments.off(); + awaitResult(); + + // TODO (SERVER-47025): Without creating this index, the range deleter will hang + // indefinitely looking for a shard key index. + assert.commandWorked(st.shard1.getCollection(ns).createIndex(refinedShardKey)); + + jsTestLog("Waiting for orphans to be removed from shard 1"); + + // The range deletion should eventually succeed in the background on the recipient. + assert.soon(() => { + return st.rs1.getPrimary().getCollection(ns).find().itcount() == 0; + }); + }); -st.stop(); + st.stop(); +})(); })(); diff --git a/src/mongo/db/s/metadata_manager.cpp b/src/mongo/db/s/metadata_manager.cpp index dc5adf82c97..73736707a67 100644 --- a/src/mongo/db/s/metadata_manager.cpp +++ b/src/mongo/db/s/metadata_manager.cpp @@ -57,13 +57,8 @@ using CallbackArgs = TaskExecutor::CallbackArgs; * Returns whether the given metadata object has a chunk owned by this shard that overlaps the * input range. */ -bool metadataOverlapsRange(const boost::optional<CollectionMetadata>& metadata, - const ChunkRange& range) { - if (!metadata) { - return false; - } - - auto metadataShardKeyPattern = KeyPattern(metadata->getKeyPattern()); +bool metadataOverlapsRange(const CollectionMetadata& metadata, const ChunkRange& range) { + auto metadataShardKeyPattern = KeyPattern(metadata.getKeyPattern()); // If the input range is shorter than the range in the ChunkManager inside // 'metadata', we must extend its bounds to get a correct comparison. 
If the input @@ -102,7 +97,15 @@ bool metadataOverlapsRange(const boost::optional<CollectionMetadata>& metadata, } }(); - return metadata->rangeOverlapsChunk(chunkRangeToCompareToMetadata); + return metadata.rangeOverlapsChunk(chunkRangeToCompareToMetadata); +} + +bool metadataOverlapsRange(const boost::optional<CollectionMetadata>& metadata, + const ChunkRange& range) { + if (!metadata) { + return false; + } + return metadataOverlapsRange(metadata.get(), range); } } // namespace @@ -240,7 +243,7 @@ void MetadataManager::setFilteringMetadata(CollectionMetadata remoteMetadata) { for (auto it = _receivingChunks.begin(); it != _receivingChunks.end();) { const ChunkRange receivingRange(it->first, it->second); - if (!remoteMetadata.rangeOverlapsChunk(receivingRange)) { + if (!metadataOverlapsRange(remoteMetadata, receivingRange)) { ++it; continue; } diff --git a/src/mongo/db/s/metadata_manager.h b/src/mongo/db/s/metadata_manager.h index bd2c0189742..27049e16490 100644 --- a/src/mongo/db/s/metadata_manager.h +++ b/src/mongo/db/s/metadata_manager.h @@ -107,6 +107,13 @@ public: void toBSONPending(BSONArrayBuilder& bb) const; /** + * Returns the number of items in the _receivingChunks list. Useful for unit tests. + */ + size_t numberOfReceivingChunks() { + return _receivingChunks.size(); + } + + /** * Appends information on all the chunk ranges in rangesToClean to builder. */ void append(BSONObjBuilder* builder) const; diff --git a/src/mongo/db/s/metadata_manager_test.cpp b/src/mongo/db/s/metadata_manager_test.cpp index 58c663c4a4f..57947948fcf 100644 --- a/src/mongo/db/s/metadata_manager_test.cpp +++ b/src/mongo/db/s/metadata_manager_test.cpp @@ -73,20 +73,20 @@ protected: /** * Returns an instance of CollectionMetadata which has no chunks owned by 'thisShard'. 
*/ - static CollectionMetadata makeEmptyMetadata() { + static CollectionMetadata makeEmptyMetadata( + const KeyPattern& shardKeyPattern = kShardKeyPattern, + const ChunkRange& range = ChunkRange{BSON(kPattern << MINKEY), BSON(kPattern << MAXKEY)}, + UUID uuid = UUID::gen()) { const OID epoch = OID::gen(); auto rt = RoutingTableHistory::makeNew( kNss, - UUID::gen(), - kShardKeyPattern, + uuid, + shardKeyPattern, nullptr, false, epoch, - {ChunkType{kNss, - ChunkRange{BSON(kPattern << MINKEY), BSON(kPattern << MAXKEY)}, - ChunkVersion(1, 0, epoch), - kOtherShard}}); + {ChunkType{kNss, range, ChunkVersion(1, 0, epoch), kOtherShard}}); std::shared_ptr<ChunkManager> cm = std::make_shared<ChunkManager>(rt, boost::none); @@ -103,11 +103,16 @@ protected: */ static CollectionMetadata cloneMetadataPlusChunk(const ScopedCollectionDescription& collDesc, const ChunkRange& range) { + return cloneMetadataPlusChunk(collDesc.get(), range); + } + + static CollectionMetadata cloneMetadataPlusChunk(const CollectionMetadata& collMetadata, + const ChunkRange& range) { const BSONObj& minKey = range.getMin(); const BSONObj& maxKey = range.getMax(); - ASSERT(!rangeMapOverlaps(collDesc->getChunks(), minKey, maxKey)); + ASSERT(!rangeMapOverlaps(collMetadata.getChunks(), minKey, maxKey)); - auto cm = collDesc->getChunkManager(); + auto cm = collMetadata.getChunkManager(); const auto chunkToSplit = cm->findIntersectingChunkWithSimpleCollation(minKey); ASSERT_BSONOBJ_GTE(minKey, chunkToSplit.getMin()); @@ -179,6 +184,63 @@ TEST_F(MetadataManagerTest, CleanUpForMigrateIn) { ASSERT_EQ(0UL, _manager->numberOfRangesToCleanStillInUse()); } +TEST_F(MetadataManagerTest, + ChunkInReceivingChunksListIsRemovedAfterShardKeyRefineIfMigrationSucceeded) { + _manager->setFilteringMetadata(makeEmptyMetadata()); + + // Simulate receiving a range. This will add an item to _receivingChunks. + ChunkRange range(BSON("key" << 0), BSON("key" << 10)); + auto notif1 = _manager->beginReceive(range); + + ASSERT_EQ(_manager->numberOfReceivingChunks(), 1); + + // Simulate a situation in which the migration completes, and then the shard key is refined, + // before this shard discovers the updated metadata. + auto uuid = _manager->getActiveMetadata(boost::none)->getChunkManager()->getUUID().get(); + ChunkRange refinedRange(BSON("key" << 0 << "other" << MINKEY), + BSON("key" << 10 << "other" << MINKEY)); + auto refinedMetadata = makeEmptyMetadata(BSON(kPattern << 1 << "other" << 1), + ChunkRange(BSON("key" << MINKEY << "other" << MINKEY), + BSON("key" << MAXKEY << "other" << MAXKEY)), + uuid); + + // Set the updated chunk map on the MetadataManager. + _manager->setFilteringMetadata(cloneMetadataPlusChunk(refinedMetadata, refinedRange)); + // Because the refined range overlaps with the received range (pre-refine), this should remove + // the item in _receivingChunks. + ASSERT_EQ(_manager->numberOfReceivingChunks(), 0); +} + +TEST_F(MetadataManagerTest, + ChunkInReceivingChunksListIsNotRemovedAfterShardKeyRefineIfNonOverlappingRangeIsReceived) { + _manager->setFilteringMetadata(makeEmptyMetadata()); + + // Simulate receiving a range. This will add an item to _receivingChunks. + ChunkRange range(BSON("key" << 0), BSON("key" << 10)); + auto notif1 = _manager->beginReceive(range); + ASSERT_EQ(_manager->numberOfReceivingChunks(), 1); + + // Simulate a situation in which the shard key is refined and this shard discovers + // updated metadata where it owns some range that does not overlap with the range being migrated + // in. 
+ auto uuid = _manager->getActiveMetadata(boost::none)->getChunkManager()->getUUID().get(); + ChunkRange refinedNonOverlappingRange(BSON("key" << -10 << "other" << MINKEY), + BSON("key" << 0 << "other" << MINKEY)); + + auto refinedMetadata = makeEmptyMetadata(BSON(kPattern << 1 << "other" << 1), + ChunkRange(BSON("key" << MINKEY << "other" << MINKEY), + BSON("key" << MAXKEY << "other" << MAXKEY)), + uuid); + + // Set the updated chunk map on the MetadataManager. + _manager->setFilteringMetadata( + cloneMetadataPlusChunk(refinedMetadata, refinedNonOverlappingRange)); + + // Because the refined range does not overlap with the received range (pre-refine), this should + // NOT remove the item in _receivingChunks. + ASSERT_EQ(_manager->numberOfReceivingChunks(), 1); +} + TEST_F(MetadataManagerTest, TrackOrphanedDataCleanupBlocksOnScheduledRangeDeletions) { ChunkRange cr1(BSON("key" << 0), BSON("key" << 10)); diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp index bea8ac3aa30..19de4193563 100644 --- a/src/mongo/db/s/migration_destination_manager.cpp +++ b/src/mongo/db/s/migration_destination_manager.cpp @@ -809,6 +809,7 @@ void MigrationDestinationManager::_migrateThread() { stdx::lock_guard<Latch> lk(_mutex); _sessionId.reset(); + _collUuid.reset(); _scopedReceiveChunk.reset(); _isActiveCV.notify_all(); } @@ -892,6 +893,9 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) { // Synchronously delete any data which might have been left orphaned in the range // being moved, and wait for completion + // Needed for _forgetPending to make sure the collection has the same UUID at the end of + // an aborted migration as at the beginning. Must be set before calling _notePending. + _collUuid = donorCollectionOptionsAndIndexes.uuid; auto cleanupCompleteFuture = _notePending(outerOpCtx, range); auto cleanupStatus = cleanupCompleteFuture.getNoThrow(outerOpCtx); // Wait for the range deletion to report back. Swallow @@ -1356,8 +1360,9 @@ SharedSemiFuture<void> MigrationDestinationManager::_notePending(OperationContex auto* const css = CollectionShardingRuntime::get(opCtx, _nss); const auto optMetadata = css->getCurrentMetadataIfKnown(); - // This can currently happen because drops aren't synchronized with in-migrations. The idea for - // checking this here is that in the future we shouldn't have this problem. + // This can currently happen because drops and shard key refine operations aren't guaranteed to + // be synchronized with in-migrations. The idea for checking this here is that in the future we + // shouldn't have this problem. if (!optMetadata || !(*optMetadata)->isSharded() || (*optMetadata)->getCollVersion().epoch() != _epoch) { return Status{ErrorCodes::StaleShardVersion, @@ -1388,10 +1393,14 @@ void MigrationDestinationManager::_forgetPending(OperationContext* opCtx, ChunkR // This can currently happen because drops aren't synchronized with in-migrations. The idea for // checking this here is that in the future we shouldn't have this problem. + // + // _collUuid will always be set if _notePending was called, so if it is not set, there is no + // need to do anything. If it is set, we use it to ensure that the collection UUID has not + // changed since the beginning of migration. 
if (!optMetadata || !(*optMetadata)->isSharded() || - (*optMetadata)->getCollVersion().epoch() != _epoch) { + (_collUuid && !(*optMetadata)->uuidMatches(*_collUuid))) { LOGV2(22009, - "No need to forget pending chunk {range} because the epoch for {nss_ns} changed", + "No need to forget pending chunk {range} because the uuid for {nss_ns} changed", "range"_attr = redact(range.toString()), "nss_ns"_attr = _nss.ns()); return; diff --git a/src/mongo/db/s/migration_destination_manager.h b/src/mongo/db/s/migration_destination_manager.h index ea322655509..403800427fd 100644 --- a/src/mongo/db/s/migration_destination_manager.h +++ b/src/mongo/db/s/migration_destination_manager.h @@ -210,6 +210,7 @@ private: LogicalSessionId _lsid; TxnNumber _txnNumber; NamespaceString _nss; + boost::optional<UUID> _collUuid; ConnectionString _fromShardConnString; ShardId _fromShard; ShardId _toShard; |
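For completeness, the recipient-side case (the final test above, with the resumable range deleter disabled via disableResumableRangeDeleter) reduces to the following shape: pause a migration after it clones documents, interrupt it by building the refined-key index, and verify the recipient still deletes the documents it cloned. A sketch under the same assumptions as the earlier sketches; the final index creation mirrors the SERVER-47025 workaround in the test itself:

    // Pause the recipient after it clones documents, then start a migration
    // that we expect to fail once it is interrupted.
    const hangAfterClone = configureFailPoint(st.rs1.getPrimary(), "migrateThreadHangAtStep3");
    const awaitMove = startParallelShell(
        funWithArgs(function(ns, toShard) {
            assert.commandFailedWithCode(
                db.adminCommand(
                    {moveChunk: ns, find: {x: 1}, to: toShard, _waitForDelete: true}),
                ErrorCodes.Interrupted);
        }, ns, st.shard1.shardName), st.s.port);
    hangAfterClone.wait();

    // Per the test's comments, creating the index on the refined key is what
    // interrupts the in-flight migration; the refine then proceeds.
    assert.commandWorked(st.s.getCollection(ns).createIndex({x: 1, y: 1}));
    assert.commandWorked(st.s.adminCommand({refineCollectionShardKey: ns, key: {x: 1, y: 1}}));

    hangAfterClone.off();
    awaitMove();

    // SERVER-47025 workaround from the test: give the recipient a shard key
    // index so its range deleter can find the cloned (now orphaned) documents.
    assert.commandWorked(st.shard1.getCollection(ns).createIndex({x: 1, y: 1}));
    assert.soon(() => st.rs1.getPrimary().getCollection(ns).find().itcount() === 0);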