commit c5eea7753b2fe3082d853ff9400117c85ac42dab
tree   3a0b10e16e5f02b64341783299ee9ccfe67eb9b9
parent 491ab3f67681e83f4184f4ffce07c6c53d9441d9
author    Matthew Saltz <matthew.saltz@mongodb.com>  2020-03-16 15:56:11 -0400
committer Evergreen Agent <no-reply@evergreen.mongodb.com>  2020-03-26 22:51:39 +0000
SERVER-46370 Maintain _receivingChunks list correctly after shard key refine
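The underlying issue: refineCollectionShardKey bumps a collection's epoch but leaves documents and pending range bookkeeping expressed in terms of the old, shorter shard key. Before this change, a range recorded in the _receivingChunks list could fail to match the post-refine metadata, leaving the list stale and orphaned documents behind. The fix compares pre-refine ranges against refined metadata by extending their bounds, and keys the recipient's cleanup decision off the collection UUID rather than the epoch (a refine changes the epoch but preserves the UUID; see _forgetPending in migration_destination_manager.cpp below).

The donor-side scenario the new test drives, condensed into a minimal sketch (distilled from range_deleter_interacts_correctly_with_refine_shard_key.js below, assuming the helpers that file loads; this is an illustration, not an additional test):

    load('jstests/libs/fail_point_util.js');         // configureFailPoint
    load('jstests/libs/parallel_shell_helpers.js');  // funWithArgs (used in a later sketch)

    const st = new ShardingTest({shards: 2});
    const ns = "test.foo";
    assert.commandWorked(st.s.adminCommand({enableSharding: "test"}));
    assert.commandWorked(st.s.adminCommand({shardCollection: ns, key: {x: 1}}));
    assert.commandWorked(st.s.getCollection(ns).insert({x: 1}));

    // Pause range deletion on the donor, then migrate the chunk away so the donor
    // is left holding an orphaned document for the migrated range.
    const suspended = configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion");
    assert.commandWorked(
        st.s.adminCommand({moveChunk: ns, find: {x: 1}, to: st.shard1.shardName}));
    suspended.wait();

    // Refine the shard key while the deletion task is still pending; the recorded
    // range now has fewer fields than the shard key {x: 1, y: 1}.
    assert.commandWorked(st.s.getCollection(ns).createIndex({x: 1, y: 1}));
    assert.commandWorked(st.s.adminCommand({refineCollectionShardKey: ns, key: {x: 1, y: 1}}));

    // With the fix, the pre-refine range still matches the refined metadata, so
    // the orphan is removed once deletion resumes.
    suspended.off();
    assert.soon(() => st.rs0.getPrimary().getCollection(ns).find().itcount() === 0);
    st.stop();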
 buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml   |   1
 jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js | 441
 src/mongo/db/s/metadata_manager.cpp                                          |  21
 src/mongo/db/s/metadata_manager.h                                            |   7
 src/mongo/db/s/metadata_manager_test.cpp                                     |  80
 src/mongo/db/s/migration_destination_manager.cpp                             |  17
 src/mongo/db/s/migration_destination_manager.h                               |   1
 7 files changed, 427 insertions(+), 141 deletions(-)
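One behavior worth calling out before the diffs: a chunk addressed by the refined key cannot migrate back onto a shard that still holds an orphaned range recorded under the old key; the migration blocks behind that range's deletion. Roughly, picking up the sketch above just before suspended.off() (same st and ns; a simplified distillation, not the verbatim test, which also coordinates extra failpoints):

    // While deletion of the pre-refine range is still suspended on shard 0,
    // moving the refined chunk back blocks on orphan cleanup, so a short
    // maxTimeMS times out (the migration itself continues in the background).
    assert.commandFailedWithCode(st.s.adminCommand({
        moveChunk: ns,
        find: {x: 1, y: 1},
        to: st.shard0.shardName,
        maxTimeMS: 1000
    }),
                                 ErrorCodes.MaxTimeMSExpired);

    // Once deletion resumes and the orphans are gone, a migration back to
    // shard 0 can succeed.
    suspended.off();
    assert.soon(() => st.rs0.getPrimary().getCollection(ns).find().itcount() === 0);
    assert.commandWorked(st.s.adminCommand(
        {moveChunk: ns, find: {x: 1, y: 1}, to: st.shard0.shardName, _waitForDelete: true}));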
diff --git a/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml b/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml index e23481be51c..4be1df1d525 100644 --- a/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml +++ b/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml @@ -50,6 +50,7 @@ selector: - jstests/sharding/prefix_shard_key.js - jstests/sharding/presplit.js - jstests/sharding/query_config.js + - jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js - jstests/sharding/remove1.js - jstests/sharding/rename_across_mongos.js - jstests/sharding/shard1.js diff --git a/jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js b/jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js index d5ed8b095ef..5c3c22a52c9 100644 --- a/jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js +++ b/jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js @@ -13,7 +13,8 @@ load('jstests/libs/parallel_shell_helpers.js'); TestData.skipCheckingUUIDsConsistentAcrossCluster = true; const dbName = "test"; -const ns = dbName + ".foo"; +const collName = "foo"; +const ns = dbName + "." + collName; const originalShardKey = { x: 1 @@ -33,9 +34,7 @@ const refinedShardKeyValueInChunk = { y: 1 }; -const st = new ShardingTest({shards: {rs0: {nodes: 3}, rs1: {nodes: 3}}}); - -function setUp() { +function setUp(st) { // Create a sharded collection with two chunk on shard0, split at key {x: -1}. assert.commandWorked(st.s.adminCommand({enableSharding: dbName})); assert.commandWorked(st.s.adminCommand({movePrimary: dbName, to: st.shard0.shardName})); @@ -48,7 +47,7 @@ function setUp() { } } -function tearDown() { +function tearDown(st) { st.s.getCollection(ns).drop(); } @@ -56,165 +55,369 @@ function tearDown() { * Generic function to run a test. 'description' is a description of the test for logging * purposes and 'testBody' is the test function. */ -function test(description, testBody) { +function test(st, description, testBody) { jsTest.log(`Running Test Setup: ${description}`); - setUp(); + setUp(st); jsTest.log(`Running Test Body: ${description}`); testBody(); jsTest.log(`Running Test Tear-Down: ${description}`); - tearDown(); + tearDown(st); jsTest.log(`Finished Running Test: ${description}`); } -test("Refining the shard key does not prevent removal of orphaned documents", () => { - // Enable failpoint which will cause range deletion to hang indefinitely. - let suspendRangeDeletionFailpoint = - configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion"); +// Tests with resumable range deleter enabled. +(() => { + const st = new ShardingTest({shards: {rs0: {nodes: 3}, rs1: {nodes: 3}}}); + test(st, + "Refining the shard key does not prevent removal of orphaned documents on a donor" + + " shard after a successful migration", + () => { + // Enable failpoint which will cause range deletion to hang indefinitely. + let suspendRangeDeletionFailpoint = + configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion"); + + // Note that _waitForDelete has to be absent/false since we're suspending range + // deletion. + assert.commandWorked(st.s.adminCommand( + {moveChunk: ns, find: shardKeyValueInChunk, to: st.shard1.shardName})); + + jsTestLog("Waiting for the suspendRangeDeletion failpoint to be hit"); - // Note that _waitForDelete has to be absent/false since we're suspending range deletion. 
- assert.commandWorked( - st.s.adminCommand({moveChunk: ns, find: shardKeyValueInChunk, to: st.shard1.shardName})); + suspendRangeDeletionFailpoint.wait(); - jsTestLog("Waiting for the suspendRangeDeletion failpoint to be hit"); + jsTestLog("Refining the shard key"); - suspendRangeDeletionFailpoint.wait(); + // Create an index on the refined shard key. + assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey)); - jsTestLog("Refining the shard key"); + // Refine the shard key from just the field 'x' to 'x' and 'y'. + assert.commandWorked( + st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey})); - // Create an index on the refined shard key. - assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey)); + // The index on the original shard key shouldn't be required anymore. + assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey)); - // Refine the shard key from just the field 'x' to 'x' and 'y'. - assert.commandWorked(st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey})); + // Allow range deletion to continue. + suspendRangeDeletionFailpoint.off(); - // The index on the original shard key shouldn't be required anymore. - assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey)); + jsTestLog("Waiting for orphans to be removed from shard 0"); + + // The range deletion should eventually succeed in the background. + assert.soon(() => { + return st.rs0.getPrimary().getCollection(ns).find().itcount() == 0; + }); + }); - // Allow range deletion to continue. - suspendRangeDeletionFailpoint.off(); + test(st, + "Chunks with a refined shard key cannot migrate back onto a shard with " + + "an orphaned range created with the prior shard key", + () => { + // Enable failpoint which will cause range deletion to hang indefinitely. + let suspendRangeDeletionFailpoint = + configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion"); + + // Note that _waitForDelete has to be absent/false since we're suspending range + // deletion. + assert.commandWorked(st.s.adminCommand( + {moveChunk: ns, find: shardKeyValueInChunk, to: st.shard1.shardName})); + + jsTestLog("Waiting for the suspendRangeDeletion failpoint to be hit"); + + suspendRangeDeletionFailpoint.wait(); + + jsTestLog("Refining the shard key"); + + // Create an index on the refined shard key. + assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey)); + + // Refine the shard key from just the field 'x' to 'x' and 'y'. + assert.commandWorked( + st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey})); + + // The index on the original shard key shouldn't be required anymore. + assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey)); + + // We will use this to wait until the following migration has completed, since we + // expect the client to time out. Waiting for this failpoint technically just waits for + // the recipient side of the migration to complete, but it's expected that if the + // migration can get to that point, then it should be able to succeed overall. + let hangDonorAtEndOfMigration = + configureFailPoint(st.rs1.getPrimary(), "moveChunkHangAtStep6"); + + jsTestLog("Attempting to move the chunk back to shard 0"); + // Attempt to move the chunk back to shard 0, sending it with maxTimeMS. Since there + // will be orphaned documents still on shard 0 (because range deletion is paused), we + // expected this command to time out. 
+ assert.commandFailedWithCode(st.s.adminCommand({ + moveChunk: ns, + find: refinedShardKeyValueInChunk, + to: st.shard0.shardName, + maxTimeMS: 1000 + }), + ErrorCodes.MaxTimeMSExpired); + + // Hang after waiting for orphan cleanup so that in the test we can check for orphans + // on disk before documents begin migrating. + let hangRecipient = + configureFailPoint(st.rs0.getPrimary(), "migrateThreadHangAtStep1"); + + // Allow range deletion to continue. + suspendRangeDeletionFailpoint.off(); + + jsTestLog("Waiting for orphans to be removed from shard 0"); + + // The range deletion should eventually succeed in the background. + assert.soon(() => { + return st.rs0.getPrimary().getCollection(ns).find().itcount() == 0; + }); + + hangRecipient.off(); + + // Wait for the previous migration to complete before continuing. + hangDonorAtEndOfMigration.wait(); + hangDonorAtEndOfMigration.off(); + }); - jsTestLog("Waiting for orphans to be removed from shard 0"); + // This test was created to reproduce a specific bug, which is why it may sound like an odd + // thing to test. See SERVER-46386 for more details. + test(st, + "Range deletion tasks created prior to refining the shard key do not " + + "conflict with non-overlapping ranges once the shard key is refined", + () => { + // Enable failpoint which will cause range deletion to hang indefinitely. + let suspendRangeDeletionFailpoint = + configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion"); - // The range deletion should eventually succeed in the background. - assert.soon(() => { - return st.rs0.getPrimary().getCollection(ns).find().itcount() == 0; - }); -}); + // Note that _waitForDelete has to be absent/false since we're suspending range + // deletion. + assert.commandWorked(st.s.adminCommand( + {moveChunk: ns, find: shardKeyValueInChunk, to: st.shard1.shardName})); -test("Chunks with a refined shard key cannot migrate back onto a shard with " + - "orphaned documents created with the prior shard key", - () => { - // Enable failpoint which will cause range deletion to hang indefinitely. - let suspendRangeDeletionFailpoint = - configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion"); + jsTestLog("Waiting for the suspendRangeDeletion failpoint to be hit"); - // Note that _waitForDelete has to be absent/false since we're suspending range - // deletion. - assert.commandWorked(st.s.adminCommand( - {moveChunk: ns, find: shardKeyValueInChunk, to: st.shard1.shardName})); + suspendRangeDeletionFailpoint.wait(); - jsTestLog("Waiting for the suspendRangeDeletion failpoint to be hit"); + jsTestLog("Refining the shard key"); - suspendRangeDeletionFailpoint.wait(); + // Create an index on the refined shard key. + assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey)); - jsTestLog("Refining the shard key"); + // Refine the shard key from just the field 'x' to 'x' and 'y'. + assert.commandWorked( + st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey})); - // Create an index on the refined shard key. - assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey)); + // The index on the original shard key shouldn't be required anymore. + assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey)); - // Refine the shard key from just the field 'x' to 'x' and 'y'. - assert.commandWorked( - st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey})); + // Step down current primary. 
+ assert.commandWorked(st.rs0.getPrimary().adminCommand( + {replSetStepDown: ReplSetTest.kForeverSecs, force: 1})); - // The index on the original shard key shouldn't be required anymore. - assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey)); + // Allow range deletion to continue on old node. This isn't required for this test to + // proceed since we only care about the new primary, but it's worth cleaning up. + suspendRangeDeletionFailpoint.off(); - jsTestLog("Attempting to move the chunk back to shard 0"); - // Attempt to move the chunk back to shard 0, sending it with maxTimeMS. Since there - // will be orphaned documents still on shard 0 (because range deletion is paused), we - // expected this command to time out. - const awaitResult = startParallelShell( - funWithArgs(function(ns, refinedShardKeyValueInChunk, toShardName) { - assert.commandFailedWithCode(db.adminCommand({ - moveChunk: ns, - find: refinedShardKeyValueInChunk, - to: toShardName, - maxTimeMS: 1000 - }), - ErrorCodes.MaxTimeMSExpired); - }, ns, refinedShardKeyValueInChunk, st.shard0.shardName), st.s.port); - awaitResult(); + jsTestLog("Waiting for orphans to be removed from shard 0"); - // Allow range deletion to continue. - suspendRangeDeletionFailpoint.off(); + // The range deletion should eventually succeed in the background on the new primary. + assert.soon(() => { + return st.rs0.getPrimary().getCollection(ns).find().itcount() == 0; + }); - jsTestLog("Waiting for orphans to be removed from shard 0"); + // Wait for the donor to learn about the new primary on the recipient. + awaitRSClientHosts( + st.rs1.getPrimary(), st.rs0.getPrimary(), {ok: true, ismaster: true}); - // The range deletion should eventually succeed in the background. - assert.soon(() => { - return st.rs0.getPrimary().getCollection(ns).find().itcount() == 0; + // We should be able to move the chunk back to shard 0 now that orphans are gone. + assert.commandWorked(st.s.adminCommand({ + moveChunk: ns, + find: refinedShardKeyValueInChunk, + to: st.shard0.shardName, + _waitForDelete: true + })); }); - // Moving the chunk back to shard 0 should now succeed. - assert.commandWorked(st.s.adminCommand( - {moveChunk: ns, find: refinedShardKeyValueInChunk, to: st.shard0.shardName})); - }); + st.stop(); +})(); -// This test was created to reproduce a specific bug, which is why it may sound like an odd thing to -// test. See SERVER-46386 for more details. -test("Range deletion tasks created prior to refining the shard key do not " + - "conflict with non-overlapping ranges once the shard key is refined", - () => { - // Enable failpoint which will cause range deletion to hang indefinitely. - let suspendRangeDeletionFailpoint = - configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion"); +// Tests with resumable range deleter disabled. +(() => { + const st = new ShardingTest( + {shards: 2, shardOptions: {setParameter: {"disableResumableRangeDeleter": true}}}); - // Note that _waitForDelete has to be absent/false since we're suspending range deletion. - assert.commandWorked(st.s.adminCommand( - {moveChunk: ns, find: shardKeyValueInChunk, to: st.shard1.shardName})); + test(st, + "Refining the shard key does not prevent removal of orphaned documents on a donor" + + " shard after a successful migration", + () => { + // Enable failpoint which will cause range deletion to hang indefinitely. 
+ let suspendRangeDeletionFailpoint = + configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion"); - jsTestLog("Waiting for the suspendRangeDeletion failpoint to be hit"); + // Note that _waitForDelete has to be absent/false since we're suspending range + // deletion. + assert.commandWorked(st.s.adminCommand( + {moveChunk: ns, find: shardKeyValueInChunk, to: st.shard1.shardName})); - suspendRangeDeletionFailpoint.wait(); + jsTestLog("Waiting for the suspendRangeDeletion failpoint to be hit"); - jsTestLog("Refining the shard key"); + suspendRangeDeletionFailpoint.wait(); - // Create an index on the refined shard key. - assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey)); + jsTestLog("Refining the shard key"); - // Refine the shard key from just the field 'x' to 'x' and 'y'. - assert.commandWorked( - st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey})); + // Create an index on the refined shard key. + assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey)); - // The index on the original shard key shouldn't be required anymore. - assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey)); + // Refine the shard key from just the field 'x' to 'x' and 'y'. + assert.commandWorked( + st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey})); - // Step down current primary. - assert.commandWorked(st.rs0.getPrimary().adminCommand( - {replSetStepDown: ReplSetTest.kForeverSecs, force: 1})); + // The index on the original shard key shouldn't be required anymore. + assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey)); - // Allow range deletion to continue on old node. This isn't required for this test to - // proceed since we only care about the new primary, but it's worth cleaning up. - suspendRangeDeletionFailpoint.off(); + // Allow range deletion to continue. + suspendRangeDeletionFailpoint.off(); - jsTestLog("Waiting for orphans to be removed from shard 0"); + jsTestLog("Waiting for orphans to be removed from shard 0"); - // The range deletion should eventually succeed in the background on the new primary. - assert.soon(() => { - return st.rs0.getPrimary().getCollection(ns).find().itcount() == 0; + // The range deletion should eventually succeed in the background. + assert.soon(() => { + return st.rs0.getPrimary().getCollection(ns).find().itcount() == 0; + }); }); - // Wait for the donor to learn about the new primary on the recipient. - awaitRSClientHosts(st.rs1.getPrimary(), st.rs0.getPrimary(), {ok: true, ismaster: true}); + test(st, + "Chunks with a refined shard key cannot migrate back onto a shard with " + + "an orphaned range created with the prior shard key", + () => { + // Enable failpoint which will cause range deletion to hang indefinitely. + let suspendRangeDeletionFailpoint = + configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion"); + + // Note that _waitForDelete has to be absent/false since we're suspending range + // deletion. + assert.commandWorked(st.s.adminCommand( + {moveChunk: ns, find: shardKeyValueInChunk, to: st.shard1.shardName})); + + jsTestLog("Waiting for the suspendRangeDeletion failpoint to be hit"); + + suspendRangeDeletionFailpoint.wait(); + + jsTestLog("Refining the shard key"); + + // Create an index on the refined shard key. + assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey)); + + // Refine the shard key from just the field 'x' to 'x' and 'y'. 
+ assert.commandWorked( + st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey})); + + // The index on the original shard key shouldn't be required anymore. + assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey)); + + // We will use this to wait until the following migration has completed, since we + // expect the client to time out. Waiting for this failpoint technically just waits for + // the recipient side of the migration to complete, but it's expected that if the + // migration can get to that point, then it should be able to succeed overall. + let hangDonorAtEndOfMigration = + configureFailPoint(st.rs1.getPrimary(), "moveChunkHangAtStep6"); + + jsTestLog("Attempting to move the chunk back to shard 0"); + + // Attempt to move the chunk back to shard 0, sending it with maxTimeMS. Since there + // will be orphaned documents still on shard 0 (because range deletion is paused), we + // expected this command to time out. This will NOT fail the migration, however, since + // that occurs in a background OperationContext. + assert.commandFailedWithCode(st.s.adminCommand({ + moveChunk: ns, + find: refinedShardKeyValueInChunk, + to: st.shard0.shardName, + maxTimeMS: 1000 + }), + ErrorCodes.MaxTimeMSExpired); + + // Hang after waiting for orphan cleanup so that in the test we can check for orphans + // on disk before documents begin migrating. + let hangRecipient = + configureFailPoint(st.rs0.getPrimary(), "migrateThreadHangAtStep1"); + + // Allow range deletion to continue. + suspendRangeDeletionFailpoint.off(); + + jsTestLog("Waiting for orphans to be removed from shard 0"); + + // The range deletion should eventually succeed in the background. + assert.soon(() => { + return st.rs0.getPrimary().getCollection(ns).find().itcount() == 0; + }); + + hangRecipient.off(); + + // Wait for the previous migration to complete before continuing. + hangDonorAtEndOfMigration.wait(); + hangDonorAtEndOfMigration.off(); + + // TODO (SERVER-47003): There will be a left-over entry in config.migrations after the + // previous moveChunk fails with MaxTimeMSExpired, so we drop the collection. Otherwise + // future migrations would receive a DuplicateKeyError when trying to update + // config.migrations. + st.config.getSiblingDB('config').migrations.drop(); + }); - // We should be able to move the chunk back to shard 0 now that orphans are gone. - assert.commandWorked(st.s.adminCommand({ - moveChunk: ns, - find: refinedShardKeyValueInChunk, - to: st.shard0.shardName, - _waitForDelete: true - })); - }); + test(st, + "Refining the shard key does not prevent removal of orphaned documents on a recipient" + + " shard after a failed migration", + () => { + let hangRecipientAfterCloningDocuments = + configureFailPoint(st.rs1.getPrimary(), "migrateThreadHangAtStep3"); + + // Attempt to move the chunk to shard 1. This will clone all documents from shard 0 to + // shard 1 and then block behind the hangRecipientAfterCloningDocuments failpoint. + // Then, when the index is created on the refined shard key, the migration will be + // interrupted, causing it to fail with error code Interrupted. 
+ const awaitResult = + startParallelShell(funWithArgs(function(ns, shardKeyValueInChunk, toShardName) { + assert.commandFailedWithCode(db.adminCommand({ + moveChunk: ns, + find: shardKeyValueInChunk, + to: toShardName, + _waitForDelete: true + }), + ErrorCodes.Interrupted); + jsTestLog("Recipient failed in parallel shell"); + }, ns, shardKeyValueInChunk, st.shard1.shardName), st.s.port); + + jsTestLog("Waiting for recipient to finish cloning documents"); + + hangRecipientAfterCloningDocuments.wait(); + + jsTestLog("Refining the shard key"); + + // Create an index on the refined shard key. + assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey)); + + // Refine the shard key from just the field 'x' to 'x' and 'y'. + assert.commandWorked( + st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey})); + + // The index on the original shard key shouldn't be required anymore. + assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey)); + + // Turn off failpoint and wait for recipient to fail. + hangRecipientAfterCloningDocuments.off(); + awaitResult(); + + // TODO (SERVER-47025): Without creating this index, the range deleter will hang + // indefinitely looking for a shard key index. + assert.commandWorked(st.shard1.getCollection(ns).createIndex(refinedShardKey)); + + jsTestLog("Waiting for orphans to be removed from shard 1"); + + // The range deletion should eventually succeed in the background on the recipient. + assert.soon(() => { + return st.rs1.getPrimary().getCollection(ns).find().itcount() == 0; + }); + }); -st.stop(); + st.stop(); +})(); })(); diff --git a/src/mongo/db/s/metadata_manager.cpp b/src/mongo/db/s/metadata_manager.cpp index dc5adf82c97..73736707a67 100644 --- a/src/mongo/db/s/metadata_manager.cpp +++ b/src/mongo/db/s/metadata_manager.cpp @@ -57,13 +57,8 @@ using CallbackArgs = TaskExecutor::CallbackArgs; * Returns whether the given metadata object has a chunk owned by this shard that overlaps the * input range. */ -bool metadataOverlapsRange(const boost::optional<CollectionMetadata>& metadata, - const ChunkRange& range) { - if (!metadata) { - return false; - } - - auto metadataShardKeyPattern = KeyPattern(metadata->getKeyPattern()); +bool metadataOverlapsRange(const CollectionMetadata& metadata, const ChunkRange& range) { + auto metadataShardKeyPattern = KeyPattern(metadata.getKeyPattern()); // If the input range is shorter than the range in the ChunkManager inside // 'metadata', we must extend its bounds to get a correct comparison. 
If the input @@ -102,7 +97,15 @@ bool metadataOverlapsRange(const boost::optional<CollectionMetadata>& metadata, } }(); - return metadata->rangeOverlapsChunk(chunkRangeToCompareToMetadata); + return metadata.rangeOverlapsChunk(chunkRangeToCompareToMetadata); +} + +bool metadataOverlapsRange(const boost::optional<CollectionMetadata>& metadata, + const ChunkRange& range) { + if (!metadata) { + return false; + } + return metadataOverlapsRange(metadata.get(), range); } } // namespace @@ -240,7 +243,7 @@ void MetadataManager::setFilteringMetadata(CollectionMetadata remoteMetadata) { for (auto it = _receivingChunks.begin(); it != _receivingChunks.end();) { const ChunkRange receivingRange(it->first, it->second); - if (!remoteMetadata.rangeOverlapsChunk(receivingRange)) { + if (!metadataOverlapsRange(remoteMetadata, receivingRange)) { ++it; continue; } diff --git a/src/mongo/db/s/metadata_manager.h b/src/mongo/db/s/metadata_manager.h index bd2c0189742..27049e16490 100644 --- a/src/mongo/db/s/metadata_manager.h +++ b/src/mongo/db/s/metadata_manager.h @@ -107,6 +107,13 @@ public: void toBSONPending(BSONArrayBuilder& bb) const; /** + * Returns the number of items in the _receivingChunks list. Useful for unit tests. + */ + size_t numberOfReceivingChunks() { + return _receivingChunks.size(); + } + + /** * Appends information on all the chunk ranges in rangesToClean to builder. */ void append(BSONObjBuilder* builder) const; diff --git a/src/mongo/db/s/metadata_manager_test.cpp b/src/mongo/db/s/metadata_manager_test.cpp index 58c663c4a4f..57947948fcf 100644 --- a/src/mongo/db/s/metadata_manager_test.cpp +++ b/src/mongo/db/s/metadata_manager_test.cpp @@ -73,20 +73,20 @@ protected: /** * Returns an instance of CollectionMetadata which has no chunks owned by 'thisShard'. 
*/ - static CollectionMetadata makeEmptyMetadata() { + static CollectionMetadata makeEmptyMetadata( + const KeyPattern& shardKeyPattern = kShardKeyPattern, + const ChunkRange& range = ChunkRange{BSON(kPattern << MINKEY), BSON(kPattern << MAXKEY)}, + UUID uuid = UUID::gen()) { const OID epoch = OID::gen(); auto rt = RoutingTableHistory::makeNew( kNss, - UUID::gen(), - kShardKeyPattern, + uuid, + shardKeyPattern, nullptr, false, epoch, - {ChunkType{kNss, - ChunkRange{BSON(kPattern << MINKEY), BSON(kPattern << MAXKEY)}, - ChunkVersion(1, 0, epoch), - kOtherShard}}); + {ChunkType{kNss, range, ChunkVersion(1, 0, epoch), kOtherShard}}); std::shared_ptr<ChunkManager> cm = std::make_shared<ChunkManager>(rt, boost::none); @@ -103,11 +103,16 @@ protected: */ static CollectionMetadata cloneMetadataPlusChunk(const ScopedCollectionDescription& collDesc, const ChunkRange& range) { + return cloneMetadataPlusChunk(collDesc.get(), range); + } + + static CollectionMetadata cloneMetadataPlusChunk(const CollectionMetadata& collMetadata, + const ChunkRange& range) { const BSONObj& minKey = range.getMin(); const BSONObj& maxKey = range.getMax(); - ASSERT(!rangeMapOverlaps(collDesc->getChunks(), minKey, maxKey)); + ASSERT(!rangeMapOverlaps(collMetadata.getChunks(), minKey, maxKey)); - auto cm = collDesc->getChunkManager(); + auto cm = collMetadata.getChunkManager(); const auto chunkToSplit = cm->findIntersectingChunkWithSimpleCollation(minKey); ASSERT_BSONOBJ_GTE(minKey, chunkToSplit.getMin()); @@ -179,6 +184,63 @@ TEST_F(MetadataManagerTest, CleanUpForMigrateIn) { ASSERT_EQ(0UL, _manager->numberOfRangesToCleanStillInUse()); } +TEST_F(MetadataManagerTest, + ChunkInReceivingChunksListIsRemovedAfterShardKeyRefineIfMigrationSucceeded) { + _manager->setFilteringMetadata(makeEmptyMetadata()); + + // Simulate receiving a range. This will add an item to _receivingChunks. + ChunkRange range(BSON("key" << 0), BSON("key" << 10)); + auto notif1 = _manager->beginReceive(range); + + ASSERT_EQ(_manager->numberOfReceivingChunks(), 1); + + // Simulate a situation in which the migration completes, and then the shard key is refined, + // before this shard discovers the updated metadata. + auto uuid = _manager->getActiveMetadata(boost::none)->getChunkManager()->getUUID().get(); + ChunkRange refinedRange(BSON("key" << 0 << "other" << MINKEY), + BSON("key" << 10 << "other" << MINKEY)); + auto refinedMetadata = makeEmptyMetadata(BSON(kPattern << 1 << "other" << 1), + ChunkRange(BSON("key" << MINKEY << "other" << MINKEY), + BSON("key" << MAXKEY << "other" << MAXKEY)), + uuid); + + // Set the updated chunk map on the MetadataManager. + _manager->setFilteringMetadata(cloneMetadataPlusChunk(refinedMetadata, refinedRange)); + // Because the refined range overlaps with the received range (pre-refine), this should remove + // the item in _receivingChunks. + ASSERT_EQ(_manager->numberOfReceivingChunks(), 0); +} + +TEST_F(MetadataManagerTest, + ChunkInReceivingChunksListIsNotRemovedAfterShardKeyRefineIfNonOverlappingRangeIsReceived) { + _manager->setFilteringMetadata(makeEmptyMetadata()); + + // Simulate receiving a range. This will add an item to _receivingChunks. + ChunkRange range(BSON("key" << 0), BSON("key" << 10)); + auto notif1 = _manager->beginReceive(range); + ASSERT_EQ(_manager->numberOfReceivingChunks(), 1); + + // Simulate a situation in which the shard key is refined and this shard discovers + // updated metadata where it owns some range that does not overlap with the range being migrated + // in. 
+ auto uuid = _manager->getActiveMetadata(boost::none)->getChunkManager()->getUUID().get(); + ChunkRange refinedNonOverlappingRange(BSON("key" << -10 << "other" << MINKEY), + BSON("key" << 0 << "other" << MINKEY)); + + auto refinedMetadata = makeEmptyMetadata(BSON(kPattern << 1 << "other" << 1), + ChunkRange(BSON("key" << MINKEY << "other" << MINKEY), + BSON("key" << MAXKEY << "other" << MAXKEY)), + uuid); + + // Set the updated chunk map on the MetadataManager. + _manager->setFilteringMetadata( + cloneMetadataPlusChunk(refinedMetadata, refinedNonOverlappingRange)); + + // Because the refined range does not overlap with the received range (pre-refine), this should + // NOT remove the item in _receivingChunks. + ASSERT_EQ(_manager->numberOfReceivingChunks(), 1); +} + TEST_F(MetadataManagerTest, TrackOrphanedDataCleanupBlocksOnScheduledRangeDeletions) { ChunkRange cr1(BSON("key" << 0), BSON("key" << 10)); diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp index bea8ac3aa30..19de4193563 100644 --- a/src/mongo/db/s/migration_destination_manager.cpp +++ b/src/mongo/db/s/migration_destination_manager.cpp @@ -809,6 +809,7 @@ void MigrationDestinationManager::_migrateThread() { stdx::lock_guard<Latch> lk(_mutex); _sessionId.reset(); + _collUuid.reset(); _scopedReceiveChunk.reset(); _isActiveCV.notify_all(); } @@ -892,6 +893,9 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) { // Synchronously delete any data which might have been left orphaned in the range // being moved, and wait for completion + // Needed for _forgetPending to make sure the collection has the same UUID at the end of + // an aborted migration as at the beginning. Must be set before calling _notePending. + _collUuid = donorCollectionOptionsAndIndexes.uuid; auto cleanupCompleteFuture = _notePending(outerOpCtx, range); auto cleanupStatus = cleanupCompleteFuture.getNoThrow(outerOpCtx); // Wait for the range deletion to report back. Swallow @@ -1356,8 +1360,9 @@ SharedSemiFuture<void> MigrationDestinationManager::_notePending(OperationContex auto* const css = CollectionShardingRuntime::get(opCtx, _nss); const auto optMetadata = css->getCurrentMetadataIfKnown(); - // This can currently happen because drops aren't synchronized with in-migrations. The idea for - // checking this here is that in the future we shouldn't have this problem. + // This can currently happen because drops and shard key refine operations aren't guaranteed to + // be synchronized with in-migrations. The idea for checking this here is that in the future we + // shouldn't have this problem. if (!optMetadata || !(*optMetadata)->isSharded() || (*optMetadata)->getCollVersion().epoch() != _epoch) { return Status{ErrorCodes::StaleShardVersion, @@ -1388,10 +1393,14 @@ void MigrationDestinationManager::_forgetPending(OperationContext* opCtx, ChunkR // This can currently happen because drops aren't synchronized with in-migrations. The idea for // checking this here is that in the future we shouldn't have this problem. + // + // _collUuid will always be set if _notePending was called, so if it is not set, there is no + // need to do anything. If it is set, we use it to ensure that the collection UUID has not + // changed since the beginning of migration. 
if (!optMetadata || !(*optMetadata)->isSharded() || - (*optMetadata)->getCollVersion().epoch() != _epoch) { + (_collUuid && !(*optMetadata)->uuidMatches(*_collUuid))) { LOGV2(22009, - "No need to forget pending chunk {range} because the epoch for {nss_ns} changed", + "No need to forget pending chunk {range} because the uuid for {nss_ns} changed", "range"_attr = redact(range.toString()), "nss_ns"_attr = _nss.ns()); return; diff --git a/src/mongo/db/s/migration_destination_manager.h b/src/mongo/db/s/migration_destination_manager.h index ea322655509..403800427fd 100644 --- a/src/mongo/db/s/migration_destination_manager.h +++ b/src/mongo/db/s/migration_destination_manager.h @@ -210,6 +210,7 @@ private: LogicalSessionId _lsid; TxnNumber _txnNumber; NamespaceString _nss; + boost::optional<UUID> _collUuid; ConnectionString _fromShardConnString; ShardId _fromShard; ShardId _toShard; |
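For completeness, the recipient-side case (the final test above, with the resumable range deleter disabled via disableResumableRangeDeleter) reduces to the following shape: pause a migration after it clones documents, interrupt it by building the refined-key index, and verify the recipient still deletes the documents it cloned. A sketch under the same assumptions as the earlier sketches; the final index creation mirrors the SERVER-47025 workaround in the test itself:

    // Pause the recipient after it clones documents, then start a migration
    // that we expect to fail once it is interrupted.
    const hangAfterClone = configureFailPoint(st.rs1.getPrimary(), "migrateThreadHangAtStep3");
    const awaitMove = startParallelShell(
        funWithArgs(function(ns, toShard) {
            assert.commandFailedWithCode(
                db.adminCommand(
                    {moveChunk: ns, find: {x: 1}, to: toShard, _waitForDelete: true}),
                ErrorCodes.Interrupted);
        }, ns, st.shard1.shardName), st.s.port);
    hangAfterClone.wait();

    // Per the test's comments, creating the index on the refined key is what
    // interrupts the in-flight migration; the refine then proceeds.
    assert.commandWorked(st.s.getCollection(ns).createIndex({x: 1, y: 1}));
    assert.commandWorked(st.s.adminCommand({refineCollectionShardKey: ns, key: {x: 1, y: 1}}));

    hangAfterClone.off();
    awaitMove();

    // SERVER-47025 workaround from the test: give the recipient a shard key
    // index so its range deleter can find the cloned (now orphaned) documents.
    assert.commandWorked(st.shard1.getCollection(ns).createIndex({x: 1, y: 1}));
    assert.soon(() => st.rs1.getPrimary().getCollection(ns).find().itcount() === 0);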