diff options
author | Jack Mulrow <jack.mulrow@mongodb.com> | 2020-05-13 23:17:29 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-05-15 02:02:45 +0000 |
commit | 7d4d1ebeaeee37d743ad65099702bf27a12d7d33 (patch) | |
tree | ef96ba2feafac17ef78d6b943911e63e101e80db /jstests | |
parent | d2faf5110c64448fa663963769a341df39e3b45c (diff) | |
download | mongo-7d4d1ebeaeee37d743ad65099702bf27a12d7d33.tar.gz |
SERVER-48198 Account for extended range bounds when recovering migration decision
(cherry picked from commit 9d8eb69d583b89682520ec58595e558d5f6cc9a2)
Diffstat (limited to 'jstests')
-rw-r--r-- | jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js | 63 |
1 files changed, 63 insertions, 0 deletions
diff --git a/jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js b/jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js index d5139a88ce2..def5e94287c 100644 --- a/jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js +++ b/jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js @@ -179,6 +179,69 @@ function test(st, description, testBody) { hangDonorAtEndOfMigration.off(); }); + test(st, + "Migration recovery recovers correct decision for migration committed before shard key " + + "refine", + () => { + // Enable a failpoint that makes the migration donor hang before making a decision and + // begin a migration that hits this failpoint. + let hangBeforeWritingDecisionFailpoint = + configureFailPoint(st.rs0.getPrimary(), "hangBeforeMakingCommitDecisionDurable"); + const parallelMoveChunk = startParallelShell( + funWithArgs(function(ns, shardKeyValueInChunk, toShardName) { + assert.commandFailedWithCode( + db.adminCommand( + {moveChunk: ns, find: shardKeyValueInChunk, to: toShardName}), + ErrorCodes.InterruptedDueToReplStateChange); + }, ns, shardKeyValueInChunk, st.shard1.shardName), st.s.port); + + jsTestLog("Waiting for the migration to hang before writing a decision"); + hangBeforeWritingDecisionFailpoint.wait(); + + // Step up a new primary, which will interrupt the migration and trigger the migration + // recovery process. Set a failpoint on the new primary that will pause the recovery + // before it can load the latest metadata. + jsTestLog("Stepping up a new primary"); + const newPrimary = st.rs0.getSecondary(); + let hangInMigrationRecoveryFailpoint = + configureFailPoint(newPrimary, "hangBeforeFilteringMetadataRefresh"); + assert.commandWorked(newPrimary.adminCommand({replSetStepUp: 1})); + st.rs0.waitForState(newPrimary, ReplSetTest.State.PRIMARY); + st.rs0.awaitNodesAgreeOnPrimary(); + + jsTestLog("Waiting for the new primary to hang in migration recovery"); + hangInMigrationRecoveryFailpoint.wait(); + + // Clean up the failpoint on the old primary. + hangBeforeWritingDecisionFailpoint.off(); + + // Refine the collection's shard key while the recovery task is hung. + jsTestLog("Refining the shard key"); + assert.commandWorked(st.s.getCollection(ns).createIndex({x: 1, y: 1, z: 1})); + assert.commandWorked( + st.s.adminCommand({refineCollectionShardKey: ns, key: {x: 1, y: 1, z: 1}})); + + // Allow the recovery to continue by disabling the failpoint and verify that despite + // the recovered migration having fewer fields in its bounds than in the current shard + // key, the decision should be recovered successfully and orphans should be removed + // from the donor. + jsTestLog("Waiting for orphans to be removed from shard 0"); + hangInMigrationRecoveryFailpoint.off(); + assert.soon(() => { + return st.rs0.getPrimary().getCollection(ns).find().itcount() == 0; + }); + + // Verify we can move the chunk back to the original donor once the orphans are gone. + awaitRSClientHosts( + st.rs1.getPrimary(), st.rs0.getPrimary(), {ok: true, ismaster: true}); + assert.commandWorked(st.s.adminCommand({ + moveChunk: ns, + find: {x: 1, y: 1, z: 1}, + to: st.shard0.shardName, + _waitForDelete: true + })); + }); + // This test was created to reproduce a specific bug, which is why it may sound like an odd // thing to test. See SERVER-46386 for more details. test(st, |