author    | Esha Maharishi <esha.maharishi@mongodb.com> | 2020-01-02 19:46:37 +0000
committer | A. Jesse Jiryu Davis <jesse@mongodb.com> | 2020-01-27 15:37:56 -0500
commit    | d764b3047694230666ebfe7b52adcd639273eb57 (patch)
tree      | 1c531df833a8b08f8dc3666041bdd4f0f710178d /jstests
parent    | e8d1a151ec5b195f701ac9b336c0a0afac7e8550 (diff)
download  | mongo-d764b3047694230666ebfe7b52adcd639273eb57.tar.gz
SERVER-44163 Plug MigrationCoordinator into MigrationSourceManager
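
The new jstests/sharding/migration_coordinator_basic.js test below pins down the durable state this change has the donor write. As a rough sketch only (field names are taken from the test's queries, the values are placeholders, and this is not an authoritative schema), the donor's coordination document and the pending range-deletion entries look roughly like this in the mongo shell:

// Illustrative only: approximate shape of the document the donor persists in
// config.migrationCoordinators while it coordinates a migration from shard0 to shard1.
// Field names come from the queries in migration_coordinator_basic.js; values are placeholders.
var exampleMigrationCoordinatorDoc = {
    lsid: {id: UUID()},      // placeholder; the test only checks that lsid and txnNumber exist
    txnNumber: NumberLong(0),
    nss: "test.ns1",
    collectionUuid: UUID(),
    donorShardId: "shard0",
    recipientShardId: "shard1",
    range: {min: {_id: MinKey}, max: {_id: MaxKey}},
    preMigrationChunkVersion: [Timestamp(1, 0), ObjectId()]  // [chunk version, collection epoch]
    // migration_waits_for_majority_commit.js below also checks a 'decision' field ("aborted").
};

// Illustrative only: approximate shape of a pending config.rangeDeletions entry. The tests
// below expect the 'pending' marker and the entry itself to go away once the outcome is known.
var examplePendingRangeDeletionDoc = {
    nss: "test.ns1",
    collectionUuid: UUID(),
    donorShardId: "shard0",
    range: {min: {_id: MinKey}, max: {_id: MaxKey}},
    pending: true,
    whenToClean: "delayed"
};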
Diffstat (limited to 'jstests')
-rw-r--r-- | jstests/sharding/convert_to_and_from_sharded.js          |   5
-rw-r--r-- | jstests/sharding/merge_with_drop_shard.js                |   5
-rw-r--r-- | jstests/sharding/migration_coordinator_basic.js          | 203
-rw-r--r-- | jstests/sharding/migration_waits_for_majority_commit.js  |  30
4 files changed, 237 insertions, 6 deletions
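
The first two test changes below only opt existing suites out of the new donor behavior: the disableWritingPendingRangeDeletionEntries failpoint keeps the donor from inserting the pending config.rangeDeletions entries those suites are not yet prepared to clean up. convert_to_and_from_sharded.js enables it at restart through a setParameter, while merge_with_drop_shard.js enables it at runtime. A minimal sketch of the runtime pattern (assuming an st handle from ShardingTest, as in those tests):

// Sketch only: have the donor skip writing pending config.rangeDeletions entries, then
// restore the default behavior once the migrating workload is done.
assert.commandWorked(st.shard0.adminCommand(
    {configureFailPoint: "disableWritingPendingRangeDeletionEntries", mode: "alwaysOn"}));
// ... run the workload that migrates chunks off shard0 ...
assert.commandWorked(st.shard0.adminCommand(
    {configureFailPoint: "disableWritingPendingRangeDeletionEntries", mode: "off"}));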
diff --git a/jstests/sharding/convert_to_and_from_sharded.js b/jstests/sharding/convert_to_and_from_sharded.js
index 8a347f752f0..d79cca15012 100644
--- a/jstests/sharding/convert_to_and_from_sharded.js
+++ b/jstests/sharding/convert_to_and_from_sharded.js
@@ -54,7 +54,10 @@ assert.commandWorked(priConn.getDB('test').sharded.insert({_id: 'marker'}));
 checkBasicCRUD(priConn.getDB('test').sharded);
 
 for (var x = 0; x < NUM_NODES; x++) {
-    replShard.restart(x, {shardsvr: ''});
+    replShard.restart(x, {
+        shardsvr: '',
+        setParameter: {"failpoint.disableWritingPendingRangeDeletionEntries": "{mode: 'alwaysOn'}"}
+    });
 }
 
 replShard.awaitNodesAgreeOnPrimary();
diff --git a/jstests/sharding/merge_with_drop_shard.js b/jstests/sharding/merge_with_drop_shard.js
index 068c050c117..ecdd775d114 100644
--- a/jstests/sharding/merge_with_drop_shard.js
+++ b/jstests/sharding/merge_with_drop_shard.js
@@ -7,6 +7,11 @@ load("jstests/aggregation/extras/merge_helpers.js"); // For withEachMergeMode.
 
 const st = new ShardingTest({shards: 2, rs: {nodes: 1}});
 
+assert.commandWorked(st.shard0.adminCommand(
+    {configureFailPoint: "disableWritingPendingRangeDeletionEntries", mode: "alwaysOn"}));
+assert.commandWorked(st.shard1.adminCommand(
+    {configureFailPoint: "disableWritingPendingRangeDeletionEntries", mode: "alwaysOn"}));
+
 const mongosDB = st.s.getDB(jsTestName());
 const sourceColl = mongosDB["source"];
 const targetColl = mongosDB["target"];
diff --git a/jstests/sharding/migration_coordinator_basic.js b/jstests/sharding/migration_coordinator_basic.js
new file mode 100644
index 00000000000..c459e5aa163
--- /dev/null
+++ b/jstests/sharding/migration_coordinator_basic.js
@@ -0,0 +1,203 @@
+/**
+ * Tests that a donor shard durably records a migration's state, inserts pending entries into its
+ * own and the recipient's config.rangeDeletions, and informs itself and the recipient of the
+ * migration's outcome by updating or deleting its own and the recipient's config.rangeDeletions
+ * entries for the migration.
+ */
+
+(function() {
+'use strict';
+
+load("jstests/libs/fail_point_util.js");
+load('jstests/libs/parallel_shell_helpers.js');
+
+function getNewNs(dbName) {
+    if (typeof getNewNs.counter == 'undefined') {
+        getNewNs.counter = 0;
+    }
+    getNewNs.counter++;
+    const collName = "ns" + getNewNs.counter;
+    return [collName, dbName + "." + collName];
+}
+
+const dbName = "test";
+
+var st = new ShardingTest({shards: 2});
+
+assert.commandWorked(st.s.adminCommand({enableSharding: dbName}));
+assert.commandWorked(st.s.adminCommand({movePrimary: dbName, to: st.shard0.shardName}));
+
+function getCollectionUuidAndEpoch(ns) {
+    const collectionDoc = st.s.getDB("config").getCollection("collections").findOne({_id: ns});
+    assert.neq(null, collectionDoc);
+    assert.neq(null, collectionDoc.uuid);
+    assert.neq(null, collectionDoc.lastmodEpoch);
+    return [collectionDoc.uuid, collectionDoc.lastmodEpoch];
+}
+
+function assertHasMigrationCoordinatorDoc({conn, ns, uuid, epoch}) {
+    const query = {
+        lsid: {$exists: 1},
+        txnNumber: {$exists: 1},
+        nss: ns,
+        collectionUuid: uuid,
+        donorShardId: st.shard0.shardName,
+        recipientShardId: st.shard1.shardName,
+        "range.min._id": MinKey,
+        "range.max._id": MaxKey,
+        "preMigrationChunkVersion.0": Timestamp(1, 0),
+        "preMigrationChunkVersion.1": epoch
+    };
+    assert.neq(
+        null,
+        conn.getDB("config").getCollection("migrationCoordinators").findOne(query),
+        "did not find document matching query " + tojson(query) +
+            ", contents of config.migrationCoordinators on " + conn + ": " +
+            tojson(conn.getDB("config").getCollection("migrationCoordinators").find().toArray()));
+}
+
+function assertEventuallyDoesNotHaveMigrationCoordinatorDoc(conn) {
+    assert.soon(() => {
+        return 0 == conn.getDB("config").getCollection("migrationCoordinators").find().itcount();
+    });
+}
+
+function assertHasRangeDeletionDoc({conn, pending, ns, uuid}) {
+    const query = {
+        nss: ns,
+        collectionUuid: uuid,
+        donorShardId: st.shard0.shardName,
+        "range.min._id": MinKey,
+        "range.max._id": MaxKey,
+        pending: (pending ? true : {$exists: false}),
+        whenToClean: "delayed"
+    };
+    assert.neq(null,
+               conn.getDB("config").getCollection("rangeDeletions").findOne(query),
+               "did not find document matching query " + tojson(query) +
+                   ", contents of config.rangeDeletions on " + conn + ": " +
+                   tojson(conn.getDB("config").getCollection("rangeDeletions").find().toArray()));
+}
+
+function assertEventuallyDoesNotHaveRangeDeletionDoc(conn) {
+    assert.soon(() => {
+        return 0 == conn.getDB("config").getCollection("rangeDeletions").find().itcount();
+    });
+}
+
+(() => {
+    const [collName, ns] = getNewNs(dbName);
+    jsTest.log("Test end-to-end migration when migration commit succeeds, ns is " + ns);
+
+    // Insert some docs into the collection.
+    const numDocs = 1000;
+    var bulk = st.s.getDB(dbName).getCollection(collName).initializeUnorderedBulkOp();
+    for (var i = 0; i < numDocs; i++) {
+        bulk.insert({_id: i});
+    }
+    assert.commandWorked(bulk.execute());
+
+    // Shard the collection.
+    assert.commandWorked(st.s.adminCommand({shardCollection: ns, key: {_id: 1}}));
+    const [uuid, epoch] = getCollectionUuidAndEpoch(ns);
+
+    // Run the moveChunk asynchronously, pausing during cloning to allow the test to make
+    // assertions.
+    let step3Failpoint = configureFailPoint(st.shard0, "moveChunkHangAtStep3");
+    const awaitResult = startParallelShell(
+        funWithArgs(function(ns, toShardName) {
+            assert.commandWorked(db.adminCommand({moveChunk: ns, find: {_id: 0}, to: toShardName}));
+        }, ns, st.shard1.shardName), st.s.port);
+
+    // Assert that the durable state for coordinating the migration was written correctly.
+    step3Failpoint.wait();
+    assertHasMigrationCoordinatorDoc({conn: st.shard0, ns, uuid, epoch});
+    assertHasRangeDeletionDoc({conn: st.shard0, pending: true, whenToClean: "delayed", ns, uuid});
+    // TODO (SERVER-45179): Add the FCV 4.4 behavior to the MigrationDestinationManager
+    // assertHasRangeDeletionDoc({conn: st.shard1, pending: true, whenToClean: "now", ns, uuid});
+    step3Failpoint.off();
+
+    // Allow the moveChunk to finish.
+    awaitResult();
+
+    // Donor shard eventually cleans up the orphans.
+    assert.soon(function() {
+        return st.shard0.getDB(dbName).getCollection(collName).count() === 0;
+    });
+    assert.eq(numDocs, st.s.getDB(dbName).getCollection(collName).find().itcount());
+
+    // The durable state for coordinating the migration is eventually cleaned up.
+    assertEventuallyDoesNotHaveMigrationCoordinatorDoc(st.shard0);
+    // TODO (SERVER-44159): Delete document from config.rangeDeletions when CollectionRangeDeleter
+    // finishes deleting a range
+    // assertEventuallyDoesNotHaveRangeDeletionDoc(st.shard0);
+    assertEventuallyDoesNotHaveRangeDeletionDoc(st.shard1);
+})();
+
+(() => {
+    const [collName, ns] = getNewNs(dbName);
+    jsTest.log("Test end-to-end migration when migration commit fails, ns is " + ns);
+
+    // Insert some docs into the collection.
+    const numDocs = 1000;
+    var bulk = st.s.getDB(dbName).getCollection(collName).initializeUnorderedBulkOp();
+    for (var i = 0; i < numDocs; i++) {
+        bulk.insert({_id: i});
+    }
+    assert.commandWorked(bulk.execute());
+
+    // Shard the collection.
+    assert.commandWorked(st.s.adminCommand({shardCollection: ns, key: {_id: 1}}));
+    const [uuid, epoch] = getCollectionUuidAndEpoch(ns);
+
+    // Turn on a failpoint to make the migration commit fail on the config server.
+    let migrationCommitVersionErrorFailpoint =
+        configureFailPoint(st.configRS.getPrimary(), "migrationCommitVersionError");
+
+    // Run the moveChunk asynchronously, pausing during cloning to allow the test to make
+    // assertions.
+    let step3Failpoint = configureFailPoint(st.shard0, "moveChunkHangAtStep3");
+    let step5Failpoint = configureFailPoint(st.shard0, "moveChunkHangAtStep5");
+    const awaitResult = startParallelShell(
+        funWithArgs(function(ns, toShardName) {
+            // Expect StaleEpoch because of the failpoint that will make the migration commit fail.
+            assert.commandFailedWithCode(
+                db.adminCommand({moveChunk: ns, find: {_id: 0}, to: toShardName}),
+                ErrorCodes.StaleEpoch);
+        }, ns, st.shard1.shardName), st.s.port);
+
+    // Assert that the durable state for coordinating the migration was written correctly.
+    step3Failpoint.wait();
+    assertHasMigrationCoordinatorDoc({conn: st.shard0, ns, uuid, epoch});
+    assertHasRangeDeletionDoc({conn: st.shard0, pending: true, whenToClean: "delayed", ns, uuid});
+    // TODO (SERVER-45179): Add the FCV 4.4 behavior to the MigrationDestinationManager
+    // assertHasRangeDeletionDoc({conn: st.shard1, pending: true, whenToClean: "now", ns, uuid});
+    step3Failpoint.off();
+
+    // Assert that the recipient has 'numDocs' orphans.
+    step5Failpoint.wait();
+    assert.eq(numDocs, st.shard1.getDB(dbName).getCollection(collName).count());
+    step5Failpoint.off();
+
+    // Allow the moveChunk to finish.
+    awaitResult();
+
+    // Recipient shard eventually cleans up the orphans.
+    // TODO (SERVER-45179): Add the FCV 4.4 behavior to the MigrationDestinationManager
+    // assert.soon(function() {
+    //     return st.shard1.getDB(dbName).getCollection(collName).count() === 0;
+    //});
+    assert.eq(numDocs, st.s.getDB(dbName).getCollection(collName).find().itcount());
+
+    // The durable state for coordinating the migration is eventually cleaned up.
+    assertEventuallyDoesNotHaveMigrationCoordinatorDoc(st.shard0);
+    // TODO (SERVER-44159): Delete document from config.rangeDeletions when CollectionRangeDeleter
+    // finishes deleting a range
+    // assertEventuallyDoesNotHaveRangeDeletionDoc(st.shard0);
+    assertEventuallyDoesNotHaveRangeDeletionDoc(st.shard1);
+
+    migrationCommitVersionErrorFailpoint.off();
+})();
+
+st.stop();
+})();
diff --git a/jstests/sharding/migration_waits_for_majority_commit.js b/jstests/sharding/migration_waits_for_majority_commit.js
index b6ba286e7f9..6581a8da592 100644
--- a/jstests/sharding/migration_waits_for_majority_commit.js
+++ b/jstests/sharding/migration_waits_for_majority_commit.js
@@ -53,12 +53,32 @@ assert.commandWorked(
 unpauseMigrateAtStep(st.rs1.getPrimary(), 2);
 
 // The migration should fail to commit without being able to advance the majority commit point.
-awaitMigration();
+if (jsTestOptions().mongosBinVersion == "last-stable") {
+    awaitMigration();
 
-assert.commandWorked(
-    destinationSec.adminCommand({configureFailPoint: "rsSyncApplyStop", mode: "off"}),
-    "failed to enable fail point on secondary");
+    assert.commandWorked(
+        destinationSec.adminCommand({configureFailPoint: "rsSyncApplyStop", mode: "off"}),
+        "failed to enable fail point on secondary");
+} else {
+    // In FCV 4.4, check the migration coordinator document, because the moveChunk command itself
+    // will hang on trying to remove the recipient's range deletion entry with majority writeConcern
+    // until replication is re-enabled on the recipient.
+    assert.soon(() => {
+        return st.rs0.getPrimary().getDB("config").getCollection("migrationCoordinators").findOne({
+            nss: "test.foo",
+            "range.min._id": 0,
+            "range.max._id": MaxKey,
+            decision: "aborted",
+        }) != null;
+    });
+
+    assert.commandWorked(
+        destinationSec.adminCommand({configureFailPoint: "rsSyncApplyStop", mode: "off"}),
+        "failed to enable fail point on secondary");
+
+    awaitMigration();
+}
 
 st.stop();
 MongoRunner.stopMongod(staticMongod);
-})();
\ No newline at end of file
+})();
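
The last hunk above waits for the donor to record an "aborted" decision in config.migrationCoordinators before re-enabling replication on the recipient. A reusable helper in that spirit might look like the sketch below; only the "aborted" value appears in this change, so treating other decision values the same way is an assumption.

// Sketch only: wait until the donor primary has durably recorded a decision for the
// migration of 'ns'. "aborted" is the value used in the test above; other values are assumed.
function awaitMigrationDecision(donorPrimary, ns, expectedDecision) {
    assert.soon(() => {
        return donorPrimary.getDB("config").getCollection("migrationCoordinators").findOne({
            nss: ns,
            decision: expectedDecision,
        }) != null;
    }, "donor did not record decision '" + expectedDecision + "' for " + ns);
}

// Example usage against the donor in migration_waits_for_majority_commit.js:
// awaitMigrationDecision(st.rs0.getPrimary(), "test.foo", "aborted");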