author     Esha Maharishi <esha.maharishi@mongodb.com>  2020-01-02 19:46:37 +0000
committer  A. Jesse Jiryu Davis <jesse@mongodb.com>     2020-01-27 15:37:56 -0500
commit     d764b3047694230666ebfe7b52adcd639273eb57 (patch)
tree       1c531df833a8b08f8dc3666041bdd4f0f710178d /jstests
parent     e8d1a151ec5b195f701ac9b336c0a0afac7e8550 (diff)
download   mongo-d764b3047694230666ebfe7b52adcd639273eb57.tar.gz
SERVER-44163 Plug MigrationCoordinator into MigrationSourceManager
Diffstat (limited to 'jstests')
-rw-r--r--  jstests/sharding/convert_to_and_from_sharded.js           |   5
-rw-r--r--  jstests/sharding/merge_with_drop_shard.js                 |   5
-rw-r--r--  jstests/sharding/migration_coordinator_basic.js           | 227
-rw-r--r--  jstests/sharding/migration_waits_for_majority_commit.js   |  30
4 files changed, 261 insertions(+), 6 deletions(-)
diff --git a/jstests/sharding/convert_to_and_from_sharded.js b/jstests/sharding/convert_to_and_from_sharded.js
index 8a347f752f0..d79cca15012 100644
--- a/jstests/sharding/convert_to_and_from_sharded.js
+++ b/jstests/sharding/convert_to_and_from_sharded.js
@@ -54,7 +54,10 @@ assert.commandWorked(priConn.getDB('test').sharded.insert({_id: 'marker'}));
checkBasicCRUD(priConn.getDB('test').sharded);

for (var x = 0; x < NUM_NODES; x++) {
-    replShard.restart(x, {shardsvr: ''});
+    replShard.restart(x, {
+        shardsvr: '',
+        setParameter: {"failpoint.disableWritingPendingRangeDeletionEntries": "{mode: 'alwaysOn'}"}
+    });
}

replShard.awaitNodesAgreeOnPrimary();
diff --git a/jstests/sharding/merge_with_drop_shard.js b/jstests/sharding/merge_with_drop_shard.js
index 068c050c117..ecdd775d114 100644
--- a/jstests/sharding/merge_with_drop_shard.js
+++ b/jstests/sharding/merge_with_drop_shard.js
@@ -7,6 +7,11 @@ load("jstests/aggregation/extras/merge_helpers.js"); // For withEachMergeMode.
const st = new ShardingTest({shards: 2, rs: {nodes: 1}});

+assert.commandWorked(st.shard0.adminCommand(
+    {configureFailPoint: "disableWritingPendingRangeDeletionEntries", mode: "alwaysOn"}));
+assert.commandWorked(st.shard1.adminCommand(
+    {configureFailPoint: "disableWritingPendingRangeDeletionEntries", mode: "alwaysOn"}));
+
const mongosDB = st.s.getDB(jsTestName());
const sourceColl = mongosDB["source"];
const targetColl = mongosDB["target"];
diff --git a/jstests/sharding/migration_coordinator_basic.js b/jstests/sharding/migration_coordinator_basic.js
new file mode 100644
index 00000000000..c459e5aa163
--- /dev/null
+++ b/jstests/sharding/migration_coordinator_basic.js
@@ -0,0 +1,227 @@
+/**
+ * Tests that a donor shard durably records a migration's state, inserts pending entries into its
+ * own and the recipient's config.rangeDeletions, and informs itself and the recipient of the
+ * migration's outcome by updating or deleting its own and the recipient's config.rangeDeletions
+ * entries for the migration.
+ */
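+//
+// For reference, a rough sketch of the two documents this test asserts on. The shapes are
+// inferred from the queries below and are illustrative rather than authoritative:
+//
+//   config.migrationCoordinators (on the donor):
+//     {lsid, txnNumber, nss, collectionUuid, donorShardId, recipientShardId,
+//      range: {min, max}, preMigrationChunkVersion: [<Timestamp>, <epoch>]}
+//
+//   config.rangeDeletions (on the donor and recipient):
+//     {nss, collectionUuid, donorShardId, range: {min, max}, pending, whenToClean}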
+
+(function() {
+'use strict';
+
+load("jstests/libs/fail_point_util.js");
+load('jstests/libs/parallel_shell_helpers.js');
+
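+// Returns a fresh [collName, ns] pair on every call; the counter is stashed on the function
+// object itself, so each test case below gets its own namespace.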
+function getNewNs(dbName) {
+ if (typeof getNewNs.counter == 'undefined') {
+ getNewNs.counter = 0;
+ }
+ getNewNs.counter++;
+ const collName = "ns" + getNewNs.counter;
+ return [collName, dbName + "." + collName];
+}
+
+const dbName = "test";
+
+var st = new ShardingTest({shards: 2});
+
+assert.commandWorked(st.s.adminCommand({enableSharding: dbName}));
+assert.commandWorked(st.s.adminCommand({movePrimary: dbName, to: st.shard0.shardName}));
+
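+// The collection's UUID and 'lastmodEpoch' identify this incarnation of the sharded
+// collection; both are threaded through the durable-state queries below (the epoch is the
+// second component of 'preMigrationChunkVersion').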
+function getCollectionUuidAndEpoch(ns) {
+ const collectionDoc = st.s.getDB("config").getCollection("collections").findOne({_id: ns});
+ assert.neq(null, collectionDoc);
+ assert.neq(null, collectionDoc.uuid);
+ assert.neq(null, collectionDoc.lastmodEpoch);
+ return [collectionDoc.uuid, collectionDoc.lastmodEpoch];
+}
+
+function assertHasMigrationCoordinatorDoc({conn, ns, uuid, epoch}) {
+ const query = {
+ lsid: {$exists: 1},
+ txnNumber: {$exists: 1},
+ nss: ns,
+ collectionUuid: uuid,
+ donorShardId: st.shard0.shardName,
+ recipientShardId: st.shard1.shardName,
+ "range.min._id": MinKey,
+ "range.max._id": MaxKey,
+ "preMigrationChunkVersion.0": Timestamp(1, 0),
+ "preMigrationChunkVersion.1": epoch
+ };
+ assert.neq(
+ null,
+ conn.getDB("config").getCollection("migrationCoordinators").findOne(query),
+ "did not find document matching query " + tojson(query) +
+ ", contents of config.migrationCoordinators on " + conn + ": " +
+ tojson(conn.getDB("config").getCollection("migrationCoordinators").find().toArray()));
+}
+
+function assertEventuallyDoesNotHaveMigrationCoordinatorDoc(conn) {
+ assert.soon(() => {
+ return 0 == conn.getDB("config").getCollection("migrationCoordinators").find().itcount();
+ });
+}
+
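+// A range deletion entry with 'pending: true' is not yet actionable; per the protocol
+// described at the top of this file, the donor unsets 'pending' (or deletes the entry)
+// once the migration's outcome is known.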
+function assertHasRangeDeletionDoc({conn, pending, whenToClean, ns, uuid}) {
+    const query = {
+        nss: ns,
+        collectionUuid: uuid,
+        donorShardId: st.shard0.shardName,
+        "range.min._id": MinKey,
+        "range.max._id": MaxKey,
+        pending: (pending ? true : {$exists: false}),
+        whenToClean: whenToClean
+    };
+ assert.neq(null,
+ conn.getDB("config").getCollection("rangeDeletions").findOne(query),
+ "did not find document matching query " + tojson(query) +
+ ", contents of config.rangeDeletions on " + conn + ": " +
+ tojson(conn.getDB("config").getCollection("rangeDeletions").find().toArray()));
+}
+
+function assertEventuallyDoesNotHaveRangeDeletionDoc(conn) {
+ assert.soon(() => {
+ return 0 == conn.getDB("config").getCollection("rangeDeletions").find().itcount();
+ });
+}
+
+(() => {
+ const [collName, ns] = getNewNs(dbName);
+ jsTest.log("Test end-to-end migration when migration commit succeeds, ns is " + ns);
+
+ // Insert some docs into the collection.
+ const numDocs = 1000;
+ var bulk = st.s.getDB(dbName).getCollection(collName).initializeUnorderedBulkOp();
+ for (var i = 0; i < numDocs; i++) {
+ bulk.insert({_id: i});
+ }
+ assert.commandWorked(bulk.execute());
+
+ // Shard the collection.
+ assert.commandWorked(st.s.adminCommand({shardCollection: ns, key: {_id: 1}}));
+ const [uuid, epoch] = getCollectionUuidAndEpoch(ns);
+
+ // Run the moveChunk asynchronously, pausing during cloning to allow the test to make
+ // assertions.
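+    // funWithArgs (from parallel_shell_helpers.js) serializes the function and its arguments
+    // for the parallel shell; the handle returned by configureFailPoint exposes wait() to
+    // block until the failpoint is hit and off() to release it.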
+ let step3Failpoint = configureFailPoint(st.shard0, "moveChunkHangAtStep3");
+ const awaitResult = startParallelShell(
+ funWithArgs(function(ns, toShardName) {
+ assert.commandWorked(db.adminCommand({moveChunk: ns, find: {_id: 0}, to: toShardName}));
+ }, ns, st.shard1.shardName), st.s.port);
+
+ // Assert that the durable state for coordinating the migration was written correctly.
+ step3Failpoint.wait();
+ assertHasMigrationCoordinatorDoc({conn: st.shard0, ns, uuid, epoch});
+ assertHasRangeDeletionDoc({conn: st.shard0, pending: true, whenToClean: "delayed", ns, uuid});
+ // TODO (SERVER-45179): Add the FCV 4.4 behavior to the MigrationDestinationManager
+ // assertHasRangeDeletionDoc({conn: st.shard1, pending: true, whenToClean: "now", ns, uuid});
+ step3Failpoint.off();
+
+ // Allow the moveChunk to finish.
+ awaitResult();
+
+ // Donor shard eventually cleans up the orphans.
+ assert.soon(function() {
+ return st.shard0.getDB(dbName).getCollection(collName).count() === 0;
+ });
+ assert.eq(numDocs, st.s.getDB(dbName).getCollection(collName).find().itcount());
+
+ // The durable state for coordinating the migration is eventually cleaned up.
+ assertEventuallyDoesNotHaveMigrationCoordinatorDoc(st.shard0);
+ // TODO (SERVER-44159): Delete document from config.rangeDeletions when CollectionRangeDeleter
+ // finishes deleting a range
+ // assertEventuallyDoesNotHaveRangeDeletionDoc(st.shard0);
+ assertEventuallyDoesNotHaveRangeDeletionDoc(st.shard1);
+})();
+
+(() => {
+ const [collName, ns] = getNewNs(dbName);
+ jsTest.log("Test end-to-end migration when migration commit fails, ns is " + ns);
+
+ // Insert some docs into the collection.
+ const numDocs = 1000;
+ var bulk = st.s.getDB(dbName).getCollection(collName).initializeUnorderedBulkOp();
+ for (var i = 0; i < numDocs; i++) {
+ bulk.insert({_id: i});
+ }
+ assert.commandWorked(bulk.execute());
+
+ // Shard the collection.
+ assert.commandWorked(st.s.adminCommand({shardCollection: ns, key: {_id: 1}}));
+ const [uuid, epoch] = getCollectionUuidAndEpoch(ns);
+
+ // Turn on a failpoint to make the migration commit fail on the config server.
+ let migrationCommitVersionErrorFailpoint =
+ configureFailPoint(st.configRS.getPrimary(), "migrationCommitVersionError");
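+    // While this failpoint is active, the config server fails the migration commit with a
+    // chunk version error, which the donor surfaces as StaleEpoch (asserted in the parallel
+    // shell below) and treats as an aborted migration.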
+
+ // Run the moveChunk asynchronously, pausing during cloning to allow the test to make
+ // assertions.
+ let step3Failpoint = configureFailPoint(st.shard0, "moveChunkHangAtStep3");
+ let step5Failpoint = configureFailPoint(st.shard0, "moveChunkHangAtStep5");
+ const awaitResult = startParallelShell(
+ funWithArgs(function(ns, toShardName) {
+ // Expect StaleEpoch because of the failpoint that will make the migration commit fail.
+ assert.commandFailedWithCode(
+ db.adminCommand({moveChunk: ns, find: {_id: 0}, to: toShardName}),
+ ErrorCodes.StaleEpoch);
+ }, ns, st.shard1.shardName), st.s.port);
+
+ // Assert that the durable state for coordinating the migration was written correctly.
+ step3Failpoint.wait();
+ assertHasMigrationCoordinatorDoc({conn: st.shard0, ns, uuid, epoch});
+ assertHasRangeDeletionDoc({conn: st.shard0, pending: true, whenToClean: "delayed", ns, uuid});
+ // TODO (SERVER-45179): Add the FCV 4.4 behavior to the MigrationDestinationManager
+ // assertHasRangeDeletionDoc({conn: st.shard1, pending: true, whenToClean: "now", ns, uuid});
+ step3Failpoint.off();
+
+ // Assert that the recipient has 'numDocs' orphans.
+ step5Failpoint.wait();
+ assert.eq(numDocs, st.shard1.getDB(dbName).getCollection(collName).count());
+ step5Failpoint.off();
+
+ // Allow the moveChunk to finish.
+ awaitResult();
+
+ // Recipient shard eventually cleans up the orphans.
+ // TODO (SERVER-45179): Add the FCV 4.4 behavior to the MigrationDestinationManager
+    // assert.soon(function() {
+    //     return st.shard1.getDB(dbName).getCollection(collName).count() === 0;
+    // });
+ assert.eq(numDocs, st.s.getDB(dbName).getCollection(collName).find().itcount());
+
+ // The durable state for coordinating the migration is eventually cleaned up.
+ assertEventuallyDoesNotHaveMigrationCoordinatorDoc(st.shard0);
+ // TODO (SERVER-44159): Delete document from config.rangeDeletions when CollectionRangeDeleter
+ // finishes deleting a range
+ // assertEventuallyDoesNotHaveRangeDeletionDoc(st.shard0);
+ assertEventuallyDoesNotHaveRangeDeletionDoc(st.shard1);
+
+ migrationCommitVersionErrorFailpoint.off();
+})();
+
+st.stop();
+})();
diff --git a/jstests/sharding/migration_waits_for_majority_commit.js b/jstests/sharding/migration_waits_for_majority_commit.js
index b6ba286e7f9..6581a8da592 100644
--- a/jstests/sharding/migration_waits_for_majority_commit.js
+++ b/jstests/sharding/migration_waits_for_majority_commit.js
@@ -53,12 +53,32 @@ assert.commandWorked(
unpauseMigrateAtStep(st.rs1.getPrimary(), 2);

// The migration should fail to commit without being able to advance the majority commit point.
-awaitMigration();
+if (jsTestOptions().mongosBinVersion == "last-stable") {
+    awaitMigration();

-assert.commandWorked(
-    destinationSec.adminCommand({configureFailPoint: "rsSyncApplyStop", mode: "off"}),
-    "failed to enable fail point on secondary");
+    assert.commandWorked(
+        destinationSec.adminCommand({configureFailPoint: "rsSyncApplyStop", mode: "off"}),
+        "failed to disable fail point on secondary");
+} else {
+    // In FCV 4.4, check the migration coordinator document instead, because the moveChunk
+    // command itself will hang trying to remove the recipient's range deletion entry with
+    // majority writeConcern until replication is re-enabled on the recipient.
+    assert.soon(() => {
+        return st.rs0.getPrimary().getDB("config").getCollection("migrationCoordinators").findOne({
+            nss: "test.foo",
+            "range.min._id": 0,
+            "range.max._id": MaxKey,
+            decision: "aborted",
+        }) != null;
+    });
+
+    assert.commandWorked(
+        destinationSec.adminCommand({configureFailPoint: "rsSyncApplyStop", mode: "off"}),
+        "failed to disable fail point on secondary");
+
+    awaitMigration();
+}

st.stop();
MongoRunner.stopMongod(staticMongod);
-})();
\ No newline at end of file
+})();