summaryrefslogtreecommitdiff
path: root/jstests/replsets
diff options
context:
space:
mode:
authorChristopher Caplinger <christopher.caplinger@mongodb.com>2023-04-17 13:37:45 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2023-04-17 14:44:05 +0000
commitf8968230334cca0e504035188445697fcc8088cf (patch)
tree80eb2eab4753c88b67c7535616d6f8439ed83915 /jstests/replsets
parent408cbcd9802208741d5a4d96e2b52aedd97ce50f (diff)
downloadmongo-f8968230334cca0e504035188445697fcc8088cf.tar.gz
SERVER-75990: Tenant Migrations are not resilient to recipient failover
Diffstat (limited to 'jstests/replsets')
-rw-r--r--jstests/replsets/tenant_migration_cloner_stats_with_failover.js146
-rw-r--r--jstests/replsets/tenant_migration_cluster_time_keys_cloning.js101
-rw-r--r--jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js488
-rw-r--r--jstests/replsets/tenant_migration_external_keys_ttl.js228
-rw-r--r--jstests/replsets/tenant_migration_recipient_failover_before_creating_oplog_buffer.js54
-rw-r--r--jstests/replsets/tenant_migration_recipient_initial_sync_cloning.js199
-rw-r--r--jstests/replsets/tenant_migration_recipient_resume_on_stepup_and_restart.js211
-rw-r--r--jstests/replsets/tenant_migration_recipient_retryable_writes_failover.js110
-rw-r--r--jstests/replsets/tenant_migration_recipient_rollback_recovery.js335
-rw-r--r--jstests/replsets/tenant_migration_recipient_sync_donor_timestamp.js94
-rw-r--r--jstests/replsets/tenant_migration_recipient_sync_source_reconnect_delayed_secondary.js66
-rw-r--r--jstests/replsets/tenant_migration_recipient_sync_source_restart_donor_secondary.js67
-rw-r--r--jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover.js132
-rw-r--r--jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover_with_dropped_views.js133
-rw-r--r--jstests/replsets/tenant_migration_resume_collection_cloner_after_rename.js124
-rw-r--r--jstests/replsets/tenant_migration_resume_oplog_application.js119
-rw-r--r--jstests/replsets/tenant_migration_retryable_write_retry_on_recipient.js4
-rw-r--r--jstests/replsets/tenant_migration_timeseries_retryable_write_retry_on_recipient.js3
18 files changed, 173 insertions, 2441 deletions
diff --git a/jstests/replsets/tenant_migration_cloner_stats_with_failover.js b/jstests/replsets/tenant_migration_cloner_stats_with_failover.js
deleted file mode 100644
index 77c98dea613..00000000000
--- a/jstests/replsets/tenant_migration_cloner_stats_with_failover.js
+++ /dev/null
@@ -1,146 +0,0 @@
-/**
- * Tests tenant migration cloner stats such as 'approxTotalDataSize', 'approxTotalBytesCopied',
- * 'databasesClonedBeforeFailover' across multiple databases and collections with failovers.
- *
- * This test does the following:
- * 1. Insert two databases on the donor. The first database consists of one collection, the second
- * consists of two collections.
- * 2. Wait for the primary (referred to as the original primary) to clone one batch from the second
- * database's second collection.
- * 3. Step up the new primary. Ensure that the stats such as 'databasesClonedBeforeFailover' tally.
- * 4. Allow the tenant migration to complete and commit. Ensure that stats are sensible.
- *
- * @tags: [
- * incompatible_with_macos,
- * incompatible_with_shard_merge,
- * incompatible_with_windows_tls,
- * requires_majority_read_concern,
- * requires_persistence,
- * serverless,
- * incompatible_with_shard_merge,
- * ]
- */
-
-import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js";
-load("jstests/libs/uuid_util.js"); // For extractUUIDFromObject().
-load("jstests/libs/fail_point_util.js"); // For configureFailPoint().
-
-// Limit the batch size to test the stat in between batches.
-const tenantMigrationTest = new TenantMigrationTest(
- {name: jsTestName(), sharedOptions: {setParameter: {collectionClonerBatchSize: 10}}});
-
-const kMigrationId = UUID();
-const kTenantId = ObjectId().str;
-const kReadPreference = {
- mode: "primary"
-};
-const migrationOpts = {
- migrationIdString: extractUUIDFromObject(kMigrationId),
- tenantId: kTenantId,
- readPreference: kReadPreference
-};
-
-const dbName = tenantMigrationTest.tenantDB(kTenantId, "testDB");
-const collName = "coll";
-const dbName1 = dbName + '_db_1';
-const dbName2 = dbName + '_db_2';
-const db2Coll1 = collName + "_db_2_1";
-const db2Coll2 = collName + "_db_2_2";
-
-// Add a large amount of data to the donor.
-jsTestLog("Adding data to donor.");
-const dataForEachCollection = [...Array(100).keys()].map((i) => ({a: i, b: 'metanoia'}));
-tenantMigrationTest.insertDonorDB(dbName1, collName + "_1", dataForEachCollection);
-tenantMigrationTest.insertDonorDB(dbName2, db2Coll1, dataForEachCollection);
-tenantMigrationTest.insertDonorDB(dbName2, db2Coll2, dataForEachCollection);
-
-const originalRecipientPrimary = tenantMigrationTest.getRecipientPrimary();
-const newRecipientPrimary = tenantMigrationTest.getRecipientRst().getSecondaries()[0];
-
-jsTestLog("Collecting the stats of the databases and collections from the donor.");
-const donorPrimary = tenantMigrationTest.getDonorPrimary();
-const donorDB2 = donorPrimary.getDB(dbName2);
-
-const db1Size = assert.commandWorked(donorPrimary.getDB(dbName1).runCommand({dbStats: 1})).dataSize;
-const db2Size = assert.commandWorked(donorDB2.runCommand({dbStats: 1})).dataSize;
-const db2Collection1Size = assert.commandWorked(donorDB2.runCommand({collStats: db2Coll1})).size;
-const db2Collection2Size = assert.commandWorked(donorDB2.runCommand({collStats: db2Coll2})).size;
-
-const donorStats = {
- db1Size,
- db2Size,
- db2Collection1Size,
- db2Collection2Size
-};
-jsTestLog("Collected the following stats on the donor: " + tojson(donorStats));
-
-// The last collection to be cloned is the one with a greater UUID.
-const collInfo = donorDB2.getCollectionInfos();
-const uuid1 = collInfo[0].info.uuid;
-const uuid2 = collInfo[1].info.uuid;
-const lastCollection = (uuid1 > uuid2) ? db2Coll1 : db2Coll2;
-
-// Create a failpoint to pause after one batch of the second database's second collection has been
-// cloned.
-const fpAfterBatchOfSecondDB = configureFailPoint(
- originalRecipientPrimary,
- "tenantMigrationHangCollectionClonerAfterHandlingBatchResponse",
- {nss: originalRecipientPrimary.getDB(dbName2).getCollection(lastCollection).getFullName()});
-
-jsTestLog("Starting tenant migration with migrationId: " + kMigrationId +
- ", tenantId: " + kTenantId);
-assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts));
-
-let res = 0;
-let currOp = 0;
-jsTestLog("Waiting until one batch of second database has been cloned by original primary.");
-fpAfterBatchOfSecondDB.wait();
-// Since documents are inserted on a separate thread, wait until the expected stats are seen. The
-// failpoint needs to be maintained so that the next batch isn't processed.
-assert.soon(() => {
- res = originalRecipientPrimary.adminCommand(
- {currentOp: true, desc: "tenant recipient migration"});
- currOp = res.inprog[0];
-
- // Wait until one batch of documents of the second database's second collection has been copied.
- return currOp.approxTotalBytesCopied > db1Size + db2Collection1Size;
-}, res);
-
-assert.eq(currOp.approxTotalDataSize, db1Size + db2Size, res);
-// Since the two collections on the second database are the same size,
-// 'db1Size + db2Collection1Size' and 'db1Size + db2Collection2Size' evaluate to the same value.
-assert.gt(currOp.approxTotalBytesCopied, db1Size + db2Collection1Size, res);
-assert.lt(currOp.approxTotalBytesCopied, db1Size + db2Size, res);
-assert.eq(currOp.databases.databasesClonedBeforeFailover, 0, res);
-assert.eq(currOp.databases[dbName2].clonedCollectionsBeforeFailover, 0, res);
-const bytesCopiedIncludingSecondDB = currOp.approxTotalBytesCopied;
-jsTestLog("Bytes copied after first batch of second database: " + bytesCopiedIncludingSecondDB);
-
-// Wait until the batch of the second collection of the second database has been replicated from the
-// original primary to the new primary. Then, step up the new primary.
-const fpAfterCreatingCollectionOfSecondDB =
- configureFailPoint(newRecipientPrimary, "tenantCollectionClonerHangAfterCreateCollection");
-tenantMigrationTest.getRecipientRst().stepUp(newRecipientPrimary);
-fpAfterBatchOfSecondDB.off();
-
-jsTestLog("Wait until the new primary creates collection of second database.");
-fpAfterCreatingCollectionOfSecondDB.wait();
-res = newRecipientPrimary.adminCommand({currentOp: true, desc: "tenant recipient migration"});
-currOp = res.inprog[0];
-assert.eq(currOp.approxTotalDataSize, db1Size + db2Size, res);
-assert.eq(currOp.approxTotalBytesCopied, bytesCopiedIncludingSecondDB, res);
-assert.eq(currOp.databases.databasesClonedBeforeFailover, 1, res);
-assert.eq(currOp.databases[dbName2].clonedCollectionsBeforeFailover, 1, res);
-fpAfterCreatingCollectionOfSecondDB.off();
-
-// After the migration completes, the total bytes copied should be equal to the total data size.
-jsTestLog("Waiting for migration to complete.");
-TenantMigrationTest.assertCommitted(tenantMigrationTest.waitForMigrationToComplete(migrationOpts));
-res = newRecipientPrimary.adminCommand({currentOp: true, desc: "tenant recipient migration"});
-currOp = res.inprog[0];
-assert.eq(currOp.approxTotalDataSize, db1Size + db2Size, res);
-assert.eq(currOp.approxTotalBytesCopied, db1Size + db2Size, res);
-assert.eq(currOp.databases.databasesClonedBeforeFailover, 1, res);
-assert.eq(currOp.databases[dbName2].clonedCollectionsBeforeFailover, 1, res);
-
-tenantMigrationTest.stop();
diff --git a/jstests/replsets/tenant_migration_cluster_time_keys_cloning.js b/jstests/replsets/tenant_migration_cluster_time_keys_cloning.js
index f3737dd4706..f3ab1df7e24 100644
--- a/jstests/replsets/tenant_migration_cluster_time_keys_cloning.js
+++ b/jstests/replsets/tenant_migration_cluster_time_keys_cloning.js
@@ -177,55 +177,58 @@ const migrationX509Options = makeX509OptionsForTest();
tenantMigrationTest.stop();
})();
-(() => {
- jsTest.log("Test that the donor and recipient correctly copy each other's cluster time keys " +
- "when there is recipient failover.");
- const recipientRst = new ReplSetTest({
- nodes: 3,
- name: "recipientRst",
- serverless: true,
- nodeOptions: migrationX509Options.recipient
- });
- recipientRst.startSet();
- recipientRst.initiate();
- if (isShardMergeEnabled(recipientRst.getPrimary().getDB("adminDB"))) {
- jsTestLog("Skip: featureFlagShardMerge enabled, but shard merge does not survive failover");
- recipientRst.stopSet();
- return;
- }
-
- const tenantMigrationTest = new TenantMigrationTest({name: jsTestName(), recipientRst});
-
- const recipientPrimary = recipientRst.getPrimary();
- const fp = configureFailPoint(recipientPrimary,
- "fpAfterPersistingTenantMigrationRecipientInstanceStateDoc",
- {action: "hang"});
-
- const migrationId = UUID();
- const migrationOpts = {
- migrationIdString: extractUUIDFromObject(migrationId),
- tenantId: kTenantId1,
- };
- assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts));
- fp.wait();
-
- assert.commandWorked(
- recipientPrimary.adminCommand({replSetStepDown: ReplSetTest.kForeverSecs, force: true}));
- assert.commandWorked(recipientPrimary.adminCommand({replSetFreeze: 0}));
-
- fp.off();
- TenantMigrationTest.assertCommitted(tenantMigrationTest.waitForMigrationToComplete(
- migrationOpts, true /* retryOnRetryableErrors */));
-
- assertCopiedExternalKeys(tenantMigrationTest, migrationId);
-
- // After another migration, the first's keys should still exist.
- runMigrationAndAssertExternalKeysCopied(tenantMigrationTest, kTenantId2);
- assertCopiedExternalKeys(tenantMigrationTest, migrationId);
-
- recipientRst.stopSet();
- tenantMigrationTest.stop();
-})();
+// TODO SERVER-76128: Tenant Migrations are not robust to recipient failover.
+// (() => {
+// jsTest.log("Test that the donor and recipient correctly copy each other's cluster time keys "
+// +
+// "when there is recipient failover.");
+// const recipientRst = new ReplSetTest({
+// nodes: 3,
+// name: "recipientRst",
+// serverless: true,
+// nodeOptions: migrationX509Options.recipient
+// });
+// recipientRst.startSet();
+// recipientRst.initiate();
+// if (isShardMergeEnabled(recipientRst.getPrimary().getDB("adminDB"))) {
+// jsTestLog("Skip: featureFlagShardMerge enabled, but shard merge does not survive
+// failover");
+// recipientRst.stopSet();
+// return;
+// }
+
+// const tenantMigrationTest = new TenantMigrationTest({name: jsTestName(), recipientRst});
+
+// const recipientPrimary = recipientRst.getPrimary();
+// const fp = configureFailPoint(recipientPrimary,
+// "fpAfterPersistingTenantMigrationRecipientInstanceStateDoc",
+// {action: "hang"});
+
+// const migrationId = UUID();
+// const migrationOpts = {
+// migrationIdString: extractUUIDFromObject(migrationId),
+// tenantId: kTenantId1,
+// };
+// assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts));
+// fp.wait();
+
+// assert.commandWorked(
+// recipientPrimary.adminCommand({replSetStepDown: ReplSetTest.kForeverSecs, force: true}));
+// assert.commandWorked(recipientPrimary.adminCommand({replSetFreeze: 0}));
+
+// fp.off();
+// TenantMigrationTest.assertCommitted(tenantMigrationTest.waitForMigrationToComplete(
+// migrationOpts, true /* retryOnRetryableErrors */));
+
+// assertCopiedExternalKeys(tenantMigrationTest, migrationId);
+
+// // After another migration, the first's keys should still exist.
+// runMigrationAndAssertExternalKeysCopied(tenantMigrationTest, kTenantId2);
+// assertCopiedExternalKeys(tenantMigrationTest, migrationId);
+
+// recipientRst.stopSet();
+// tenantMigrationTest.stop();
+// })();
(() => {
jsTest.log("Test that the donor waits for copied external keys to replicate to every node");
diff --git a/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js b/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js
deleted file mode 100644
index b3b158c7134..00000000000
--- a/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js
+++ /dev/null
@@ -1,488 +0,0 @@
-/**
- * Tests that tenant migrations resume successfully on donor stepup and restart.
- *
- * Incompatible with shard merge, which can't handle restart.
- *
- * @tags: [
- * incompatible_with_macos,
- * incompatible_with_shard_merge,
- * incompatible_with_windows_tls,
- * # Some tenant migration statistics field names were changed in 6.1.
- * requires_fcv_61,
- * requires_majority_read_concern,
- * requires_persistence,
- * # Tenant migrations are only used in serverless.
- * serverless,
- * ]
- */
-
-import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js";
-import {
- forgetMigrationAsync,
- isShardMergeEnabled,
- makeX509OptionsForTest,
- runMigrationAsync,
- tryAbortMigrationAsync
-} from "jstests/replsets/libs/tenant_migration_util.js";
-
-load("jstests/libs/fail_point_util.js");
-load("jstests/libs/parallelTester.js");
-load("jstests/libs/uuid_util.js");
-load("jstests/replsets/rslib.js"); // 'createRstArgs'
-
-const kMaxSleepTimeMS = 100;
-const kTenantId = ObjectId().str;
-const kMigrationFpNames = [
- "pauseTenantMigrationBeforeLeavingDataSyncState",
- "pauseTenantMigrationBeforeLeavingBlockingState",
- "abortTenantMigrationBeforeLeavingBlockingState",
- ""
-];
-
-// Set the delay before a state doc is garbage collected to be short to speed up the test but long
-// enough for the state doc to still be around after stepup or restart.
-const kGarbageCollectionDelayMS = 30 * 1000;
-
-// Set the TTL monitor to run at a smaller interval to speed up the test.
-const kTTLMonitorSleepSecs = 1;
-
-const migrationX509Options = makeX509OptionsForTest();
-
-/**
- * Runs the donorStartMigration command to start a migration, and interrupts the migration on the
- * donor using the 'interruptFunc', and asserts that migration eventually commits.
- */
-function testDonorStartMigrationInterrupt(interruptFunc,
- {donorRestarted = false, disableForShardMerge = true}) {
- const donorRst = new ReplSetTest(
- {nodes: 3, name: "donorRst", serverless: true, nodeOptions: migrationX509Options.donor});
-
- donorRst.startSet();
- donorRst.initiate();
-
- const tenantMigrationTest = new TenantMigrationTest({name: jsTestName(), donorRst});
-
- let donorPrimary = tenantMigrationTest.getDonorPrimary();
- const recipientPrimary = tenantMigrationTest.getRecipientPrimary();
-
- if (disableForShardMerge && isShardMergeEnabled(recipientPrimary.getDB("admin"))) {
- jsTest.log("Skipping test for shard merge");
- tenantMigrationTest.stop();
- donorRst.stopSet();
- return;
- }
-
- const migrationId = UUID();
- const migrationOpts = {
- migrationIdString: extractUUIDFromObject(migrationId),
- tenantId: kTenantId,
- recipientConnString: tenantMigrationTest.getRecipientConnString(),
- };
- const donorRstArgs = createRstArgs(donorRst);
-
- const runMigrationThread =
- new Thread(runMigrationAsync, migrationOpts, donorRstArgs, {retryOnRetryableErrors: true});
- runMigrationThread.start();
-
- // Wait for donorStartMigration command to start.
- assert.soon(() => donorPrimary.adminCommand({currentOp: true, desc: "tenant donor migration"})
- .inprog.length > 0);
-
- sleep(Math.random() * kMaxSleepTimeMS);
- interruptFunc(donorRst);
-
- TenantMigrationTest.assertCommitted(runMigrationThread.returnData());
- tenantMigrationTest.waitForDonorNodesToReachState(donorRst.nodes,
- migrationId,
- migrationOpts.tenantId,
- TenantMigrationTest.DonorState.kCommitted);
- assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString));
-
- donorPrimary = tenantMigrationTest.getDonorPrimary(); // Could change after interrupt.
- const donorStats = tenantMigrationTest.getTenantMigrationStats(donorPrimary);
- jsTestLog(`Stats at the donor primary: ${tojson(donorStats)}`);
- if (donorRestarted) {
- // If full restart happened the count could be lost completely.
- assert.gte(1, donorStats.totalMigrationDonationsCommitted);
- } else {
- // The double counting happens when the failover happens after migration completes
- // but before the state doc GC mark is persisted. While this test is targeting this
- // scenario it is low probability in production.
- assert(1 == donorStats.totalMigrationDonationsCommitted ||
- 2 == donorStats.totalMigrationDonationsCommitted);
- }
- // Skip checking the stats on the recipient since enableRecipientTesting is false
- // so the recipient is forced to respond to recipientSyncData without starting the
- // migration.
-
- tenantMigrationTest.stop();
- donorRst.stopSet();
-}
-
-/**
- * Starts a migration and waits for it to commit, then runs the donorForgetMigration, and interrupts
- * the donor using the 'interruptFunc', and asserts that the migration state is eventually garbage
- * collected.
- */
-function testDonorForgetMigrationInterrupt(interruptFunc) {
- const donorRst = new ReplSetTest({
- nodes: 3,
- name: "donorRst",
- serverless: true,
- nodeOptions: Object.assign({}, migrationX509Options.donor, {
- setParameter: {
- tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS,
- ttlMonitorSleepSecs: kTTLMonitorSleepSecs,
- }
- })
- });
- const recipientRst = new ReplSetTest({
- nodes: 1,
- name: "recipientRst",
- serverless: true,
- nodeOptions: Object.assign({}, migrationX509Options.recipient, {
- setParameter: {
- tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS,
- ttlMonitorSleepSecs: kTTLMonitorSleepSecs,
- }
- })
- });
-
- donorRst.startSet();
- donorRst.initiate();
-
- recipientRst.startSet();
- recipientRst.initiate();
-
- const tenantMigrationTest =
- new TenantMigrationTest({name: jsTestName(), donorRst, recipientRst});
-
- const donorPrimary = tenantMigrationTest.getDonorPrimary();
-
- const migrationId = UUID();
- const migrationOpts = {
- migrationIdString: extractUUIDFromObject(migrationId),
- tenantId: kTenantId,
- recipientConnString: recipientRst.getURL(),
- };
- const donorRstArgs = createRstArgs(donorRst);
-
- TenantMigrationTest.assertCommitted(
- tenantMigrationTest.runMigration(migrationOpts, {automaticForgetMigration: false}));
- const forgetMigrationThread = new Thread(forgetMigrationAsync,
- migrationOpts.migrationIdString,
- donorRstArgs,
- true /* retryOnRetryableErrors */);
- forgetMigrationThread.start();
-
- // Wait for donorForgetMigration command to start.
- assert.soon(() => {
- const res = assert.commandWorked(
- donorPrimary.adminCommand({currentOp: true, desc: "tenant donor migration"}));
- return res.inprog[0].expireAt != null;
- });
- sleep(Math.random() * kMaxSleepTimeMS);
- interruptFunc(donorRst);
-
- assert.commandWorkedOrFailedWithCode(
- tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString),
- ErrorCodes.NoSuchTenantMigration);
-
- assert.commandWorked(forgetMigrationThread.returnData());
- tenantMigrationTest.waitForMigrationGarbageCollection(migrationId, migrationOpts.tenantId);
-
- tenantMigrationTest.stop();
- donorRst.stopSet();
- recipientRst.stopSet();
-}
-
-/**
- * Starts a migration and sets the passed in failpoint, then runs the donorAbortMigration, and
- * interrupts the donor using the 'interruptFunc', and asserts that the migration state is
- * eventually garbage collected.
- */
-function testDonorAbortMigrationInterrupt(
- interruptFunc, fpName, {fpWaitBeforeAbort = false, isShutdown = false} = {}) {
- const donorRst = new ReplSetTest({
- nodes: 3,
- name: "donorRst",
- serverless: true,
- nodeOptions: Object.assign({}, migrationX509Options.donor, {
- setParameter: {
- tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS,
- ttlMonitorSleepSecs: kTTLMonitorSleepSecs,
- }
- })
- });
- const recipientRst = new ReplSetTest({
- nodes: 1,
- name: "recipientRst",
- serverless: true,
- nodeOptions: Object.assign({}, migrationX509Options.recipient, {
- setParameter: {
- tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS,
- ttlMonitorSleepSecs: kTTLMonitorSleepSecs,
- }
- })
- });
-
- donorRst.startSet();
- donorRst.initiate();
-
- recipientRst.startSet();
- recipientRst.initiate();
-
- const tenantMigrationTest =
- new TenantMigrationTest({name: jsTestName(), donorRst, recipientRst});
-
- const migrationId = UUID();
- const migrationOpts = {
- migrationIdString: extractUUIDFromObject(migrationId),
- tenantId: kTenantId,
- recipientConnString: recipientRst.getURL(),
- };
- const donorRstArgs = createRstArgs(donorRst);
- let donorPrimary = tenantMigrationTest.getDonorPrimary();
-
- // If we passed in a valid failpoint we set it, otherwise we let the migration run normally.
- let fp;
- if (fpName) {
- fp = configureFailPoint(donorPrimary, fpName);
- }
-
- assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts));
-
- if (fp && !isShutdown && fpWaitBeforeAbort) {
- fp.wait();
- }
-
- const tryAbortThread = new Thread(tryAbortMigrationAsync,
- {migrationIdString: migrationOpts.migrationIdString},
- donorRstArgs,
- true /* retryOnRetryableErrors */);
- tryAbortThread.start();
-
- // Wait for donorAbortMigration command to start.
- assert.soon(() => {
- const res = assert.commandWorked(
- donorPrimary.adminCommand({currentOp: true, desc: "tenant donor migration"}));
- return res.inprog[0].receivedCancellation;
- });
-
- interruptFunc(donorRst);
-
- if (fp && !isShutdown) {
- // Turn off failpoint in order to allow the migration to resume after stepup.
- fp.off();
- }
-
- tryAbortThread.join();
-
- let res = tryAbortThread.returnData();
- assert.commandWorkedOrFailedWithCode(res, ErrorCodes.TenantMigrationCommitted);
-
- donorPrimary = tenantMigrationTest.getDonorPrimary();
- let configDonorsColl = donorPrimary.getCollection(TenantMigrationTest.kConfigDonorsNS);
- let donorDoc = configDonorsColl.findOne({tenantId: kTenantId});
-
- if (!res.ok) {
- assert.eq(donorDoc.state, TenantMigrationTest.DonorState.kCommitted);
- } else {
- assert.eq(donorDoc.state, TenantMigrationTest.DonorState.kAborted);
- }
-
- tenantMigrationTest.stop();
- donorRst.stopSet();
- recipientRst.stopSet();
-}
-
-/**
- * Starts a migration and sets the passed in failpoint, then either waits for the failpoint or lets
- * the migration run successfully and interrupts the donor using the 'interruptFunc'. After
- * restarting, check the to see if the donorDoc data has persisted.
- */
-function testStateDocPersistenceOnFailover(interruptFunc, fpName, isShutdown = false) {
- const donorRst = new ReplSetTest(
- {nodes: 3, name: "donorRst", serverless: true, nodeOptions: migrationX509Options.donor});
-
- donorRst.startSet();
- donorRst.initiate();
-
- const tenantMigrationTest = new TenantMigrationTest({name: jsTestName(), donorRst});
-
- const migrationId = UUID();
- const migrationOpts = {
- migrationIdString: extractUUIDFromObject(migrationId),
- tenantId: kTenantId,
- recipientConnString: tenantMigrationTest.getRecipientConnString(),
- };
- let donorPrimary = tenantMigrationTest.getDonorPrimary();
-
- // If we passed in a valid failpoint we set it, otherwise we let the migration run normally.
- let fp;
- if (fpName) {
- fp = configureFailPoint(donorPrimary, fpName);
- assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts));
- fp.wait();
- } else {
- TenantMigrationTest.assertCommitted(tenantMigrationTest.runMigration(migrationOpts));
- }
-
- let configDonorsColl = donorPrimary.getCollection(TenantMigrationTest.kConfigDonorsNS);
- let donorDocBeforeFailover = configDonorsColl.findOne({tenantId: kTenantId});
-
- interruptFunc(tenantMigrationTest.getDonorRst());
-
- if (fp && !isShutdown) {
- // Turn off failpoint in order to allow the migration to resume after stepup.
- fp.off();
- }
-
- donorPrimary = tenantMigrationTest.getDonorPrimary();
- configDonorsColl = donorPrimary.getCollection(TenantMigrationTest.kConfigDonorsNS);
- let donorDocAfterFailover = configDonorsColl.findOne({tenantId: kTenantId});
-
- // Check persisted fields in the donor doc.
- assert.eq(donorDocBeforeFailover._id, donorDocAfterFailover._id);
- assert.eq(donorDocBeforeFailover.recipientConnString,
- donorDocAfterFailover.recipientConnString);
- assert.eq(donorDocBeforeFailover.readPreference, donorDocAfterFailover.readPreference);
- assert.eq(donorDocBeforeFailover.startMigrationDonorTimestamp,
- donorDocAfterFailover.startMigrationDonorTimestamp);
- assert.eq(donorDocBeforeFailover.migration, donorDocAfterFailover.migration);
- assert.eq(donorDocBeforeFailover.tenantId, donorDocAfterFailover.tenantId);
- assert.eq(donorDocBeforeFailover.donorCertificateForRecipient,
- donorDocAfterFailover.donorCertificateForRecipient);
- assert.eq(donorDocBeforeFailover.recipientCertificateForDonor,
- donorDocAfterFailover.recipientCertificateForDonor);
- assert.eq(donorDocBeforeFailover.migrationStart, donorDocAfterFailover.migrationStart);
-
- tenantMigrationTest.stop();
- donorRst.stopSet();
-}
-
-(() => {
- jsTest.log("Test that the migration resumes on stepup");
- testDonorStartMigrationInterrupt((donorRst) => {
- // Force the primary to step down but make it likely to step back up.
- const donorPrimary = donorRst.getPrimary();
- assert.commandWorked(
- donorPrimary.adminCommand({replSetStepDown: ReplSetTest.kForeverSecs, force: true}));
- assert.commandWorked(donorPrimary.adminCommand({replSetFreeze: 0}));
- }, {donorRestarted: false});
-})();
-
-(() => {
- jsTest.log("Test that the migration resumes after restart");
- testDonorStartMigrationInterrupt((donorRst) => {
- // Skip validation on shutdown because the full validation can conflict with the tenant
- // migration and cause it to fail.
- donorRst.stopSet(null /* signal */, true /*forRestart */, {skipValidation: true});
- donorRst.startSet({restart: true});
- }, {donorRestarted: true, disableForShardMerge: true});
-})();
-
-(() => {
- jsTest.log("Test that the donorForgetMigration command can be retried on stepup");
- testDonorForgetMigrationInterrupt((donorRst) => {
- // Force the primary to step down but make it likely to step back up.
- const donorPrimary = donorRst.getPrimary();
- assert.commandWorked(
- donorPrimary.adminCommand({replSetStepDown: ReplSetTest.kForeverSecs, force: true}));
- assert.commandWorked(donorPrimary.adminCommand({replSetFreeze: 0}));
- });
-})();
-
-(() => {
- jsTest.log("Test that the donorForgetMigration command can be retried after restart");
- testDonorForgetMigrationInterrupt((donorRst) => {
- // Skip validation on shutdown because the full validation can conflict with the tenant
- // migration and cause it to fail.
- donorRst.stopSet(null /* signal */, true /*forRestart */, {skipValidation: true});
- donorRst.startSet({restart: true});
- });
-})();
-
-(() => {
- jsTest.log("Test that the donorAbortMigration command can be retried after restart");
-
- kMigrationFpNames.forEach(fpName => {
- if (!fpName) {
- jsTest.log("Testing without setting a failpoint.");
- } else {
- jsTest.log("Testing with failpoint: " + fpName);
- }
-
- testDonorAbortMigrationInterrupt((donorRst) => {
- // Skip validation on shutdown because the full validation can conflict with the tenant
- // migration and cause it to fail.
- donorRst.stopSet(null /* signal */, true /*forRestart */, {skipValidation: true});
- donorRst.startSet({restart: true});
- }, fpName, {isShutdown: true});
- });
-})();
-
-(() => {
- jsTest.log(
- "Test that the donorAbortMigration command fails if issued after state == kCommitted");
-
- testDonorAbortMigrationInterrupt((donorRst) => {},
- "pauseTenantMigrationAfterUpdatingToCommittedState",
- {fpWaitBeforeAbort: true});
-})();
-
-(() => {
- jsTest.log("Test that the donorAbortMigration command can be retried on stepup");
- kMigrationFpNames.forEach(fpName => {
- if (!fpName) {
- jsTest.log("Testing without setting a failpoint.");
- } else {
- jsTest.log("Testing with failpoint: " + fpName);
- }
-
- testDonorAbortMigrationInterrupt((donorRst) => {
- // Force the primary to step down but make it likely to step back up.
- const donorPrimary = donorRst.getPrimary();
- assert.commandWorked(donorPrimary.adminCommand(
- {replSetStepDown: ReplSetTest.kForeverSecs, force: true}));
- assert.commandWorked(donorPrimary.adminCommand({replSetFreeze: 0}));
- }, fpName);
- });
-})();
-
-(() => {
- jsTest.log("Test stateDoc data persistence on restart.");
- kMigrationFpNames.forEach(fpName => {
- if (!fpName) {
- jsTest.log("Testing without setting a failpoint.");
- } else {
- jsTest.log("Testing with failpoint: " + fpName);
- }
-
- testStateDocPersistenceOnFailover((donorRst) => {
- // Skip validation on shutdown because the full validation can conflict with the tenant
- // migration and cause it to fail.
- donorRst.stopSet(null /* signal */, true /*forRestart */, {skipValidation: true});
- donorRst.startSet({restart: true});
- }, fpName, true);
- });
-})();
-
-(() => {
- jsTest.log("Test stateDoc data persistence on stepup.");
- kMigrationFpNames.forEach(fpName => {
- if (!fpName) {
- jsTest.log("Testing without setting a failpoint.");
- } else {
- jsTest.log("Testing with failpoint: " + fpName);
- }
-
- testStateDocPersistenceOnFailover((donorRst) => {
- // Force the primary to step down but make it likely to step back up.
- const donorPrimary = donorRst.getPrimary();
- assert.commandWorked(donorPrimary.adminCommand(
- {replSetStepDown: ReplSetTest.kForeverSecs, force: true}));
- assert.commandWorked(donorPrimary.adminCommand({replSetFreeze: 0}));
- }, fpName);
- });
-})();
diff --git a/jstests/replsets/tenant_migration_external_keys_ttl.js b/jstests/replsets/tenant_migration_external_keys_ttl.js
index 28611f9abaf..172600f1d2a 100644
--- a/jstests/replsets/tenant_migration_external_keys_ttl.js
+++ b/jstests/replsets/tenant_migration_external_keys_ttl.js
@@ -286,38 +286,39 @@ function makeTestParams() {
teardown();
}
- jsTestLog("Recipient failover before receiving forgetMigration");
- {
- const {tmt, teardown} = setup();
- const [tenantId, migrationId, migrationOpts] = makeTestParams();
- const recipientPrimary = tmt.getRecipientPrimary();
- const fp = configureFailPoint(recipientPrimary,
- "fpAfterConnectingTenantMigrationRecipientInstance",
- {action: "hang"});
-
- assert.commandWorked(tmt.startMigration(migrationOpts));
- fp.wait();
-
- assert.commandWorked(recipientPrimary.adminCommand(
- {replSetStepDown: ReplSetTest.kForeverSecs, force: true}));
- assert.commandWorked(recipientPrimary.adminCommand({replSetFreeze: 0}));
- fp.off();
-
- TenantMigrationTest.assertCommitted(
- tmt.waitForMigrationToComplete(migrationOpts, true /* retryOnRetryableErrors */));
-
- // The keys should have been created without a TTL deadline.
- verifyExternalKeys(tmt.getDonorPrimary(), {migrationId, expectTTLValue: false});
- verifyExternalKeys(tmt.getRecipientPrimary(), {migrationId, expectTTLValue: false});
-
- assert.commandWorked(tmt.forgetMigration(migrationOpts.migrationIdString));
-
- // After running donorForgetMigration, the TTL value should be updated. The default TTL
- // buffer is 1 day so the keys will not have been deleted.
- verifyExternalKeys(tmt.getDonorPrimary(), {migrationId, expectTTLValue: true});
- verifyExternalKeys(tmt.getRecipientPrimary(), {migrationId, expectTTLValue: true});
- teardown();
- }
+ // TODO SERVER-76128: Tenant Migrations are not robust to recipient failover.
+ // jsTestLog("Recipient failover before receiving forgetMigration");
+ // {
+ // const {tmt, teardown} = setup();
+ // const [tenantId, migrationId, migrationOpts] = makeTestParams();
+ // const recipientPrimary = tmt.getRecipientPrimary();
+ // const fp = configureFailPoint(recipientPrimary,
+ // "fpAfterConnectingTenantMigrationRecipientInstance",
+ // {action: "hang"});
+
+ // assert.commandWorked(tmt.startMigration(migrationOpts));
+ // fp.wait();
+
+ // assert.commandWorked(recipientPrimary.adminCommand(
+ // {replSetStepDown: ReplSetTest.kForeverSecs, force: true}));
+ // assert.commandWorked(recipientPrimary.adminCommand({replSetFreeze: 0}));
+ // fp.off();
+
+ // TenantMigrationTest.assertCommitted(
+ // tmt.waitForMigrationToComplete(migrationOpts, true /* retryOnRetryableErrors */));
+
+ // // The keys should have been created without a TTL deadline.
+ // verifyExternalKeys(tmt.getDonorPrimary(), {migrationId, expectTTLValue: false});
+ // verifyExternalKeys(tmt.getRecipientPrimary(), {migrationId, expectTTLValue: false});
+
+ // assert.commandWorked(tmt.forgetMigration(migrationOpts.migrationIdString));
+
+ // // After running donorForgetMigration, the TTL value should be updated. The default TTL
+ // // buffer is 1 day so the keys will not have been deleted.
+ // verifyExternalKeys(tmt.getDonorPrimary(), {migrationId, expectTTLValue: true});
+ // verifyExternalKeys(tmt.getRecipientPrimary(), {migrationId, expectTTLValue: true});
+ // teardown();
+ // }
jsTestLog(
"Donor failover after receiving forgetMigration before marking keys garbage collectable");
@@ -355,39 +356,42 @@ function makeTestParams() {
teardown();
}
- jsTestLog(
- "Recipient failover after receiving forgetMigration before marking keys garbage collectable");
- {
- const {tmt, donorRst, teardown} = setup();
- const [tenantId, migrationId, migrationOpts] = makeTestParams();
- const recipientPrimary = tmt.getRecipientPrimary();
-
- assert.commandWorked(tmt.startMigration(migrationOpts));
- TenantMigrationTest.assertCommitted(
- tmt.waitForMigrationToComplete(migrationOpts, true /* retryOnRetryableErrors */));
-
- // The keys should have been created without a TTL deadline.
- verifyExternalKeys(tmt.getDonorPrimary(), {migrationId, expectTTLValue: false});
- verifyExternalKeys(tmt.getRecipientPrimary(), {migrationId, expectTTLValue: false});
-
- const fp = configureFailPoint(
- recipientPrimary, "pauseTenantMigrationBeforeMarkingExternalKeysGarbageCollectable");
- const forgetMigrationThread = new Thread(
- forgetMigrationAsync, migrationOpts.migrationIdString, createRstArgs(donorRst), true);
- forgetMigrationThread.start();
- fp.wait();
-
- assert.commandWorked(recipientPrimary.adminCommand(
- {replSetStepDown: ReplSetTest.kForeverSecs, force: true}));
- assert.commandWorked(recipientPrimary.adminCommand({replSetFreeze: 0}));
- fp.off();
-
- assert.commandWorked(forgetMigrationThread.returnData());
-
- verifyExternalKeys(tmt.getDonorPrimary(), {migrationId, expectTTLValue: true});
- verifyExternalKeys(tmt.getRecipientPrimary(), {migrationId, expectTTLValue: true});
- teardown();
- }
+ // TODO SERVER-76128: Tenant Migrations are not robust to recipient failover.
+ // jsTestLog(
+ // "Recipient failover after receiving forgetMigration before marking keys garbage
+ // collectable");
+ // {
+ // const {tmt, donorRst, teardown} = setup();
+ // const [tenantId, migrationId, migrationOpts] = makeTestParams();
+ // const recipientPrimary = tmt.getRecipientPrimary();
+
+ // assert.commandWorked(tmt.startMigration(migrationOpts));
+ // TenantMigrationTest.assertCommitted(
+ // tmt.waitForMigrationToComplete(migrationOpts, true /* retryOnRetryableErrors */));
+
+ // // The keys should have been created without a TTL deadline.
+ // verifyExternalKeys(tmt.getDonorPrimary(), {migrationId, expectTTLValue: false});
+ // verifyExternalKeys(tmt.getRecipientPrimary(), {migrationId, expectTTLValue: false});
+
+ // const fp = configureFailPoint(
+ // recipientPrimary, "pauseTenantMigrationBeforeMarkingExternalKeysGarbageCollectable");
+ // const forgetMigrationThread = new Thread(
+ // forgetMigrationAsync, migrationOpts.migrationIdString, createRstArgs(donorRst),
+ // true);
+ // forgetMigrationThread.start();
+ // fp.wait();
+
+ // assert.commandWorked(recipientPrimary.adminCommand(
+ // {replSetStepDown: ReplSetTest.kForeverSecs, force: true}));
+ // assert.commandWorked(recipientPrimary.adminCommand({replSetFreeze: 0}));
+ // fp.off();
+
+ // assert.commandWorked(forgetMigrationThread.returnData());
+
+ // verifyExternalKeys(tmt.getDonorPrimary(), {migrationId, expectTTLValue: true});
+ // verifyExternalKeys(tmt.getRecipientPrimary(), {migrationId, expectTTLValue: true});
+ // teardown();
+ // }
jsTestLog("Donor failover after receiving forgetMigration after updating keys.");
{
@@ -433,49 +437,53 @@ function makeTestParams() {
teardown();
}
- jsTestLog("Recipient failover after receiving forgetMigration after updating keys.");
- {
- const {tmt, donorRst, recipientRst, teardown} = setup();
- // this test expects the external keys to expire, so lower the expiration timeouts.
- const lowerExternalKeysBufferSecs = 5;
- const lowerStateDocExpirationMS = 500;
- for (let conn of [...donorRst.nodes, ...recipientRst.nodes]) {
- setTenantMigrationExpirationParams(
- conn, lowerStateDocExpirationMS, lowerExternalKeysBufferSecs);
- }
- const [tenantId, migrationId, migrationOpts] = makeTestParams();
- const recipientPrimary = tmt.getRecipientPrimary();
-
- assert.commandWorked(tmt.startMigration(migrationOpts));
- TenantMigrationTest.assertCommitted(
- tmt.waitForMigrationToComplete(migrationOpts, true /* retryOnRetryableErrors */));
-
- // The keys should have been created without a TTL deadline.
- verifyExternalKeys(tmt.getDonorPrimary(), {migrationId, expectTTLValue: false});
- verifyExternalKeys(tmt.getRecipientPrimary(), {migrationId, expectTTLValue: false});
-
- const fp = configureFailPoint(
- recipientPrimary, "fpAfterReceivingRecipientForgetMigration", {action: "hang"});
- const forgetMigrationThread = new Thread(
- forgetMigrationAsync, migrationOpts.migrationIdString, createRstArgs(donorRst), true);
- forgetMigrationThread.start();
- fp.wait();
-
- // Let the keys expire on the donor before the state document is deleted to verify retrying
- // recipientForgetMigration can handle this case. The keys won't be deleted until the buffer
- // expires, so sleep to avoid wasted work.
- sleep((lowerExternalKeysBufferSecs * 1000) + lowerStateDocExpirationMS + 500);
- waitForExternalKeysToBeDeleted(tmt.getRecipientPrimary(), migrationId);
-
- assert.commandWorked(recipientPrimary.adminCommand(
- {replSetStepDown: ReplSetTest.kForeverSecs, force: true}));
- assert.commandWorked(recipientPrimary.adminCommand({replSetFreeze: 0}));
- fp.off();
-
- assert.commandWorked(forgetMigrationThread.returnData());
-
- // Eventually the donor's keys should be deleted too.
- waitForExternalKeysToBeDeleted(tmt.getDonorPrimary(), migrationId);
- teardown();
- }
+ // TODO SERVER-76128: Tenant Migrations are not robust to recipient failover.
+ // jsTestLog("Recipient failover after receiving forgetMigration after updating keys.");
+ // {
+ // const {tmt, donorRst, recipientRst, teardown} = setup();
+ // // this test expects the external keys to expire, so lower the expiration timeouts.
+ // const lowerExternalKeysBufferSecs = 5;
+ // const lowerStateDocExpirationMS = 500;
+ // for (let conn of [...donorRst.nodes, ...recipientRst.nodes]) {
+ // setTenantMigrationExpirationParams(
+ // conn, lowerStateDocExpirationMS, lowerExternalKeysBufferSecs);
+ // }
+ // const [tenantId, migrationId, migrationOpts] = makeTestParams();
+ // const recipientPrimary = tmt.getRecipientPrimary();
+
+ // assert.commandWorked(tmt.startMigration(migrationOpts));
+ // TenantMigrationTest.assertCommitted(
+ // tmt.waitForMigrationToComplete(migrationOpts, true /* retryOnRetryableErrors */));
+
+ // // The keys should have been created without a TTL deadline.
+ // verifyExternalKeys(tmt.getDonorPrimary(), {migrationId, expectTTLValue: false});
+ // verifyExternalKeys(tmt.getRecipientPrimary(), {migrationId, expectTTLValue: false});
+
+ // const fp = configureFailPoint(
+ // recipientPrimary, "fpAfterReceivingRecipientForgetMigration", {action: "hang"});
+ // const forgetMigrationThread = new Thread(
+ // forgetMigrationAsync, migrationOpts.migrationIdString, createRstArgs(donorRst),
+ // true);
+ // forgetMigrationThread.start();
+ // fp.wait();
+
+ // // Let the keys expire on the donor before the state document is deleted to verify
+ // retrying
+ // // recipientForgetMigration can handle this case. The keys won't be deleted until the
+ // buffer
+ // // expires, so sleep to avoid wasted work.
+ // sleep((lowerExternalKeysBufferSecs * 1000) + lowerStateDocExpirationMS + 500);
+ // waitForExternalKeysToBeDeleted(tmt.getRecipientPrimary(), migrationId);
+
+ // assert.commandWorked(recipientPrimary.adminCommand(
+ // {replSetStepDown: ReplSetTest.kForeverSecs, force: true}));
+ // assert.commandWorked(recipientPrimary.adminCommand({replSetFreeze: 0}));
+ // fp.off();
+
+ // assert.commandWorked(forgetMigrationThread.returnData());
+
+ // // Eventually the donor's keys should be deleted too.
+ // waitForExternalKeysToBeDeleted(tmt.getDonorPrimary(), migrationId);
+ // teardown();
+ // }
})();
diff --git a/jstests/replsets/tenant_migration_recipient_failover_before_creating_oplog_buffer.js b/jstests/replsets/tenant_migration_recipient_failover_before_creating_oplog_buffer.js
deleted file mode 100644
index 4cd9ae5a415..00000000000
--- a/jstests/replsets/tenant_migration_recipient_failover_before_creating_oplog_buffer.js
+++ /dev/null
@@ -1,54 +0,0 @@
-/**
- * Tests whether the recipient returns an appropriate error code to the donor when the recipient
- * primary is made to step down before creating the oplog buffer collection.
- *
- * @tags: [
- * incompatible_with_macos,
- * incompatible_with_shard_merge,
- * incompatible_with_windows_tls,
- * requires_persistence,
- * requires_replication,
- * serverless,
- * ]
- */
-
-import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js";
-load("jstests/libs/uuid_util.js"); // For extractUUIDFromObject().
-load("jstests/libs/fail_point_util.js"); // For configureFailPoint().
-
-const tenantMigrationTest =
- new TenantMigrationTest({name: jsTestName(), sharedOptions: {nodes: 2}});
-
-const kMigrationId = UUID();
-const kTenantId = ObjectId().str;
-const kReadPreference = {
- mode: "primary"
-};
-const migrationOpts = {
- migrationIdString: extractUUIDFromObject(kMigrationId),
- tenantId: kTenantId,
- readPreference: kReadPreference
-};
-
-const fpBeforeCreatingOplogBuffer =
- configureFailPoint(tenantMigrationTest.getRecipientPrimary(),
- "fpAfterRetrievingStartOpTimesMigrationRecipientInstance",
- {action: "hang"});
-
-jsTestLog("Starting tenant migration with migrationId: " + kMigrationId +
- ", tenantId: " + kTenantId);
-assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts));
-
-jsTestLog("Waiting until the recipient primary is about to create an oplog buffer collection.");
-fpBeforeCreatingOplogBuffer.wait();
-
-jsTestLog("Stepping a new primary up.");
-tenantMigrationTest.getRecipientRst().stepUp(
- tenantMigrationTest.getRecipientRst().getSecondaries()[0]);
-
-fpBeforeCreatingOplogBuffer.off();
-
-jsTestLog("Waiting for migration to complete.");
-TenantMigrationTest.assertCommitted(tenantMigrationTest.waitForMigrationToComplete(migrationOpts));
-
-tenantMigrationTest.stop();
diff --git a/jstests/replsets/tenant_migration_recipient_initial_sync_cloning.js b/jstests/replsets/tenant_migration_recipient_initial_sync_cloning.js
deleted file mode 100644
index b0c58169b01..00000000000
--- a/jstests/replsets/tenant_migration_recipient_initial_sync_cloning.js
+++ /dev/null
@@ -1,199 +0,0 @@
-/**
- * Tests that during tenant migration, a new recipient node's state document and in-memory state is
- * initialized after initial sync, when 1) the node hasn't begun cloning data yet, 2) is cloning
- * data, and 3) is in the tenant oplog application phase.
- *
- * @tags: [
- * incompatible_with_macos,
- * incompatible_with_shard_merge,
- * incompatible_with_windows_tls,
- * requires_majority_read_concern,
- * requires_persistence,
- * serverless,
- * incompatible_with_shard_merge,
- * ]
- */
-
-import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js";
-import {makeX509OptionsForTest} from "jstests/replsets/libs/tenant_migration_util.js";
-
-load("jstests/libs/fail_point_util.js");
-load("jstests/libs/uuid_util.js");
-load('jstests/replsets/rslib.js'); // for waitForNewlyAddedRemovalForNodeToBeCommitted
-
-const migrationX509Options = makeX509OptionsForTest();
-
-const testDBName = 'testDB';
-const testCollName = 'testColl';
-
-// Restarts a node, allows the node to go through initial sync, and then makes sure its state
-// matches up with the primary's. Returns the initial sync node.
-function restartNodeAndCheckState(tenantId, tenantMigrationTest, checkMtab) {
- // Restart a node and allow it to complete initial sync.
- const recipientRst = tenantMigrationTest.getRecipientRst();
- const originalRecipientPrimary = recipientRst.getPrimary();
-
- jsTestLog("Restarting a node from the recipient replica set.");
- let initialSyncNode = recipientRst.getSecondaries()[0];
- initialSyncNode =
- recipientRst.restart(initialSyncNode, {startClean: true, skipValidation: true});
-
- // Allow the new node to finish initial sync.
- waitForNewlyAddedRemovalForNodeToBeCommitted(originalRecipientPrimary,
- recipientRst.getNodeId(initialSyncNode));
- recipientRst.awaitSecondaryNodes();
- recipientRst.awaitReplication();
-
- jsTestLog("Ensure that the new node's state matches up with the primary's.");
- // Make sure the new node's state makes sense.
- let recipientDocOnPrimary = undefined;
- let recipientDocOnNewNode = undefined;
- assert.soon(
- () => {
- recipientDocOnPrimary =
- originalRecipientPrimary.getCollection(TenantMigrationTest.kConfigRecipientsNS)
- .findOne({tenantId});
- recipientDocOnNewNode =
- initialSyncNode.getCollection(TenantMigrationTest.kConfigRecipientsNS)
- .findOne({tenantId});
-
- return recipientDocOnPrimary.state == recipientDocOnNewNode.state;
- },
- `States never matched, primary: ${recipientDocOnPrimary}, on new node: ${
- recipientDocOnNewNode}`);
-
- if (checkMtab) {
- jsTestLog("Ensuring TenantMigrationAccessBlocker states match.");
- const primaryMtab = tenantMigrationTest.getTenantMigrationAccessBlocker(
- {recipientNode: originalRecipientPrimary, tenantId});
- const newNodeMtab = tenantMigrationTest.getTenantMigrationAccessBlocker(
- {recipientNode: initialSyncNode, tenantId});
-
- assert.eq(primaryMtab.recipient.state,
- newNodeMtab.recipient.state,
- `Mtab didn't match, primary: ${primaryMtab}, on new node: ${newNodeMtab}`);
- }
-
- return initialSyncNode;
-}
-
-// Restarts a node without tenant oplog application. Ensures its state matches up with the
-// primary's, and then steps it up.
-function restartNodeAndCheckStateWithoutOplogApplication(
- tenantId, tenantMigrationTest, checkMtab, fpOnRecipient) {
- fpOnRecipient.wait();
-
- const initialSyncNode = restartNodeAndCheckState(tenantId, tenantMigrationTest, checkMtab);
-
- jsTestLog("Stepping up the new node.");
- // Now step up the new node
- tenantMigrationTest.getRecipientRst().stepUp(initialSyncNode);
- fpOnRecipient.off();
-}
-
-// Pauses the recipient before the tenant oplog application phase, and inserts documents on the
-// donor that the recipient tenant oplog applier must apply. Then restarts node, allows initial
-// sync, and steps the restarted node up.
-function restartNodeAndCheckStateDuringOplogApplication(
- tenantId, tenantMigrationTest, checkMtab, fpOnRecipient) {
- fpOnRecipient.wait();
-
- // Pause the tenant oplog applier before applying a batch.
- const originalRecipientPrimary = tenantMigrationTest.getRecipientPrimary();
- const fpPauseOplogApplierOnBatch =
- configureFailPoint(originalRecipientPrimary, "fpBeforeTenantOplogApplyingBatch");
-
- // Insert documents into the donor after data cloning but before tenant oplog application, so
- // that the recipient has entries to apply during tenant oplog application.
- tenantMigrationTest.insertDonorDB(
- tenantMigrationTest.tenantDB(tenantId, testDBName),
- testCollName,
- [...Array(30).keys()].map((i) => ({a: i, b: "George Harrison - All Things Must Pass"})));
-
- // Wait until the oplog applier has started and is trying to apply a batch. Then restart a node.
- fpPauseOplogApplierOnBatch.wait();
- const initialSyncNode = restartNodeAndCheckState(tenantId, tenantMigrationTest, checkMtab);
-
- jsTestLog("Stepping up the new node.");
- // Now step up the new node
- tenantMigrationTest.getRecipientRst().stepUp(initialSyncNode);
- fpPauseOplogApplierOnBatch.off();
- fpOnRecipient.off();
-}
-
-// This function does the following:
-// 1. Configures a failpoint on the recipient primary, depending on the 'recipientFailpoint' that is
-// passed into the function.
-// 2. Starts a tenant migration.
-// 3. Waits for the recipient failpoint to be hit. Restarts a node, to make it go through initial
-// sync.
-// 4. Makes sure the restarted node's state is as expected.
-// 5. Steps up the restarted node as the recipient primary, lifts the recipient failpoint, and
-// allows the migration to complete.
-function runTestCase(recipientFailpoint, checkMtab, restartNodeAndCheckStateFunction) {
- const tenantId = ObjectId().str;
- const donorRst = new ReplSetTest({
- name: "donorRst",
- nodes: 1,
- serverless: true,
- nodeOptions: Object.assign(migrationX509Options.donor, {
- setParameter: {
- // Allow non-timestamped reads on donor after migration completes for testing.
- 'failpoint.tenantMigrationDonorAllowsNonTimestampedReads':
- tojson({mode: 'alwaysOn'}),
- }
- })
- });
- donorRst.startSet();
- donorRst.initiate();
-
- const tenantMigrationTest = new TenantMigrationTest({
- name: jsTestName(),
- donorRst,
- sharedOptions: {setParameter: {tenantApplierBatchSizeOps: 2}}
- });
-
- const migrationOpts = {migrationIdString: extractUUIDFromObject(UUID()), tenantId};
- const dbName = tenantMigrationTest.tenantDB(tenantId, testDBName);
- const originalRecipientPrimary = tenantMigrationTest.getRecipientPrimary();
-
- const fpOnRecipient =
- configureFailPoint(originalRecipientPrimary, recipientFailpoint, {action: "hang"});
- tenantMigrationTest.insertDonorDB(dbName, testCollName);
-
- jsTestLog(`Starting a tenant migration with migrationID ${
- migrationOpts.migrationIdString}, and tenantId ${tenantId}`);
- assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts));
-
- restartNodeAndCheckStateFunction(tenantId, tenantMigrationTest, checkMtab, fpOnRecipient);
-
- // Allow the migration to run to completion.
- jsTestLog("Allowing migration to run to completion.");
- TenantMigrationTest.assertCommitted(
- tenantMigrationTest.waitForMigrationToComplete(migrationOpts));
-
- assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString));
-
- tenantMigrationTest.stop();
- donorRst.stopSet();
-}
-
-// These two test cases are for before the mtab is created, and before the oplog applier has been
-// started.
-runTestCase("fpAfterStartingOplogFetcherMigrationRecipientInstance",
- false /* checkMtab */,
- restartNodeAndCheckStateWithoutOplogApplication);
-runTestCase("tenantCollectionClonerHangAfterCreateCollection",
- false /* checkMtab */,
- restartNodeAndCheckStateWithoutOplogApplication);
-
-// Test case to initial sync a node while the recipient is in the oplog application phase.
-runTestCase("fpBeforeFulfillingDataConsistentPromise",
- true /* checkMtab */,
- restartNodeAndCheckStateDuringOplogApplication);
-
-// A case after data consistency so that the mtab exists. We do not care about the oplog applier in
-// this case.
-runTestCase("fpAfterWaitForRejectReadsBeforeTimestamp",
- true /* checkMtab */,
- restartNodeAndCheckStateWithoutOplogApplication);
diff --git a/jstests/replsets/tenant_migration_recipient_resume_on_stepup_and_restart.js b/jstests/replsets/tenant_migration_recipient_resume_on_stepup_and_restart.js
deleted file mode 100644
index f00a4c4f083..00000000000
--- a/jstests/replsets/tenant_migration_recipient_resume_on_stepup_and_restart.js
+++ /dev/null
@@ -1,211 +0,0 @@
-/**
- * Tests that tenant migrations resume successfully on recipient stepup and restart.
- *
- * @tags: [
- * incompatible_with_macos,
- * incompatible_with_windows_tls,
- * incompatible_with_shard_merge,
- * # Some tenant migration statistics field names were changed in 6.1.
- * requires_fcv_61,
- * requires_majority_read_concern,
- * requires_persistence,
- * serverless,
- * ]
- */
-
-import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js";
-import {
- forgetMigrationAsync,
- makeX509OptionsForTest,
- runMigrationAsync,
-} from "jstests/replsets/libs/tenant_migration_util.js";
-
-load("jstests/libs/fail_point_util.js");
-load("jstests/libs/parallelTester.js");
-load("jstests/libs/uuid_util.js");
-load('jstests/replsets/rslib.js'); // 'createRstArgs'
-
-const kMaxSleepTimeMS = 100;
-const kTenantId = ObjectId().str;
-
-// Set the delay before a state doc is garbage collected to be short to speed up the test but long
-// enough for the state doc to still be around after stepup or restart.
-const kGarbageCollectionDelayMS = 30 * 1000;
-
-// Set the TTL monitor to run at a smaller interval to speed up the test.
-const kTTLMonitorSleepSecs = 1;
-
-const migrationX509Options = makeX509OptionsForTest();
-
-/**
- * Runs the donorStartMigration command to start a migration, and interrupts the migration on the
- * recipient using the 'interruptFunc' after the migration starts on the recipient side, and
- * asserts that migration eventually commits.
- * @param {recipientRestarted} bool is needed to properly assert the tenant migrations stat count.
- */
-function testRecipientSyncDataInterrupt(interruptFunc, recipientRestarted) {
- const recipientRst = new ReplSetTest({
- nodes: 3,
- name: "recipientRst",
- serverless: true,
- nodeOptions: migrationX509Options.recipient
- });
- recipientRst.startSet();
- recipientRst.initiate();
-
- const tenantMigrationTest = new TenantMigrationTest({name: jsTestName(), recipientRst});
-
- const donorRst = tenantMigrationTest.getDonorRst();
- const donorPrimary = tenantMigrationTest.getDonorPrimary();
- let recipientPrimary = tenantMigrationTest.getRecipientPrimary();
-
- const migrationId = UUID();
- const migrationOpts = {
- migrationIdString: extractUUIDFromObject(migrationId),
- tenantId: kTenantId,
- recipientConnString: tenantMigrationTest.getRecipientConnString(),
- };
- const donorRstArgs = createRstArgs(donorRst);
-
- const runMigrationThread = new Thread(runMigrationAsync, migrationOpts, donorRstArgs);
- runMigrationThread.start();
-
- // Wait for recipientSyncData command to start.
- assert.soon(
- () => recipientPrimary.adminCommand({currentOp: true, desc: "tenant recipient migration"})
- .inprog.length > 0);
-
- sleep(Math.random() * kMaxSleepTimeMS);
- interruptFunc(recipientRst);
-
- TenantMigrationTest.assertCommitted(runMigrationThread.returnData());
- tenantMigrationTest.waitForDonorNodesToReachState(donorRst.nodes,
- migrationId,
- migrationOpts.tenantId,
- TenantMigrationTest.DonorState.kCommitted);
- assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString));
-
- const donorStats = tenantMigrationTest.getTenantMigrationStats(donorPrimary);
- assert.eq(1, donorStats.totalMigrationDonationsCommitted);
-
- tenantMigrationTest.stop();
- recipientRst.stopSet();
-}
-
-/**
- * Starts a migration and waits for it to commit, then runs the donorForgetMigration, and interrupts
- * the recipient using the 'interruptFunc', and asserts that the migration state is eventually
- * garbage collected.
- */
-function testRecipientForgetMigrationInterrupt(interruptFunc) {
- const donorRst = new ReplSetTest({
- nodes: 1,
- name: "donorRst",
- serverless: true,
- nodeOptions: Object.assign({}, migrationX509Options.donor, {
- setParameter: {
- tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS,
- ttlMonitorSleepSecs: kTTLMonitorSleepSecs,
- }
- })
- });
- const recipientRst = new ReplSetTest({
- nodes: 3,
- name: "recipientRst",
- serverless: true,
- nodeOptions: Object.assign({}, migrationX509Options.recipient, {
- setParameter: {
- tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS,
- ttlMonitorSleepSecs: kTTLMonitorSleepSecs,
- }
- })
- });
-
- donorRst.startSet();
- donorRst.initiate();
-
- recipientRst.startSet();
- recipientRst.initiate();
-
- const tenantMigrationTest =
- new TenantMigrationTest({name: jsTestName(), donorRst, recipientRst});
- const recipientPrimary = tenantMigrationTest.getRecipientPrimary();
-
- const migrationId = UUID();
- const migrationOpts = {
- migrationIdString: extractUUIDFromObject(migrationId),
- tenantId: kTenantId,
- recipientConnString: recipientRst.getURL(),
- };
- const donorRstArgs = createRstArgs(donorRst);
-
- TenantMigrationTest.assertCommitted(
- tenantMigrationTest.runMigration(migrationOpts, {automaticForgetMigration: false}));
- const forgetMigrationThread = new Thread(forgetMigrationAsync,
- migrationOpts.migrationIdString,
- donorRstArgs,
- false /* retryOnRetryableErrors */);
- forgetMigrationThread.start();
-
- // Wait for recipientForgetMigration command to start.
- assert.soon(() => {
- const res = assert.commandWorked(
- recipientPrimary.adminCommand({currentOp: true, desc: "tenant recipient migration"}));
- return res.inprog[0].expireAt != null;
- });
- sleep(Math.random() * kMaxSleepTimeMS);
- interruptFunc(recipientRst);
-
- assert.commandWorkedOrFailedWithCode(
- tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString),
- ErrorCodes.NoSuchTenantMigration);
-
- assert.commandWorked(forgetMigrationThread.returnData());
- tenantMigrationTest.waitForMigrationGarbageCollection(migrationId, migrationOpts.tenantId);
-
- tenantMigrationTest.stop();
- donorRst.stopSet();
- recipientRst.stopSet();
-}
-
-(() => {
- jsTest.log("Test that the migration resumes on stepup");
- testRecipientSyncDataInterrupt((recipientRst) => {
- // Force the primary to step down but make it likely to step back up.
- const recipientPrimary = recipientRst.getPrimary();
- assert.commandWorked(recipientPrimary.adminCommand(
- {replSetStepDown: ReplSetTest.kForeverSecs, force: true}));
- assert.commandWorked(recipientPrimary.adminCommand({replSetFreeze: 0}));
- }, false);
-})();
-
-(() => {
- jsTest.log("Test that the migration resumes after restart");
- testRecipientSyncDataInterrupt((recipientRst) => {
- recipientRst.stopSet(null /* signal */, true /*forRestart */);
- recipientRst.startSet({restart: true});
- recipientRst.awaitSecondaryNodes();
- recipientRst.getPrimary();
- }, true);
-})();
-
-(() => {
- jsTest.log("Test that the recipientForgetMigration command can be retried on stepup");
- testRecipientForgetMigrationInterrupt((recipientRst) => {
- // Force the primary to step down but make it likely to step back up.
- const recipientPrimary = recipientRst.getPrimary();
- assert.commandWorked(recipientPrimary.adminCommand(
- {replSetStepDown: ReplSetTest.kForeverSecs, force: true}));
- assert.commandWorked(recipientPrimary.adminCommand({replSetFreeze: 0}));
- });
-})();
-
-(() => {
- jsTest.log("Test that the recipientForgetMigration command can be retried after restart");
- testRecipientForgetMigrationInterrupt((recipientRst) => {
- recipientRst.stopSet(null /* signal */, true /*forRestart */);
- recipientRst.startSet({restart: true});
- recipientRst.awaitSecondaryNodes();
- recipientRst.getPrimary();
- });
-})();
diff --git a/jstests/replsets/tenant_migration_recipient_retryable_writes_failover.js b/jstests/replsets/tenant_migration_recipient_retryable_writes_failover.js
deleted file mode 100644
index 03b5f7eb972..00000000000
--- a/jstests/replsets/tenant_migration_recipient_retryable_writes_failover.js
+++ /dev/null
@@ -1,110 +0,0 @@
-/**
- * Tests whether the recipient correctly clears its oplog buffer if the recipient primary
- * fails over while fetching retryable writes oplog entries from the donor.
- *
- * @tags: [
- * incompatible_with_macos,
- * incompatible_with_shard_merge,
- * incompatible_with_windows_tls,
- * requires_majority_read_concern,
- * requires_persistence,
- * serverless,
- * ]
- */
-
-import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js";
-load("jstests/libs/uuid_util.js"); // For extractUUIDFromObject().
-load("jstests/libs/fail_point_util.js"); // For configureFailPoint().
-
-const tenantMigrationTest =
- new TenantMigrationTest({name: jsTestName(), sharedOptions: {nodes: 2}});
-
-const kMigrationId = UUID();
-const kTenantId = ObjectId().str;
-const kDbName = tenantMigrationTest.tenantDB(kTenantId, "testDb");
-const kCollName = "testColl";
-const migrationOpts = {
- migrationIdString: extractUUIDFromObject(kMigrationId),
- tenantId: kTenantId,
-};
-
-const donorRst = tenantMigrationTest.getDonorRst();
-const donorPrimary = tenantMigrationTest.getDonorPrimary();
-const rsConn = new Mongo(donorRst.getURL());
-const recipientPrimary = tenantMigrationTest.getRecipientPrimary();
-
-const session = rsConn.startSession({retryWrites: true});
-const sessionColl = session.getDatabase(kDbName)[kCollName];
-
-const session2 = rsConn.startSession({retryWrites: true});
-const sessionColl2 = session2.getDatabase(kDbName)[kCollName];
-
-jsTestLog("Run retryable writes prior to the migration.");
-assert.commandWorked(sessionColl.insert({_id: "retryableWrite1"}));
-assert.commandWorked(sessionColl2.insert({_id: "retryableWrite2"}));
-
-jsTestLog("Setting up failpoints.");
-// Use `pauseAfterRetrievingRetryableWritesBatch` to hang after inserting the first batch of results
-// from the aggregation request into the oplog buffer.
-const fpPauseAfterRetrievingRetryableWritesBatch =
- configureFailPoint(recipientPrimary, "pauseAfterRetrievingRetryableWritesBatch");
-
-// Set aggregation request batch size to 1 so that we can failover in between batches.
-const fpSetSmallAggregationBatchSize =
- configureFailPoint(recipientPrimary, "fpSetSmallAggregationBatchSize");
-
-jsTestLog("Starting tenant migration with migrationId: " + kMigrationId +
- ", tenantId: " + kTenantId);
-assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts));
-
-jsTestLog("Waiting until the recipient primary fetches a batch of retryable writes oplog entries.");
-fpSetSmallAggregationBatchSize.wait();
-fpPauseAfterRetrievingRetryableWritesBatch.wait();
-
-// Check that the oplog buffer is correctly populated.
-const kOplogBufferNS = "repl.migration.oplog_" + migrationOpts.migrationIdString;
-let recipientOplogBuffer = recipientPrimary.getDB("config")[kOplogBufferNS];
-// We expect to have only retryableWrite1 since the cursor batch size is 1 and we paused after
-// inserting the first branch of results from the aggregation request.
-let cursor = recipientOplogBuffer.find();
-assert.eq(cursor.itcount(), 1, "Incorrect number of oplog entries in buffer: " + cursor.toArray());
-
-// Check that we haven't completed the retryable writes fetching stage yet.
-let recipientConfigColl = recipientPrimary.getCollection(TenantMigrationTest.kConfigRecipientsNS);
-let recipientDoc = recipientConfigColl.find({"_id": kMigrationId}).toArray();
-assert.eq(recipientDoc.length, 1);
-assert.eq(recipientDoc[0].completedFetchingRetryableWritesBeforeStartOpTime, false);
-
-jsTestLog("Stepping a new primary up.");
-const recipientRst = tenantMigrationTest.getRecipientRst();
-const recipientSecondary = recipientRst.getSecondary();
-// Use `fpAfterFetchingRetryableWritesEntriesBeforeStartOpTime` to hang after populating the oplog
-// buffer with retryable writes entries. Set this before stepping up instead of after so that the
-// new primary will not be able to pass this stage without the failpoint being set.
-const fpAfterFetchingRetryableWritesEntries = configureFailPoint(
- recipientSecondary, "fpAfterFetchingRetryableWritesEntriesBeforeStartOpTime", {action: "hang"});
-
-recipientRst.stepUp(recipientSecondary);
-
-fpPauseAfterRetrievingRetryableWritesBatch.off();
-const newRecipientPrimary = recipientRst.getPrimary();
-
-fpAfterFetchingRetryableWritesEntries.wait();
-// The new primary should have cleared its oplog buffer and refetched both retryableWrite1 and
-// retryableWrite2. Otherwise, we will invariant when trying to add those entries.
-recipientOplogBuffer = newRecipientPrimary.getDB("config")[kOplogBufferNS];
-cursor = recipientOplogBuffer.find();
-assert.eq(cursor.itcount(), 2, "Incorrect number of oplog entries in buffer: " + cursor.toArray());
-
-recipientConfigColl = newRecipientPrimary.getCollection(TenantMigrationTest.kConfigRecipientsNS);
-recipientDoc = recipientConfigColl.find({"_id": kMigrationId}).toArray();
-assert.eq(recipientDoc.length, 1);
-assert.eq(recipientDoc[0].completedFetchingRetryableWritesBeforeStartOpTime, true);
-
-fpAfterFetchingRetryableWritesEntries.off();
-fpSetSmallAggregationBatchSize.off();
-
-jsTestLog("Waiting for migration to complete.");
-TenantMigrationTest.assertCommitted(tenantMigrationTest.waitForMigrationToComplete(migrationOpts));
-
-tenantMigrationTest.stop();
diff --git a/jstests/replsets/tenant_migration_recipient_rollback_recovery.js b/jstests/replsets/tenant_migration_recipient_rollback_recovery.js
deleted file mode 100644
index ffb2cc4919e..00000000000
--- a/jstests/replsets/tenant_migration_recipient_rollback_recovery.js
+++ /dev/null
@@ -1,335 +0,0 @@
-/**
- * Tests that tenant migrations that go through recipient rollback are recovered correctly.
- *
- * @tags: [
- * incompatible_with_macos,
- * incompatible_with_shard_merge,
- * incompatible_with_windows_tls,
- * requires_majority_read_concern,
- * requires_persistence,
- * serverless,
- * ]
- */
-
-import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js";
-import {
- forgetMigrationAsync,
- makeX509OptionsForTest,
- runMigrationAsync,
-} from "jstests/replsets/libs/tenant_migration_util.js";
-
-load("jstests/libs/fail_point_util.js");
-load("jstests/libs/uuid_util.js");
-load("jstests/libs/parallelTester.js");
-load("jstests/replsets/libs/rollback_test.js");
-load("jstests/replsets/rslib.js"); // 'createRstArgs'
-
-const kTenantId = ObjectId().str;
-
-const kMaxSleepTimeMS = 250;
-
-// Set the delay before a state doc is garbage collected to be short to speed up the test but long
-// enough for the state doc to still be around after the recipient is back in the replication steady
-// state.
-const kGarbageCollectionDelayMS = 30 * 1000;
-
-const migrationX509Options = makeX509OptionsForTest();
-
-function makeMigrationOpts(tenantMigrationTest, migrationId, tenantId) {
- return {
- migrationIdString: extractUUIDFromObject(migrationId),
- tenantId: tenantId,
- recipientConnString: tenantMigrationTest.getRecipientConnString(),
- readPreference: {mode: "primary"},
- };
-}
-
-/**
- * Starts a recipient ReplSetTest and creates a TenantMigrationTest for it. Runs 'setUpFunc' after
- * initiating the recipient. Then, runs 'rollbackOpsFunc' while replication is disabled on the
- * secondaries, shuts down the primary and restarts it after re-election to force the operations in
- * 'rollbackOpsFunc' to be rolled back. Finally, runs 'steadyStateFunc' after it is back in the
- * replication steady state.
- */
-function testRollBack(setUpFunc, rollbackOpsFunc, steadyStateFunc) {
- const donorRst = new ReplSetTest({
- name: "donorRst",
- nodes: 1,
- serverless: true,
- nodeOptions: Object.assign({}, migrationX509Options.donor, {
- setParameter: {
- tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS,
- ttlMonitorSleepSecs: 1,
- }
- })
- });
- donorRst.startSet();
- donorRst.initiate();
-
- const donorRstArgs = createRstArgs(donorRst);
-
- const recipientRst = new ReplSetTest({
- name: "recipientRst",
- nodes: 3,
- serverless: true,
- nodeOptions: Object.assign({}, migrationX509Options.recipient, {
- setParameter: {
- tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS,
- ttlMonitorSleepSecs: 1,
- }
- })
- });
- recipientRst.startSet();
- recipientRst.initiate();
-
- const tenantMigrationTest =
- new TenantMigrationTest({name: jsTestName(), donorRst, recipientRst});
- setUpFunc(tenantMigrationTest, donorRstArgs);
-
- let originalRecipientPrimary = recipientRst.getPrimary();
- const originalRecipientSecondaries = recipientRst.getSecondaries();
- // The default WC is majority and stopServerReplication will prevent satisfying any majority
- // writes.
- assert.commandWorked(originalRecipientPrimary.adminCommand(
- {setDefaultRWConcern: 1, defaultWriteConcern: {w: 1}, writeConcern: {w: "majority"}}));
- recipientRst.awaitLastOpCommitted();
-
- // Disable replication on the secondaries so that writes during this step will be rolled back.
- stopServerReplication(originalRecipientSecondaries);
- rollbackOpsFunc(tenantMigrationTest, donorRstArgs);
-
- // Shut down the primary and re-enable replication to allow one of the secondaries to get
- // elected, and make the writes above get rolled back on the original primary when it comes
- // back up.
- recipientRst.stop(originalRecipientPrimary);
- restartServerReplication(originalRecipientSecondaries);
- const newRecipientPrimary = recipientRst.getPrimary();
- assert.neq(originalRecipientPrimary, newRecipientPrimary);
-
- // Restart the original primary.
- originalRecipientPrimary =
- recipientRst.start(originalRecipientPrimary, {waitForConnect: true}, true /* restart */);
- originalRecipientPrimary.setSecondaryOk();
- recipientRst.awaitReplication();
-
- steadyStateFunc(tenantMigrationTest);
-
- donorRst.stopSet();
- recipientRst.stopSet();
-}
-
-/**
- * Starts a migration and waits for the recipient's primary to insert the recipient's state doc.
- * Forces the write to be rolled back. After the replication steady state is reached, asserts that
- * recipientSyncData can restart the migration on the new primary.
- */
-function testRollbackInitialState() {
- const migrationId = UUID();
- let migrationOpts;
- let migrationThread;
-
- let setUpFunc = (tenantMigrationTest, donorRstArgs) => {};
-
- let rollbackOpsFunc = (tenantMigrationTest, donorRstArgs) => {
- const recipientPrimary = tenantMigrationTest.getRecipientPrimary();
-
- // Start the migration asynchronously and wait for the primary to insert the state doc.
- migrationOpts = makeMigrationOpts(tenantMigrationTest, migrationId, ObjectId().str);
- migrationThread = new Thread(runMigrationAsync, migrationOpts, donorRstArgs);
- migrationThread.start();
- assert.soon(() => {
- return 1 ===
- recipientPrimary.getCollection(TenantMigrationTest.kConfigRecipientsNS).count({
- _id: migrationId
- });
- });
- };
-
- let steadyStateFunc = (tenantMigrationTest) => {
- // Verify that the migration restarted successfully on the new primary despite rollback.
- TenantMigrationTest.assertCommitted(migrationThread.returnData());
- tenantMigrationTest.assertRecipientNodesInExpectedState({
- nodes: tenantMigrationTest.getRecipientRst().nodes,
- migrationId: migrationId,
- tenantId: migrationOpts.tenantId,
- expectedState: TenantMigrationTest.RecipientState.kConsistent,
- expectedAccessState: TenantMigrationTest.RecipientAccessState.kRejectBefore
- });
- assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString));
- };
-
- testRollBack(setUpFunc, rollbackOpsFunc, steadyStateFunc);
-}
-
-/**
- * Starts a migration after enabling 'pauseFailPoint' (must pause the migration) and
- * 'setUpFailPoints' on the recipient's primary. Waits for the primary to do the write to transition
- * to 'nextState' after reaching 'pauseFailPoint' (i.e. the state doc matches 'query'), then forces
- * the write to be rolled back. After the replication steady state is reached, asserts that the
- * migration is resumed successfully by new primary regardless of what the rolled back state
- * transition is.
- */
-function testRollBackStateTransition(pauseFailPoint, setUpFailPoints, nextState, query) {
- jsTest.log(`Test roll back the write to transition to state "${
- nextState}" after reaching failpoint "${pauseFailPoint}"`);
-
- const migrationId = UUID();
- let migrationOpts;
- let migrationThread, pauseFp;
-
- let setUpFunc = (tenantMigrationTest, donorRstArgs) => {
- const recipientPrimary = tenantMigrationTest.getRecipientPrimary();
- setUpFailPoints.forEach(failPoint => configureFailPoint(recipientPrimary, failPoint));
- pauseFp = configureFailPoint(recipientPrimary, pauseFailPoint, {action: "hang"});
-
- migrationOpts = makeMigrationOpts(tenantMigrationTest, migrationId, ObjectId().str);
- migrationThread = new Thread(runMigrationAsync, migrationOpts, donorRstArgs);
- migrationThread.start();
- pauseFp.wait();
- };
-
- let rollbackOpsFunc = (tenantMigrationTest, donorRstArgs) => {
- const recipientPrimary = tenantMigrationTest.getRecipientPrimary();
- // Resume the migration and wait for the primary to do the write for the state transition.
- pauseFp.off();
- assert.soon(() => {
- return 1 ===
- recipientPrimary.getCollection(TenantMigrationTest.kConfigRecipientsNS)
- .count(Object.assign({_id: migrationId}, query));
- });
- };
-
- let steadyStateFunc = (tenantMigrationTest) => {
- // Verify that the migration resumed successfully on the new primary despite the rollback.
- TenantMigrationTest.assertCommitted(migrationThread.returnData());
- tenantMigrationTest.waitForRecipientNodesToReachState(
- tenantMigrationTest.getRecipientRst().nodes,
- migrationId,
- migrationOpts.tenantId,
- TenantMigrationTest.RecipientState.kConsistent,
- TenantMigrationTest.RecipientAccessState.kRejectBefore);
- assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString));
- };
-
- testRollBack(setUpFunc, rollbackOpsFunc, steadyStateFunc);
-}
-
-/**
- * Runs donorForgetMigration after completing a migration. Waits for the recipient's primary to
- * mark the recipient's state doc as garbage collectable, then forces the write to be rolled back.
- * After the replication steady state is reached, asserts that recipientForgetMigration can be
- * retried on the new primary and that the state doc is eventually garbage collected.
- */
-function testRollBackMarkingStateGarbageCollectable() {
- const migrationId = UUID();
- let migrationOpts;
- let forgetMigrationThread;
-
- let setUpFunc = (tenantMigrationTest, donorRstArgs) => {
- migrationOpts = makeMigrationOpts(tenantMigrationTest, migrationId, ObjectId().str);
- TenantMigrationTest.assertCommitted(
- tenantMigrationTest.runMigration(migrationOpts, {automaticForgetMigration: false}));
- };
-
- let rollbackOpsFunc = (tenantMigrationTest, donorRstArgs) => {
- const recipientPrimary = tenantMigrationTest.getRecipientPrimary();
- // Run donorForgetMigration and wait for the primary to do the write to mark the state doc
- // as garbage collectable.
- forgetMigrationThread = new Thread(forgetMigrationAsync,
- migrationOpts.migrationIdString,
- donorRstArgs,
- false /* retryOnRetryableErrors */);
- forgetMigrationThread.start();
- assert.soon(() => {
- return 1 ===
- recipientPrimary.getCollection(TenantMigrationTest.kConfigRecipientsNS)
- .count({_id: migrationId, expireAt: {$exists: 1}});
- });
- };
-
- let steadyStateFunc = (tenantMigrationTest) => {
- // Verify that the migration state got garbage collected successfully despite the rollback.
- assert.commandWorked(forgetMigrationThread.returnData());
- tenantMigrationTest.waitForMigrationGarbageCollection(
- migrationId,
- migrationOpts.tenantId,
- tenantMigrationTest.getDonorRst().nodes,
- tenantMigrationTest.getRecipientRst().nodes);
- };
-
- testRollBack(setUpFunc, rollbackOpsFunc, steadyStateFunc);
-}
-
-/**
- * Starts a migration and forces the recipient's primary to go through rollback after a random
- * amount of time. After the replication steady state is reached, asserts that the migration is
- * resumed successfully.
- */
-function testRollBackRandom() {
- const migrationId = UUID();
- let migrationOpts;
- let migrationThread;
-
- let setUpFunc = (tenantMigrationTest, donorRstArgs) => {
- migrationOpts = makeMigrationOpts(tenantMigrationTest, migrationId, ObjectId().str);
- migrationThread = new Thread(async (donorRstArgs, migrationOpts) => {
- const {runMigrationAsync, forgetMigrationAsync} =
- await import("jstests/replsets/libs/tenant_migration_util.js");
- assert.commandWorked(await runMigrationAsync(migrationOpts, donorRstArgs));
- assert.commandWorked(await forgetMigrationAsync(
- migrationOpts.migrationIdString, donorRstArgs, false /* retryOnRetryableErrors */));
- }, donorRstArgs, migrationOpts);
-
- // Start the migration and wait for a random amount of time before transitioning to the
- // rollback operations state.
- migrationThread.start();
- sleep(Math.random() * kMaxSleepTimeMS);
- };
-
- let rollbackOpsFunc = (tenantMigrationTest, donorRstArgs) => {
- // Let the migration run in the rollback operations state for a random amount of time.
- sleep(Math.random() * kMaxSleepTimeMS);
- };
-
- let steadyStateFunc = (tenantMigrationTest) => {
- // Verify that the migration completed and was garbage collected successfully despite the
- // rollback.
- migrationThread.join();
- tenantMigrationTest.waitForRecipientNodesToReachState(
- tenantMigrationTest.getRecipientRst().nodes,
- migrationId,
- migrationOpts.tenantId,
- TenantMigrationTest.RecipientState.kDone,
- TenantMigrationTest.RecipientAccessState.kRejectBefore);
- tenantMigrationTest.waitForMigrationGarbageCollection(
- migrationId,
- migrationOpts.tenantId,
- tenantMigrationTest.getDonorRst().nodes,
- tenantMigrationTest.getRecipientRst().nodes);
- };
-
- testRollBack(setUpFunc, rollbackOpsFunc, steadyStateFunc);
-}
-
-jsTest.log("Test roll back recipient's state doc insert");
-testRollbackInitialState();
-
-jsTest.log("Test roll back recipient's state doc update");
-[{
- pauseFailPoint: "fpBeforeMarkingCloneSuccess",
- nextState: "reject",
- query: {dataConsistentStopDonorOpTime: {$exists: 1}}
-},
- {
- pauseFailPoint: "fpBeforePersistingRejectReadsBeforeTimestamp",
- nextState: "rejectBefore",
- query: {rejectReadsBeforeTimestamp: {$exists: 1}}
- }].forEach(({pauseFailPoint, setUpFailPoints = [], nextState, query}) => {
- testRollBackStateTransition(pauseFailPoint, setUpFailPoints, nextState, query);
-});
-
-jsTest.log("Test roll back marking the donor's state doc as garbage collectable");
-testRollBackMarkingStateGarbageCollectable();
-
-jsTest.log("Test roll back random");
-testRollBackRandom();
diff --git a/jstests/replsets/tenant_migration_recipient_sync_donor_timestamp.js b/jstests/replsets/tenant_migration_recipient_sync_donor_timestamp.js
deleted file mode 100644
index 81422e28454..00000000000
--- a/jstests/replsets/tenant_migration_recipient_sync_donor_timestamp.js
+++ /dev/null
@@ -1,94 +0,0 @@
-/**
- * Exercises the code path for the recipientSyncData command that waits until a timestamp provided
- * by the donor is majority committed: make sure that in this code path, when the recipient is
- * interrupted by a primary step down, the recipient properly swaps the error code to the true code
- * (like primary step down) that the donor can retry on.
- *
- * @tags: [
- * incompatible_with_macos,
- * incompatible_with_shard_merge,
- * incompatible_with_windows_tls,
- * requires_persistence,
- * requires_replication,
- * serverless,
- * ]
- */
-
-import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js";
-load("jstests/libs/fail_point_util.js");
-load("jstests/libs/uuid_util.js"); // For extractUUIDFromObject()
-
-// Make the batch size small so that we can pause before all the batches are applied.
-const tenantMigrationTest = new TenantMigrationTest(
- {name: jsTestName(), sharedOptions: {setParameter: {tenantApplierBatchSizeOps: 2}}});
-
-const kMigrationId = UUID();
-const kTenantId = ObjectId().str;
-const kReadPreference = {
- mode: "primary"
-};
-const migrationOpts = {
- migrationIdString: extractUUIDFromObject(kMigrationId),
- tenantId: kTenantId,
- readPreference: kReadPreference
-};
-
-const dbName = tenantMigrationTest.tenantDB(kTenantId, "testDB");
-const collName = jsTestName() + "_collection";
-
-const recipientRst = tenantMigrationTest.getRecipientRst();
-const recipientPrimary = recipientRst.getPrimary();
-
-// FailPoint to pause right before the data consistent promise is fulfilled.
-const fpBeforeDataConsistent = configureFailPoint(
- recipientPrimary, "fpBeforeFulfillingDataConsistentPromise", {action: "hang"});
-const fpBeforeApplierFutureCalled =
- configureFailPoint(recipientPrimary, "fpWaitUntilTimestampMajorityCommitted");
-
-tenantMigrationTest.insertDonorDB(dbName, collName);
-
-jsTestLog("Starting migration.");
-// Start the migration, and allow it to progress to the point where the _dataConsistentPromise has
-// been fulfilled.
-tenantMigrationTest.startMigration(migrationOpts);
-
-jsTestLog("Waiting for data consistent promise.");
-// Pause right before the _dataConsistentPromise is fulfilled. Therefore, the applier has
-// finished applying entries at least until dataConsistentStopDonorOpTime.
-fpBeforeDataConsistent.wait();
-
-jsTestLog("Pausing the tenant oplog applier.");
-// Pause the applier now. All the entries that the applier cannot process now are past the
-// dataConsistentStopDonorOpTime.
-const fpPauseOplogApplier =
- configureFailPoint(recipientPrimary, "fpBeforeTenantOplogApplyingBatch");
-
-jsTestLog("Writing to donor db.");
-// Send writes to the donor. The applier will not be able to process these as it is paused.
-const docsToApply = [...Array(10).keys()].map((i) => ({a: i}));
-tenantMigrationTest.insertDonorDB(dbName, collName, docsToApply);
-
-jsTestLog("Waiting to hit failpoint in tenant oplog applier.");
-fpPauseOplogApplier.wait();
-
-jsTestLog("Allowing recipient to respond.");
-// Allow the recipient to respond to the donor for the recipientSyncData command that waits on the
-// fulfillment of the _dataConsistentPromise. The donor will then send another recipientSyncData
-// command that waits on the provided donor timestamp to be majority committed.
-fpBeforeDataConsistent.off();
-
-jsTestLog("Reach the point where we are waiting for the tenant oplog applier to catch up.");
-fpBeforeApplierFutureCalled.wait();
-fpBeforeApplierFutureCalled.off();
-
-jsTestLog("Stepping another node up.");
-// Make a new recipient primary step up. This will ask the applier to shutdown.
-recipientRst.stepUp(recipientRst.getSecondaries()[0]);
-
-jsTestLog("Release the tenant oplog applier failpoint.");
-fpPauseOplogApplier.off();
-
-jsTestLog("Waiting for migration to complete.");
-TenantMigrationTest.assertCommitted(tenantMigrationTest.waitForMigrationToComplete(migrationOpts));
-
-tenantMigrationTest.stop();
diff --git a/jstests/replsets/tenant_migration_recipient_sync_source_reconnect_delayed_secondary.js b/jstests/replsets/tenant_migration_recipient_sync_source_reconnect_delayed_secondary.js
deleted file mode 100644
index c95fb70d474..00000000000
--- a/jstests/replsets/tenant_migration_recipient_sync_source_reconnect_delayed_secondary.js
+++ /dev/null
@@ -1,66 +0,0 @@
-/**
- * Tests that a migration will continuously retry sync source selection when there are no available
- * donor hosts. Also checks that a donor host is considered an uneligible sync source when it has a
- * majority OpTime earlier than the recipient's stored 'startApplyingDonorOpTime'.
- *
- * Tests that if the stale donor host advances its majority OpTime to 'startApplyingDonorOpTime'
- * or later, the recipient will successfully choose that donor as sync source and resume the
- * migration.
- *
- * @tags: [
- * incompatible_with_macos,
- * incompatible_with_shard_merge,
- * incompatible_with_windows_tls,
- * requires_majority_read_concern,
- * requires_persistence,
- * # The currentOp output field 'dataSyncCompleted' was renamed to 'migrationCompleted'.
- * requires_fcv_70,
- * serverless,
- * ]
- */
-
-import {
- setUpMigrationSyncSourceTest
-} from "jstests/replsets/libs/tenant_migration_recipient_sync_source.js";
-import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js";
-
-// After this setUp() call, we should have a migration with 'secondary' read preference. The
-// recipient should be continuously retrying sync source selection, unable to choose
-// 'delayedSecondary' because it is too stale and 'donorSecondary' because it is down.
-const {
- tenantMigrationTest,
- migrationOpts,
- donorSecondary,
- delayedSecondary,
- hangAfterCreatingConnections
-} = setUpMigrationSyncSourceTest();
-
-if (!tenantMigrationTest) {
- // Feature flag was not enabled.
- quit();
-}
-
-jsTestLog("Restarting replication on 'delayedSecondary'");
-restartServerReplication(delayedSecondary);
-
-// The recipient should eventually be able to connect to the lagged secondary, after the secondary
-// has caught up and the exclude timeout has expired.
-hangAfterCreatingConnections.wait();
-
-const recipientPrimary = tenantMigrationTest.getRecipientPrimary();
-const res = recipientPrimary.adminCommand({currentOp: true, desc: "tenant recipient migration"});
-const currOp = res.inprog[0];
-assert.eq(delayedSecondary.host,
- currOp.donorSyncSource,
- `the recipient should only be able to choose 'delayedSecondary' as sync source`);
-
-hangAfterCreatingConnections.off();
-
-TenantMigrationTest.assertCommitted(tenantMigrationTest.waitForMigrationToComplete(migrationOpts));
-assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString));
-
-// Remove 'donorSecondary' so that the test can complete properly.
-const donorRst = tenantMigrationTest.getDonorRst();
-donorRst.remove(donorSecondary);
-donorRst.stopSet();
-tenantMigrationTest.stop();
diff --git a/jstests/replsets/tenant_migration_recipient_sync_source_restart_donor_secondary.js b/jstests/replsets/tenant_migration_recipient_sync_source_restart_donor_secondary.js
deleted file mode 100644
index 10ded49a34b..00000000000
--- a/jstests/replsets/tenant_migration_recipient_sync_source_restart_donor_secondary.js
+++ /dev/null
@@ -1,67 +0,0 @@
-/**
- * Tests that a migration will continuously retry sync source selection when there are no available
- * donor hosts. Also checks that a donor host is considered an uneligible sync source when it has a
- * majority OpTime earlier than the recipient's stored 'startApplyingDonorOpTime'.
- *
- * Tests that if a donor host becomes available, the recipient will successfully choose it as a
- * sync source and resume the migration.
- *
- * @tags: [
- * incompatible_with_macos,
- * incompatible_with_shard_merge,
- * incompatible_with_windows_tls,
- * requires_majority_read_concern,
- * requires_persistence,
- * # The currentOp output field 'dataSyncCompleted' was renamed to 'migrationCompleted'.
- * requires_fcv_70,
- * serverless,
- * ]
- */
-
-import {
- setUpMigrationSyncSourceTest
-} from "jstests/replsets/libs/tenant_migration_recipient_sync_source.js";
-import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js";
-
-// After this setUp() call, we should have a migration with 'secondary' read preference. The
-// recipient should be continuously retrying sync source selection, unable to choose
-// 'delayedSecondary' because it is too stale and 'donorSecondary' because it is down.
-const {
- tenantMigrationTest,
- migrationOpts,
- donorSecondary,
- delayedSecondary,
- hangAfterCreatingConnections
-} = setUpMigrationSyncSourceTest();
-
-if (!tenantMigrationTest) {
- // Feature flag was not enabled.
- quit();
-}
-
-const donorRst = tenantMigrationTest.getDonorRst();
-
-jsTestLog("Restarting 'donorSecondary'");
-donorRst.start(donorSecondary, null /* options */, true /* restart */);
-
-// The recipient should eventually be able to connect to the donor secondary, after the node reaches
-// 'secondary' state.
-hangAfterCreatingConnections.wait();
-
-const recipientPrimary = tenantMigrationTest.getRecipientPrimary();
-const res = recipientPrimary.adminCommand({currentOp: true, desc: "tenant recipient migration"});
-const currOp = res.inprog[0];
-// 'donorSecondary' should always be the chosen sync source, since read preference is 'secondary'
-// and 'delayedSecondary' cannot be chosen because it is too stale.
-assert.eq(donorSecondary.host,
- currOp.donorSyncSource,
- `the recipient should only be able to choose 'donorSecondary' as sync source`);
-
-hangAfterCreatingConnections.off();
-restartServerReplication(delayedSecondary);
-
-TenantMigrationTest.assertCommitted(tenantMigrationTest.waitForMigrationToComplete(migrationOpts));
-assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString));
-
-donorRst.stopSet();
-tenantMigrationTest.stop();
diff --git a/jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover.js b/jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover.js
deleted file mode 100644
index ea9654d97ac..00000000000
--- a/jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover.js
+++ /dev/null
@@ -1,132 +0,0 @@
-/**
- * Tests that in tenant migration, the recipient set can resume collection cloning from the last
- * document cloned after a failover.
- * @tags: [
- * incompatible_with_macos,
- * incompatible_with_shard_merge,
- * incompatible_with_windows_tls,
- * requires_majority_read_concern,
- * requires_persistence,
- * serverless,
- * ]
- */
-
-import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js";
-import {
- checkTenantDBHashes,
- makeX509OptionsForTest,
-} from "jstests/replsets/libs/tenant_migration_util.js";
-
-load("jstests/libs/fail_point_util.js");
-load("jstests/libs/uuid_util.js"); // for 'extractUUIDFromObject'
-
-const tenantMigrationFailoverTest = function(isTimeSeries, createCollFn, docs) {
- const batchSize = 2;
- const recipientRst = new ReplSetTest({
- nodes: 2,
- name: jsTestName() + "_recipient",
- serverless: true,
- nodeOptions: Object.assign(makeX509OptionsForTest().recipient, {
- setParameter: {
- // Use a batch size of 2 so that collection cloner requires more than a single
- // batch to complete.
- collectionClonerBatchSize: batchSize,
- // Allow reads on recipient before migration completes for testing.
- 'failpoint.tenantMigrationRecipientNotRejectReads': tojson({mode: 'alwaysOn'}),
- }
- })
- });
-
- recipientRst.startSet();
- recipientRst.initiate();
-
- const tenantMigrationTest =
- new TenantMigrationTest({name: jsTestName(), recipientRst: recipientRst});
- const donorPrimary = tenantMigrationTest.getDonorPrimary();
-
- const tenantId = ObjectId().str;
- const dbName = tenantMigrationTest.tenantDB(tenantId, "testDB");
- const donorDB = donorPrimary.getDB(dbName);
- const collName = "testColl";
-
- const recipientPrimary = tenantMigrationTest.getRecipientPrimary();
-
- // Create collection and insert documents.
- assert.commandWorked(createCollFn(donorDB, collName));
- tenantMigrationTest.insertDonorDB(dbName, collName, docs);
-
- const migrationId = UUID();
- const migrationIdString = extractUUIDFromObject(migrationId);
- const migrationOpts = {
- migrationIdString: migrationIdString,
- recipientConnString: tenantMigrationTest.getRecipientConnString(),
- tenantId,
- };
-
- // Configure a fail point to have the recipient primary hang after cloning 2 documents.
- const recipientDb = recipientPrimary.getDB(dbName);
- let recipientColl = isTimeSeries ? recipientDb.getCollection("system.buckets." + collName)
- : recipientDb.getCollection(collName);
-
- const hangDuringCollectionClone =
- configureFailPoint(recipientDb,
- "tenantMigrationHangCollectionClonerAfterHandlingBatchResponse",
- {nss: recipientColl.getFullName()});
-
- // Start a migration and wait for recipient to hang after cloning 2 documents.
- assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts));
- hangDuringCollectionClone.wait();
- assert.soon(() => recipientColl.find().itcount() === batchSize);
-
- // Insert some documents that will be fetched by the recipient. This is to test that on
- // failover, the fetcher will resume fetching from where it left off. The system is expected
- // to crash if the recipient fetches a duplicate oplog entry upon resuming the migration.
- tenantMigrationTest.insertDonorDB(dbName, "aNewColl", [{_id: "docToBeFetched"}]);
- assert.soon(() => {
- const configDb = recipientPrimary.getDB("config");
- const oplogBuffer = configDb.getCollection("repl.migration.oplog_" + migrationIdString);
- return oplogBuffer.find({"entry.o._id": "docToBeFetched"}).count() === 1;
- });
-
- // Step up a new node in the recipient set and trigger a failover. The new primary should resume
- // cloning starting from the third document.
- const newRecipientPrimary = recipientRst.getSecondaries()[0];
- recipientRst.stepUp(newRecipientPrimary);
- hangDuringCollectionClone.off();
- recipientRst.getPrimary();
-
- // The migration should go through after recipient failover.
- TenantMigrationTest.assertCommitted(
- tenantMigrationTest.waitForMigrationToComplete(migrationOpts));
-
- // Check that recipient has cloned all documents in the collection.
- recipientColl = newRecipientPrimary.getDB(dbName).getCollection(collName);
- assert.eq(docs.length, recipientColl.find().itcount());
- assert.docEq(docs, recipientColl.find().sort({_id: 1}).toArray());
- checkTenantDBHashes({
- donorRst: tenantMigrationTest.getDonorRst(),
- recipientRst: tenantMigrationTest.getRecipientRst(),
- tenantId
- });
-
- tenantMigrationTest.stop();
- recipientRst.stopSet();
-};
-
-jsTestLog("Running tenant migration test for time-series collection");
-tenantMigrationFailoverTest(true,
- (db, collName) => db.createCollection(
- collName, {timeseries: {timeField: "time", metaField: "bucket"}}),
- [
- // Group each document in its own bucket in order to work with the
- // collectionClonerBatchSize we set at the recipient replSet.
- {_id: 1, time: ISODate(), bucket: "a"},
- {_id: 2, time: ISODate(), bucket: "b"},
- {_id: 3, time: ISODate(), bucket: "c"},
- {_id: 4, time: ISODate(), bucket: "d"}
- ]);
-
-jsTestLog("Running tenant migration test for regular collection");
-tenantMigrationFailoverTest(false,
- (db, collName) => db.createCollection(collName),
- [{_id: 0}, {_id: "string"}, {_id: UUID()}, {_id: new Date()}]);
diff --git a/jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover_with_dropped_views.js b/jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover_with_dropped_views.js
deleted file mode 100644
index 262e40edf00..00000000000
--- a/jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover_with_dropped_views.js
+++ /dev/null
@@ -1,133 +0,0 @@
-/**
- * Tests that in tenant migration, the collection recreated on a dropped view namespace is handled
- * correctly on resuming the logical tenant collection cloning phase due to recipient failover.
- * @tags: [
- * incompatible_with_macos,
- * incompatible_with_shard_merge,
- * incompatible_with_windows_tls,
- * requires_majority_read_concern,
- * requires_persistence,
- * serverless,
- * ]
- */
-
-import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js";
-import {makeX509OptionsForTest} from "jstests/replsets/libs/tenant_migration_util.js";
-
-load("jstests/libs/fail_point_util.js");
-load("jstests/libs/uuid_util.js"); // for 'extractUUIDFromObject'
-
-const tenantMigrationFailoverTest = function(isTimeSeries, createCollFn) {
- const recipientRst = new ReplSetTest({
- nodes: 2,
- name: jsTestName() + "_recipient",
- serverless: true,
- nodeOptions: Object.assign(makeX509OptionsForTest().recipient, {
- setParameter: {
- // Allow reads on recipient before migration completes for testing.
- 'failpoint.tenantMigrationRecipientNotRejectReads': tojson({mode: 'alwaysOn'}),
- }
- })
- });
-
- recipientRst.startSet();
- recipientRst.initiate();
-
- const tenantMigrationTest =
- new TenantMigrationTest({name: jsTestName(), recipientRst: recipientRst});
-
- const donorRst = tenantMigrationTest.getDonorRst();
- const donorPrimary = donorRst.getPrimary();
-
- const tenantId = ObjectId().str;
- const dbName = tenantMigrationTest.tenantDB(tenantId, "testDB");
- const donorDB = donorPrimary.getDB(dbName);
- const collName = "testColl";
- const donorColl = donorDB[collName];
-
- let getCollectionInfo = function(conn) {
- return conn.getDB(dbName).getCollectionInfos().filter(coll => {
- return coll.name === collName;
- });
- };
-
- // Create a timeseries collection or a regular view.
- assert.commandWorked(createCollFn(donorDB, collName));
- donorRst.awaitReplication();
-
- const migrationId = UUID();
- const migrationIdString = extractUUIDFromObject(migrationId);
- const migrationOpts = {
- migrationIdString: migrationIdString,
- recipientConnString: tenantMigrationTest.getRecipientConnString(),
- tenantId,
- };
-
- const recipientPrimary = recipientRst.getPrimary();
- const recipientDb = recipientPrimary.getDB(dbName);
- const recipientSystemViewsColl = recipientDb.getCollection("system.views");
-
- // Configure a fail point to have the recipient primary hang after cloning
- // "<OID>_testDB.system.views" collection.
- const hangDuringCollectionClone =
- configureFailPoint(recipientPrimary,
- "tenantMigrationHangCollectionClonerAfterHandlingBatchResponse",
- {nss: recipientSystemViewsColl.getFullName()});
-
- // Start the migration and wait for the migration to hang after cloning
- // "<OID>_testDB.system.views" collection.
- assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts));
- hangDuringCollectionClone.wait();
-
- assert.soon(() => recipientSystemViewsColl.find().itcount() >= 1);
- recipientRst.awaitLastOpCommitted();
- const newRecipientPrimary = recipientRst.getSecondaries()[0];
-
- // Verify that a view has been registered for "<OID>_testDB.testColl" on the new
- // recipient primary.
- let collectionInfo = getCollectionInfo(newRecipientPrimary);
- assert.eq(1, collectionInfo.length);
- assert(collectionInfo[0].type === (isTimeSeries ? "timeseries" : "view"),
- "data store type mismatch: " + tojson(collectionInfo[0]));
-
- // Drop the view and create a regular collection with the same namespace as the
- // dropped view on donor.
- assert(donorColl.drop());
- assert.commandWorked(donorDB.createCollection(collName));
-
- // We need to skip TenantDatabaseCloner::listExistingCollectionsStage() to make sure
- // the recipient always clone the above newly created regular collection after the failover.
- // Currently, we restart cloning after a failover, only from the collection whose UUID is
- // greater than or equal to the last collection we have on disk.
- const skiplistExistingCollectionsStage =
- configureFailPoint(newRecipientPrimary, "skiplistExistingCollectionsStage");
-
- // Step up a new node in the recipient set and trigger a failover.
- recipientRst.stepUp(newRecipientPrimary);
- hangDuringCollectionClone.off();
-
- // The migration should go through after recipient failover.
- TenantMigrationTest.assertCommitted(
- tenantMigrationTest.waitForMigrationToComplete(migrationOpts));
-
- // Check that recipient has dropped the view and and re-created the regular collection as part
- // of migration oplog catchup phase.
- collectionInfo = getCollectionInfo(newRecipientPrimary);
- assert.eq(1, collectionInfo.length);
- assert(collectionInfo[0].type === "collection",
- "data store type mismatch: " + tojson(collectionInfo[0]));
-
- tenantMigrationTest.stop();
- recipientRst.stopSet();
-};
-
-jsTestLog("Running tenant migration test for time-series collection");
-// Creating a timeseries collection, implicity creates a view on the 'collName' collection
-// namespace.
-tenantMigrationFailoverTest(true,
- (db, collName) => db.createCollection(
- collName, {timeseries: {timeField: "time", metaField: "bucket"}}));
-
-jsTestLog("Running tenant migration test for regular view");
-tenantMigrationFailoverTest(false,
- (db, collName) => db.createView(collName, "sourceCollection", []));
diff --git a/jstests/replsets/tenant_migration_resume_collection_cloner_after_rename.js b/jstests/replsets/tenant_migration_resume_collection_cloner_after_rename.js
deleted file mode 100644
index 0919240cf24..00000000000
--- a/jstests/replsets/tenant_migration_resume_collection_cloner_after_rename.js
+++ /dev/null
@@ -1,124 +0,0 @@
-/**
- * Tests that in tenant migration, the recipient set can resume collection cloning from the last
- * document cloned after a failover even if the collection has been renamed on the donor.
- * @tags: [
- * incompatible_with_macos,
- * incompatible_with_shard_merge,
- * incompatible_with_windows_tls,
- * requires_majority_read_concern,
- * requires_persistence,
- * serverless,
- * ]
- */
-
-import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js";
-import {
- checkTenantDBHashes,
- makeX509OptionsForTest,
- runMigrationAsync
-} from "jstests/replsets/libs/tenant_migration_util.js";
-
-load("jstests/libs/fail_point_util.js");
-load("jstests/libs/uuid_util.js"); // for 'extractUUIDFromObject'
-load("jstests/libs/parallelTester.js"); // for 'Thread'
-load('jstests/replsets/rslib.js'); // 'createRstArgs'
-
-const recipientRst = new ReplSetTest({
- nodes: 2,
- name: jsTestName() + "_recipient",
- serverless: true,
- nodeOptions: Object.assign(makeX509OptionsForTest().recipient, {
- setParameter: {
- // Use a batch size of 2 so that collection cloner requires more than a single batch to
- // complete.
- collectionClonerBatchSize: 2,
- // Allow reads on recipient before migration completes for testing.
- 'failpoint.tenantMigrationRecipientNotRejectReads': tojson({mode: 'alwaysOn'}),
- }
- })
-});
-
-recipientRst.startSet();
-recipientRst.initiate();
-
-const tenantMigrationTest =
- new TenantMigrationTest({name: jsTestName(), recipientRst: recipientRst});
-const tenantId = ObjectId().str;
-const dbName = tenantMigrationTest.tenantDB(tenantId, "testDB");
-const collName = "testColl";
-
-const recipientPrimary = tenantMigrationTest.getRecipientPrimary();
-const donorPrimary = tenantMigrationTest.getDonorPrimary();
-
-// Test _id with mixed bson types.
-const docs = [{_id: 0}, {_id: "string"}, {_id: UUID()}, {_id: new Date()}];
-tenantMigrationTest.insertDonorDB(dbName, collName, docs);
-
-const migrationId = UUID();
-const migrationIdString = extractUUIDFromObject(migrationId);
-const migrationOpts = {
- migrationIdString: migrationIdString,
- recipientConnString: tenantMigrationTest.getRecipientConnString(),
- tenantId,
-};
-
-// Configure a fail point to have the recipient primary hang after cloning 2 documents.
-const recipientDb = recipientPrimary.getDB(dbName);
-let recipientColl = recipientDb.getCollection(collName);
-const hangDuringCollectionClone =
- configureFailPoint(recipientDb,
- "tenantMigrationHangCollectionClonerAfterHandlingBatchResponse",
- {nss: recipientColl.getFullName()});
-
-// Start a migration and wait for recipient to hang after cloning 2 documents.
-const donorRstArgs = createRstArgs(tenantMigrationTest.getDonorRst());
-const migrationThread = new Thread(runMigrationAsync, migrationOpts, donorRstArgs);
-migrationThread.start();
-hangDuringCollectionClone.wait();
-assert.soon(() => recipientColl.find().itcount() === 2);
-
-// Insert some documents that will be fetched by the recipient. This is to test that on failover,
-// the fetcher will resume fetching from where it left off. The system is expected to crash if
-// the recipient fetches a duplicate oplog entry upon resuming the migration.
-tenantMigrationTest.insertDonorDB(dbName, "aNewColl", [{_id: "docToBeFetched"}]);
-assert.soon(() => {
- const configDb = recipientPrimary.getDB("config");
- const oplogBuffer = configDb.getCollection("repl.migration.oplog_" + migrationIdString);
- return oplogBuffer.find({"entry.o._id": "docToBeFetched"}).count() === 1;
-});
-
-recipientRst.awaitLastOpCommitted();
-
-// Set a failpoint to prevent the new recipient primary from completing the migration before the
-// donor renames the collection.
-const newRecipientPrimary = recipientRst.getSecondaries()[0];
-const fpPauseAtStartOfMigration =
- configureFailPoint(newRecipientPrimary, "pauseAfterRunTenantMigrationRecipientInstance");
-
-// Step up a new node in the recipient set and trigger a failover. The new primary should resume
-// cloning starting from the third document.
-recipientRst.stepUp(newRecipientPrimary);
-hangDuringCollectionClone.off();
-recipientRst.getPrimary();
-
-// Rename the collection on the donor.
-const donorColl = donorPrimary.getDB(dbName).getCollection(collName);
-const collNameRenamed = collName + "_renamed";
-assert.commandWorked(donorColl.renameCollection(collNameRenamed));
-
-// The migration should go through after recipient failover.
-fpPauseAtStartOfMigration.off();
-TenantMigrationTest.assertCommitted(migrationThread.returnData());
-
-// Check that recipient has cloned all documents in the renamed collection.
-recipientColl = newRecipientPrimary.getDB(dbName).getCollection(collNameRenamed);
-assert.eq(4, recipientColl.find().itcount());
-assert.eq(recipientColl.find().sort({_id: 1}).toArray(), docs);
-checkTenantDBHashes({
- donorRst: tenantMigrationTest.getDonorRst(),
- recipientRst: tenantMigrationTest.getRecipientRst(),
- tenantId
-});
-
-tenantMigrationTest.stop();
-recipientRst.stopSet();
diff --git a/jstests/replsets/tenant_migration_resume_oplog_application.js b/jstests/replsets/tenant_migration_resume_oplog_application.js
deleted file mode 100644
index 70849750914..00000000000
--- a/jstests/replsets/tenant_migration_resume_oplog_application.js
+++ /dev/null
@@ -1,119 +0,0 @@
-/**
- * Tests that in a tenant migration, the recipient primary will resume oplog application on
- * failover.
- * @tags: [
- * incompatible_with_macos,
- * incompatible_with_shard_merge,
- * incompatible_with_windows_tls,
- * requires_majority_read_concern,
- * requires_persistence,
- * serverless,
- * ]
- */
-
-import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js";
-import {
- checkTenantDBHashes,
- makeX509OptionsForTest,
- runMigrationAsync,
-} from "jstests/replsets/libs/tenant_migration_util.js";
-
-load("jstests/libs/fail_point_util.js");
-load("jstests/libs/uuid_util.js"); // for 'extractUUIDFromObject'
-load("jstests/libs/parallelTester.js"); // for 'Thread'
-load("jstests/libs/write_concern_util.js"); // for 'stopReplicationOnSecondaries'
-load("jstests/aggregation/extras/utils.js"); // For assertArrayEq.
-load('jstests/replsets/rslib.js'); // For 'createRstArgs'
-
-const recipientRst = new ReplSetTest({
- nodes: 3,
- name: jsTestName() + "_recipient",
- serverless: true,
- // Use a batch size of 2 so that we can hang in the middle of tenant oplog application.
- nodeOptions: Object.assign(makeX509OptionsForTest().recipient,
- {setParameter: {tenantApplierBatchSizeOps: 2}})
-});
-
-recipientRst.startSet();
-recipientRst.initiate();
-
-const tenantMigrationTest =
- new TenantMigrationTest({name: jsTestName(), recipientRst: recipientRst});
-
-const tenantId = ObjectId().str;
-const dbName = tenantMigrationTest.tenantDB(tenantId, "testDB");
-const collName = "testColl";
-
-const donorPrimary = tenantMigrationTest.getDonorPrimary();
-const recipientPrimary = tenantMigrationTest.getRecipientPrimary();
-const donorRst = tenantMigrationTest.getDonorRst();
-const donorTestColl = donorPrimary.getDB(dbName).getCollection(collName);
-
-// Populate the donor replica set with some initial data and make sure it is majority committed.
-const majorityCommittedDocs = [{_id: 0, x: 0}, {_id: 1, x: 1}];
-assert.commandWorked(donorTestColl.insert(majorityCommittedDocs, {writeConcern: {w: "majority"}}));
-assert.eq(2, donorTestColl.find().readConcern("majority").itcount());
-
-const migrationId = UUID();
-const migrationOpts = {
- migrationIdString: extractUUIDFromObject(migrationId),
- recipientConnString: tenantMigrationTest.getRecipientConnString(),
- tenantId,
-};
-
-// Configure fail point to have the recipient primary hang after the cloner completes and the oplog
-// applier has started.
-let waitAfterDatabaseClone = configureFailPoint(
- recipientPrimary, "fpAfterStartingOplogApplierMigrationRecipientInstance", {action: "hang"});
-// Configure fail point to hang the tenant oplog applier after it applies the first batch.
-let waitInOplogApplier = configureFailPoint(recipientPrimary, "hangInTenantOplogApplication");
-
-// Start a migration and wait for recipient to hang in the tenant database cloner.
-const donorRstArgs = createRstArgs(donorRst);
-const migrationThread = new Thread(runMigrationAsync, migrationOpts, donorRstArgs);
-migrationThread.start();
-waitAfterDatabaseClone.wait();
-
-// Insert some writes that will eventually be picked up by the tenant oplog applier on the
-// recipient.
-const docsToApply = [{_id: 2, x: 2}, {_id: 3, x: 3}, {_id: 4, x: 4}];
-tenantMigrationTest.insertDonorDB(dbName, collName, docsToApply);
-
-// Wait for the applied oplog batch to be replicated.
-waitInOplogApplier.wait();
-recipientRst.awaitReplication();
-let local = recipientPrimary.getDB("local");
-let appliedNoOps = local.oplog.rs.find({fromTenantMigration: migrationId, op: "n"});
-let resultsArr = appliedNoOps.toArray();
-// It is possible that the first batch applied includes a resume no-op token. We do not write no-op
-// entries for resume token entries in tenant migrations.
-assert.gt(appliedNoOps.count(), 0, resultsArr);
-assert.lte(appliedNoOps.count(), 2, resultsArr);
-assert.eq(docsToApply[0], resultsArr[0].o2.o, resultsArr);
-if (appliedNoOps.count() === 2) {
- assert.eq(docsToApply[1], resultsArr[1].o2.o, resultsArr);
-}
-// Step up a new node in the recipient set and trigger a failover. The new primary should resume
-// fetching starting from the unapplied documents.
-const newRecipientPrimary = recipientRst.getSecondaries()[0];
-recipientRst.stepUp(newRecipientPrimary);
-waitAfterDatabaseClone.off();
-waitInOplogApplier.off();
-recipientRst.getPrimary();
-
-// The migration should go through after recipient failover.
-TenantMigrationTest.assertCommitted(migrationThread.returnData());
-// Validate that the last no-op entry is applied.
-local = newRecipientPrimary.getDB("local");
-appliedNoOps = local.oplog.rs.find({fromTenantMigration: migrationId, op: "n"});
-resultsArr = appliedNoOps.toArray();
-assert.eq(3, appliedNoOps.count(), appliedNoOps);
-assert.eq(docsToApply[2], resultsArr[2].o2.o, resultsArr);
-
-checkTenantDBHashes({
- donorRst: tenantMigrationTest.getDonorRst(),
- recipientRst: tenantMigrationTest.getRecipientRst(),
- tenantId
-});
-tenantMigrationTest.stop();
-recipientRst.stopSet();
diff --git a/jstests/replsets/tenant_migration_retryable_write_retry_on_recipient.js b/jstests/replsets/tenant_migration_retryable_write_retry_on_recipient.js
index 245673daa62..a9ee61f0993 100644
--- a/jstests/replsets/tenant_migration_retryable_write_retry_on_recipient.js
+++ b/jstests/replsets/tenant_migration_retryable_write_retry_on_recipient.js
@@ -186,6 +186,7 @@ assert.commandWorked(
jsTest.log("Waiting for migration to complete");
waitBeforeFetchingTransactions.off();
TenantMigrationTest.assertCommitted(migrationThread.returnData());
+tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString);
// Print the no-op oplog entries for debugging purposes.
jsTestLog("Recipient oplog migration entries.");
@@ -262,6 +263,7 @@ function testRecipientRetryableWrites(db, writes) {
jsTestLog("Run retryable write on primary after the migration");
testRecipientRetryableWrites(recipientDb, beforeWrites);
testRecipientRetryableWrites(recipientDb, duringWrites);
+
jsTestLog("Step up secondary");
const recipientRst = tenantMigrationTest.getRecipientRst();
recipientRst.stepUp(recipientRst.getSecondary());
@@ -269,8 +271,6 @@ jsTestLog("Run retryable write on secondary after the migration");
testRecipientRetryableWrites(recipientRst.getPrimary().getDB(kDbName), beforeWrites);
testRecipientRetryableWrites(recipientRst.getPrimary().getDB(kDbName), duringWrites);
-tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString);
-
jsTestLog("Trying a back-to-back migration");
const tenantMigrationTest2 = new TenantMigrationTest(
{name: jsTestName() + "2", donorRst: tenantMigrationTest.getRecipientRst()});
diff --git a/jstests/replsets/tenant_migration_timeseries_retryable_write_retry_on_recipient.js b/jstests/replsets/tenant_migration_timeseries_retryable_write_retry_on_recipient.js
index 4a3475178f5..b5a466046f8 100644
--- a/jstests/replsets/tenant_migration_timeseries_retryable_write_retry_on_recipient.js
+++ b/jstests/replsets/tenant_migration_timeseries_retryable_write_retry_on_recipient.js
@@ -105,6 +105,7 @@ function testRetryOnRecipient(ordered) {
jsTest.log("Waiting for migration to complete");
pauseTenantMigrationBeforeLeavingDataSyncState.off();
TenantMigrationTest.assertCommitted(migrationThread.returnData());
+ tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString);
// Print the no-op oplog entries for debugging purposes.
jsTestLog("Recipient oplog migration entries.");
@@ -132,8 +133,6 @@ function testRetryOnRecipient(ordered) {
testRecipientRetryableWrites(recipientRst.getPrimary().getDB(kDbName), beforeWrites);
testRecipientRetryableWrites(recipientRst.getPrimary().getDB(kDbName), duringWrites);
- tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString);
-
jsTestLog("Trying a back-to-back migration");
const tenantMigrationTest2 = new TenantMigrationTest(
{name: jsTestName() + "2", donorRst: tenantMigrationTest.getRecipientRst()});