author    | auto-revert-processor <dev-prod-dag@mongodb.com> | 2023-04-15 04:58:07 +0000
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-04-15 05:32:18 +0000
commit    | fecd7c876ae3838942f752267f4a0b30e5e0d373 (patch)
tree      | 31dc0c3b5f93149b007e2365c06b24df10cb0b05 /jstests/replsets
parent    | 88a17d29b2890c0ab25ed44ddc349f3589fb6934 (diff)
download  | mongo-fecd7c876ae3838942f752267f4a0b30e5e0d373.tar.gz
Revert "SERVER-75990: Tenant Migrations are not resilient to recipient failover"
This reverts commit c92d1161fdf333a3e3219631557a480fae30a593.
Diffstat (limited to 'jstests/replsets')
18 files changed, 2441 insertions, 173 deletions
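The re-enabled tests below share one recipient-failover pattern: pause the recipient on a failpoint, force a stepdown, release the failpoint, and assert that the migration still commits. The following is a minimal sketch of that pattern assembled from helpers that appear throughout this diff (configureFailPoint, startMigration, waitForMigrationToComplete); it is an illustration for the reader, not part of the commit:

    // Hang the recipient primary right after it persists its state document.
    const fp = configureFailPoint(recipientPrimary,
                                  "fpAfterPersistingTenantMigrationRecipientInstanceStateDoc",
                                  {action: "hang"});
    assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts));
    fp.wait();

    // Force a failover on the recipient, then let the migration resume.
    assert.commandWorked(
        recipientPrimary.adminCommand({replSetStepDown: ReplSetTest.kForeverSecs, force: true}));
    assert.commandWorked(recipientPrimary.adminCommand({replSetFreeze: 0}));
    fp.off();

    // The migration must still commit despite the recipient failover.
    TenantMigrationTest.assertCommitted(tenantMigrationTest.waitForMigrationToComplete(
        migrationOpts, true /* retryOnRetryableErrors */));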
diff --git a/jstests/replsets/tenant_migration_cloner_stats_with_failover.js b/jstests/replsets/tenant_migration_cloner_stats_with_failover.js
new file mode 100644
index 00000000000..77c98dea613
--- /dev/null
+++ b/jstests/replsets/tenant_migration_cloner_stats_with_failover.js
@@ -0,0 +1,146 @@
+/**
+ * Tests tenant migration cloner stats such as 'approxTotalDataSize', 'approxTotalBytesCopied',
+ * 'databasesClonedBeforeFailover' across multiple databases and collections with failovers.
+ *
+ * This test does the following:
+ * 1. Insert two databases on the donor. The first database consists of one collection, the second
+ *    consists of two collections.
+ * 2. Wait for the primary (referred to as the original primary) to clone one batch from the second
+ *    database's second collection.
+ * 3. Step up the new primary. Ensure that the stats such as 'databasesClonedBeforeFailover' tally.
+ * 4. Allow the tenant migration to complete and commit. Ensure that stats are sensible.
+ *
+ * @tags: [
+ *   incompatible_with_macos,
+ *   incompatible_with_shard_merge,
+ *   incompatible_with_windows_tls,
+ *   requires_majority_read_concern,
+ *   requires_persistence,
+ *   serverless,
+ *   incompatible_with_shard_merge,
+ * ]
+ */
+
+import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js";
+load("jstests/libs/uuid_util.js");        // For extractUUIDFromObject().
+load("jstests/libs/fail_point_util.js");  // For configureFailPoint().
+
+// Limit the batch size to test the stat in between batches.
+const tenantMigrationTest = new TenantMigrationTest(
+    {name: jsTestName(), sharedOptions: {setParameter: {collectionClonerBatchSize: 10}}});
+
+const kMigrationId = UUID();
+const kTenantId = ObjectId().str;
+const kReadPreference = {
+    mode: "primary"
+};
+const migrationOpts = {
+    migrationIdString: extractUUIDFromObject(kMigrationId),
+    tenantId: kTenantId,
+    readPreference: kReadPreference
+};
+
+const dbName = tenantMigrationTest.tenantDB(kTenantId, "testDB");
+const collName = "coll";
+const dbName1 = dbName + '_db_1';
+const dbName2 = dbName + '_db_2';
+const db2Coll1 = collName + "_db_2_1";
+const db2Coll2 = collName + "_db_2_2";
+
+// Add a large amount of data to the donor.
+jsTestLog("Adding data to donor."); +const dataForEachCollection = [...Array(100).keys()].map((i) => ({a: i, b: 'metanoia'})); +tenantMigrationTest.insertDonorDB(dbName1, collName + "_1", dataForEachCollection); +tenantMigrationTest.insertDonorDB(dbName2, db2Coll1, dataForEachCollection); +tenantMigrationTest.insertDonorDB(dbName2, db2Coll2, dataForEachCollection); + +const originalRecipientPrimary = tenantMigrationTest.getRecipientPrimary(); +const newRecipientPrimary = tenantMigrationTest.getRecipientRst().getSecondaries()[0]; + +jsTestLog("Collecting the stats of the databases and collections from the donor."); +const donorPrimary = tenantMigrationTest.getDonorPrimary(); +const donorDB2 = donorPrimary.getDB(dbName2); + +const db1Size = assert.commandWorked(donorPrimary.getDB(dbName1).runCommand({dbStats: 1})).dataSize; +const db2Size = assert.commandWorked(donorDB2.runCommand({dbStats: 1})).dataSize; +const db2Collection1Size = assert.commandWorked(donorDB2.runCommand({collStats: db2Coll1})).size; +const db2Collection2Size = assert.commandWorked(donorDB2.runCommand({collStats: db2Coll2})).size; + +const donorStats = { + db1Size, + db2Size, + db2Collection1Size, + db2Collection2Size +}; +jsTestLog("Collected the following stats on the donor: " + tojson(donorStats)); + +// The last collection to be cloned is the one with a greater UUID. +const collInfo = donorDB2.getCollectionInfos(); +const uuid1 = collInfo[0].info.uuid; +const uuid2 = collInfo[1].info.uuid; +const lastCollection = (uuid1 > uuid2) ? db2Coll1 : db2Coll2; + +// Create a failpoint to pause after one batch of the second database's second collection has been +// cloned. +const fpAfterBatchOfSecondDB = configureFailPoint( + originalRecipientPrimary, + "tenantMigrationHangCollectionClonerAfterHandlingBatchResponse", + {nss: originalRecipientPrimary.getDB(dbName2).getCollection(lastCollection).getFullName()}); + +jsTestLog("Starting tenant migration with migrationId: " + kMigrationId + + ", tenantId: " + kTenantId); +assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts)); + +let res = 0; +let currOp = 0; +jsTestLog("Waiting until one batch of second database has been cloned by original primary."); +fpAfterBatchOfSecondDB.wait(); +// Since documents are inserted on a separate thread, wait until the expected stats are seen. The +// failpoint needs to be maintained so that the next batch isn't processed. +assert.soon(() => { + res = originalRecipientPrimary.adminCommand( + {currentOp: true, desc: "tenant recipient migration"}); + currOp = res.inprog[0]; + + // Wait until one batch of documents of the second database's second collection has been copied. + return currOp.approxTotalBytesCopied > db1Size + db2Collection1Size; +}, res); + +assert.eq(currOp.approxTotalDataSize, db1Size + db2Size, res); +// Since the two collections on the second database are the same size, +// 'db1Size + db2Collection1Size' and 'db1Size + db2Collection2Size' evaluate to the same value. 
+assert.gt(currOp.approxTotalBytesCopied, db1Size + db2Collection1Size, res); +assert.lt(currOp.approxTotalBytesCopied, db1Size + db2Size, res); +assert.eq(currOp.databases.databasesClonedBeforeFailover, 0, res); +assert.eq(currOp.databases[dbName2].clonedCollectionsBeforeFailover, 0, res); +const bytesCopiedIncludingSecondDB = currOp.approxTotalBytesCopied; +jsTestLog("Bytes copied after first batch of second database: " + bytesCopiedIncludingSecondDB); + +// Wait until the batch of the second collection of the second database has been replicated from the +// original primary to the new primary. Then, step up the new primary. +const fpAfterCreatingCollectionOfSecondDB = + configureFailPoint(newRecipientPrimary, "tenantCollectionClonerHangAfterCreateCollection"); +tenantMigrationTest.getRecipientRst().stepUp(newRecipientPrimary); +fpAfterBatchOfSecondDB.off(); + +jsTestLog("Wait until the new primary creates collection of second database."); +fpAfterCreatingCollectionOfSecondDB.wait(); +res = newRecipientPrimary.adminCommand({currentOp: true, desc: "tenant recipient migration"}); +currOp = res.inprog[0]; +assert.eq(currOp.approxTotalDataSize, db1Size + db2Size, res); +assert.eq(currOp.approxTotalBytesCopied, bytesCopiedIncludingSecondDB, res); +assert.eq(currOp.databases.databasesClonedBeforeFailover, 1, res); +assert.eq(currOp.databases[dbName2].clonedCollectionsBeforeFailover, 1, res); +fpAfterCreatingCollectionOfSecondDB.off(); + +// After the migration completes, the total bytes copied should be equal to the total data size. +jsTestLog("Waiting for migration to complete."); +TenantMigrationTest.assertCommitted(tenantMigrationTest.waitForMigrationToComplete(migrationOpts)); +res = newRecipientPrimary.adminCommand({currentOp: true, desc: "tenant recipient migration"}); +currOp = res.inprog[0]; +assert.eq(currOp.approxTotalDataSize, db1Size + db2Size, res); +assert.eq(currOp.approxTotalBytesCopied, db1Size + db2Size, res); +assert.eq(currOp.databases.databasesClonedBeforeFailover, 1, res); +assert.eq(currOp.databases[dbName2].clonedCollectionsBeforeFailover, 1, res); + +tenantMigrationTest.stop(); diff --git a/jstests/replsets/tenant_migration_cluster_time_keys_cloning.js b/jstests/replsets/tenant_migration_cluster_time_keys_cloning.js index f3ab1df7e24..f3737dd4706 100644 --- a/jstests/replsets/tenant_migration_cluster_time_keys_cloning.js +++ b/jstests/replsets/tenant_migration_cluster_time_keys_cloning.js @@ -177,58 +177,55 @@ const migrationX509Options = makeX509OptionsForTest(); tenantMigrationTest.stop(); })(); -// TODO SERVER-76128: Tenant Migrations are not robust to recipient failover. 
-// (() => { -// jsTest.log("Test that the donor and recipient correctly copy each other's cluster time keys " -// + -// "when there is recipient failover."); -// const recipientRst = new ReplSetTest({ -// nodes: 3, -// name: "recipientRst", -// serverless: true, -// nodeOptions: migrationX509Options.recipient -// }); -// recipientRst.startSet(); -// recipientRst.initiate(); -// if (isShardMergeEnabled(recipientRst.getPrimary().getDB("adminDB"))) { -// jsTestLog("Skip: featureFlagShardMerge enabled, but shard merge does not survive -// failover"); -// recipientRst.stopSet(); -// return; -// } - -// const tenantMigrationTest = new TenantMigrationTest({name: jsTestName(), recipientRst}); - -// const recipientPrimary = recipientRst.getPrimary(); -// const fp = configureFailPoint(recipientPrimary, -// "fpAfterPersistingTenantMigrationRecipientInstanceStateDoc", -// {action: "hang"}); - -// const migrationId = UUID(); -// const migrationOpts = { -// migrationIdString: extractUUIDFromObject(migrationId), -// tenantId: kTenantId1, -// }; -// assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts)); -// fp.wait(); - -// assert.commandWorked( -// recipientPrimary.adminCommand({replSetStepDown: ReplSetTest.kForeverSecs, force: true})); -// assert.commandWorked(recipientPrimary.adminCommand({replSetFreeze: 0})); - -// fp.off(); -// TenantMigrationTest.assertCommitted(tenantMigrationTest.waitForMigrationToComplete( -// migrationOpts, true /* retryOnRetryableErrors */)); - -// assertCopiedExternalKeys(tenantMigrationTest, migrationId); - -// // After another migration, the first's keys should still exist. -// runMigrationAndAssertExternalKeysCopied(tenantMigrationTest, kTenantId2); -// assertCopiedExternalKeys(tenantMigrationTest, migrationId); - -// recipientRst.stopSet(); -// tenantMigrationTest.stop(); -// })(); +(() => { + jsTest.log("Test that the donor and recipient correctly copy each other's cluster time keys " + + "when there is recipient failover."); + const recipientRst = new ReplSetTest({ + nodes: 3, + name: "recipientRst", + serverless: true, + nodeOptions: migrationX509Options.recipient + }); + recipientRst.startSet(); + recipientRst.initiate(); + if (isShardMergeEnabled(recipientRst.getPrimary().getDB("adminDB"))) { + jsTestLog("Skip: featureFlagShardMerge enabled, but shard merge does not survive failover"); + recipientRst.stopSet(); + return; + } + + const tenantMigrationTest = new TenantMigrationTest({name: jsTestName(), recipientRst}); + + const recipientPrimary = recipientRst.getPrimary(); + const fp = configureFailPoint(recipientPrimary, + "fpAfterPersistingTenantMigrationRecipientInstanceStateDoc", + {action: "hang"}); + + const migrationId = UUID(); + const migrationOpts = { + migrationIdString: extractUUIDFromObject(migrationId), + tenantId: kTenantId1, + }; + assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts)); + fp.wait(); + + assert.commandWorked( + recipientPrimary.adminCommand({replSetStepDown: ReplSetTest.kForeverSecs, force: true})); + assert.commandWorked(recipientPrimary.adminCommand({replSetFreeze: 0})); + + fp.off(); + TenantMigrationTest.assertCommitted(tenantMigrationTest.waitForMigrationToComplete( + migrationOpts, true /* retryOnRetryableErrors */)); + + assertCopiedExternalKeys(tenantMigrationTest, migrationId); + + // After another migration, the first's keys should still exist. 
+ runMigrationAndAssertExternalKeysCopied(tenantMigrationTest, kTenantId2); + assertCopiedExternalKeys(tenantMigrationTest, migrationId); + + recipientRst.stopSet(); + tenantMigrationTest.stop(); +})(); (() => { jsTest.log("Test that the donor waits for copied external keys to replicate to every node"); diff --git a/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js b/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js new file mode 100644 index 00000000000..b3b158c7134 --- /dev/null +++ b/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js @@ -0,0 +1,488 @@ +/** + * Tests that tenant migrations resume successfully on donor stepup and restart. + * + * Incompatible with shard merge, which can't handle restart. + * + * @tags: [ + * incompatible_with_macos, + * incompatible_with_shard_merge, + * incompatible_with_windows_tls, + * # Some tenant migration statistics field names were changed in 6.1. + * requires_fcv_61, + * requires_majority_read_concern, + * requires_persistence, + * # Tenant migrations are only used in serverless. + * serverless, + * ] + */ + +import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js"; +import { + forgetMigrationAsync, + isShardMergeEnabled, + makeX509OptionsForTest, + runMigrationAsync, + tryAbortMigrationAsync +} from "jstests/replsets/libs/tenant_migration_util.js"; + +load("jstests/libs/fail_point_util.js"); +load("jstests/libs/parallelTester.js"); +load("jstests/libs/uuid_util.js"); +load("jstests/replsets/rslib.js"); // 'createRstArgs' + +const kMaxSleepTimeMS = 100; +const kTenantId = ObjectId().str; +const kMigrationFpNames = [ + "pauseTenantMigrationBeforeLeavingDataSyncState", + "pauseTenantMigrationBeforeLeavingBlockingState", + "abortTenantMigrationBeforeLeavingBlockingState", + "" +]; + +// Set the delay before a state doc is garbage collected to be short to speed up the test but long +// enough for the state doc to still be around after stepup or restart. +const kGarbageCollectionDelayMS = 30 * 1000; + +// Set the TTL monitor to run at a smaller interval to speed up the test. +const kTTLMonitorSleepSecs = 1; + +const migrationX509Options = makeX509OptionsForTest(); + +/** + * Runs the donorStartMigration command to start a migration, and interrupts the migration on the + * donor using the 'interruptFunc', and asserts that migration eventually commits. 
+ */ +function testDonorStartMigrationInterrupt(interruptFunc, + {donorRestarted = false, disableForShardMerge = true}) { + const donorRst = new ReplSetTest( + {nodes: 3, name: "donorRst", serverless: true, nodeOptions: migrationX509Options.donor}); + + donorRst.startSet(); + donorRst.initiate(); + + const tenantMigrationTest = new TenantMigrationTest({name: jsTestName(), donorRst}); + + let donorPrimary = tenantMigrationTest.getDonorPrimary(); + const recipientPrimary = tenantMigrationTest.getRecipientPrimary(); + + if (disableForShardMerge && isShardMergeEnabled(recipientPrimary.getDB("admin"))) { + jsTest.log("Skipping test for shard merge"); + tenantMigrationTest.stop(); + donorRst.stopSet(); + return; + } + + const migrationId = UUID(); + const migrationOpts = { + migrationIdString: extractUUIDFromObject(migrationId), + tenantId: kTenantId, + recipientConnString: tenantMigrationTest.getRecipientConnString(), + }; + const donorRstArgs = createRstArgs(donorRst); + + const runMigrationThread = + new Thread(runMigrationAsync, migrationOpts, donorRstArgs, {retryOnRetryableErrors: true}); + runMigrationThread.start(); + + // Wait for donorStartMigration command to start. + assert.soon(() => donorPrimary.adminCommand({currentOp: true, desc: "tenant donor migration"}) + .inprog.length > 0); + + sleep(Math.random() * kMaxSleepTimeMS); + interruptFunc(donorRst); + + TenantMigrationTest.assertCommitted(runMigrationThread.returnData()); + tenantMigrationTest.waitForDonorNodesToReachState(donorRst.nodes, + migrationId, + migrationOpts.tenantId, + TenantMigrationTest.DonorState.kCommitted); + assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString)); + + donorPrimary = tenantMigrationTest.getDonorPrimary(); // Could change after interrupt. + const donorStats = tenantMigrationTest.getTenantMigrationStats(donorPrimary); + jsTestLog(`Stats at the donor primary: ${tojson(donorStats)}`); + if (donorRestarted) { + // If full restart happened the count could be lost completely. + assert.gte(1, donorStats.totalMigrationDonationsCommitted); + } else { + // The double counting happens when the failover happens after migration completes + // but before the state doc GC mark is persisted. While this test is targeting this + // scenario it is low probability in production. + assert(1 == donorStats.totalMigrationDonationsCommitted || + 2 == donorStats.totalMigrationDonationsCommitted); + } + // Skip checking the stats on the recipient since enableRecipientTesting is false + // so the recipient is forced to respond to recipientSyncData without starting the + // migration. + + tenantMigrationTest.stop(); + donorRst.stopSet(); +} + +/** + * Starts a migration and waits for it to commit, then runs the donorForgetMigration, and interrupts + * the donor using the 'interruptFunc', and asserts that the migration state is eventually garbage + * collected. 
+ */ +function testDonorForgetMigrationInterrupt(interruptFunc) { + const donorRst = new ReplSetTest({ + nodes: 3, + name: "donorRst", + serverless: true, + nodeOptions: Object.assign({}, migrationX509Options.donor, { + setParameter: { + tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS, + ttlMonitorSleepSecs: kTTLMonitorSleepSecs, + } + }) + }); + const recipientRst = new ReplSetTest({ + nodes: 1, + name: "recipientRst", + serverless: true, + nodeOptions: Object.assign({}, migrationX509Options.recipient, { + setParameter: { + tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS, + ttlMonitorSleepSecs: kTTLMonitorSleepSecs, + } + }) + }); + + donorRst.startSet(); + donorRst.initiate(); + + recipientRst.startSet(); + recipientRst.initiate(); + + const tenantMigrationTest = + new TenantMigrationTest({name: jsTestName(), donorRst, recipientRst}); + + const donorPrimary = tenantMigrationTest.getDonorPrimary(); + + const migrationId = UUID(); + const migrationOpts = { + migrationIdString: extractUUIDFromObject(migrationId), + tenantId: kTenantId, + recipientConnString: recipientRst.getURL(), + }; + const donorRstArgs = createRstArgs(donorRst); + + TenantMigrationTest.assertCommitted( + tenantMigrationTest.runMigration(migrationOpts, {automaticForgetMigration: false})); + const forgetMigrationThread = new Thread(forgetMigrationAsync, + migrationOpts.migrationIdString, + donorRstArgs, + true /* retryOnRetryableErrors */); + forgetMigrationThread.start(); + + // Wait for donorForgetMigration command to start. + assert.soon(() => { + const res = assert.commandWorked( + donorPrimary.adminCommand({currentOp: true, desc: "tenant donor migration"})); + return res.inprog[0].expireAt != null; + }); + sleep(Math.random() * kMaxSleepTimeMS); + interruptFunc(donorRst); + + assert.commandWorkedOrFailedWithCode( + tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString), + ErrorCodes.NoSuchTenantMigration); + + assert.commandWorked(forgetMigrationThread.returnData()); + tenantMigrationTest.waitForMigrationGarbageCollection(migrationId, migrationOpts.tenantId); + + tenantMigrationTest.stop(); + donorRst.stopSet(); + recipientRst.stopSet(); +} + +/** + * Starts a migration and sets the passed in failpoint, then runs the donorAbortMigration, and + * interrupts the donor using the 'interruptFunc', and asserts that the migration state is + * eventually garbage collected. 
+ */ +function testDonorAbortMigrationInterrupt( + interruptFunc, fpName, {fpWaitBeforeAbort = false, isShutdown = false} = {}) { + const donorRst = new ReplSetTest({ + nodes: 3, + name: "donorRst", + serverless: true, + nodeOptions: Object.assign({}, migrationX509Options.donor, { + setParameter: { + tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS, + ttlMonitorSleepSecs: kTTLMonitorSleepSecs, + } + }) + }); + const recipientRst = new ReplSetTest({ + nodes: 1, + name: "recipientRst", + serverless: true, + nodeOptions: Object.assign({}, migrationX509Options.recipient, { + setParameter: { + tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS, + ttlMonitorSleepSecs: kTTLMonitorSleepSecs, + } + }) + }); + + donorRst.startSet(); + donorRst.initiate(); + + recipientRst.startSet(); + recipientRst.initiate(); + + const tenantMigrationTest = + new TenantMigrationTest({name: jsTestName(), donorRst, recipientRst}); + + const migrationId = UUID(); + const migrationOpts = { + migrationIdString: extractUUIDFromObject(migrationId), + tenantId: kTenantId, + recipientConnString: recipientRst.getURL(), + }; + const donorRstArgs = createRstArgs(donorRst); + let donorPrimary = tenantMigrationTest.getDonorPrimary(); + + // If we passed in a valid failpoint we set it, otherwise we let the migration run normally. + let fp; + if (fpName) { + fp = configureFailPoint(donorPrimary, fpName); + } + + assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts)); + + if (fp && !isShutdown && fpWaitBeforeAbort) { + fp.wait(); + } + + const tryAbortThread = new Thread(tryAbortMigrationAsync, + {migrationIdString: migrationOpts.migrationIdString}, + donorRstArgs, + true /* retryOnRetryableErrors */); + tryAbortThread.start(); + + // Wait for donorAbortMigration command to start. + assert.soon(() => { + const res = assert.commandWorked( + donorPrimary.adminCommand({currentOp: true, desc: "tenant donor migration"})); + return res.inprog[0].receivedCancellation; + }); + + interruptFunc(donorRst); + + if (fp && !isShutdown) { + // Turn off failpoint in order to allow the migration to resume after stepup. + fp.off(); + } + + tryAbortThread.join(); + + let res = tryAbortThread.returnData(); + assert.commandWorkedOrFailedWithCode(res, ErrorCodes.TenantMigrationCommitted); + + donorPrimary = tenantMigrationTest.getDonorPrimary(); + let configDonorsColl = donorPrimary.getCollection(TenantMigrationTest.kConfigDonorsNS); + let donorDoc = configDonorsColl.findOne({tenantId: kTenantId}); + + if (!res.ok) { + assert.eq(donorDoc.state, TenantMigrationTest.DonorState.kCommitted); + } else { + assert.eq(donorDoc.state, TenantMigrationTest.DonorState.kAborted); + } + + tenantMigrationTest.stop(); + donorRst.stopSet(); + recipientRst.stopSet(); +} + +/** + * Starts a migration and sets the passed in failpoint, then either waits for the failpoint or lets + * the migration run successfully and interrupts the donor using the 'interruptFunc'. After + * restarting, check the to see if the donorDoc data has persisted. 
+ */ +function testStateDocPersistenceOnFailover(interruptFunc, fpName, isShutdown = false) { + const donorRst = new ReplSetTest( + {nodes: 3, name: "donorRst", serverless: true, nodeOptions: migrationX509Options.donor}); + + donorRst.startSet(); + donorRst.initiate(); + + const tenantMigrationTest = new TenantMigrationTest({name: jsTestName(), donorRst}); + + const migrationId = UUID(); + const migrationOpts = { + migrationIdString: extractUUIDFromObject(migrationId), + tenantId: kTenantId, + recipientConnString: tenantMigrationTest.getRecipientConnString(), + }; + let donorPrimary = tenantMigrationTest.getDonorPrimary(); + + // If we passed in a valid failpoint we set it, otherwise we let the migration run normally. + let fp; + if (fpName) { + fp = configureFailPoint(donorPrimary, fpName); + assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts)); + fp.wait(); + } else { + TenantMigrationTest.assertCommitted(tenantMigrationTest.runMigration(migrationOpts)); + } + + let configDonorsColl = donorPrimary.getCollection(TenantMigrationTest.kConfigDonorsNS); + let donorDocBeforeFailover = configDonorsColl.findOne({tenantId: kTenantId}); + + interruptFunc(tenantMigrationTest.getDonorRst()); + + if (fp && !isShutdown) { + // Turn off failpoint in order to allow the migration to resume after stepup. + fp.off(); + } + + donorPrimary = tenantMigrationTest.getDonorPrimary(); + configDonorsColl = donorPrimary.getCollection(TenantMigrationTest.kConfigDonorsNS); + let donorDocAfterFailover = configDonorsColl.findOne({tenantId: kTenantId}); + + // Check persisted fields in the donor doc. + assert.eq(donorDocBeforeFailover._id, donorDocAfterFailover._id); + assert.eq(donorDocBeforeFailover.recipientConnString, + donorDocAfterFailover.recipientConnString); + assert.eq(donorDocBeforeFailover.readPreference, donorDocAfterFailover.readPreference); + assert.eq(donorDocBeforeFailover.startMigrationDonorTimestamp, + donorDocAfterFailover.startMigrationDonorTimestamp); + assert.eq(donorDocBeforeFailover.migration, donorDocAfterFailover.migration); + assert.eq(donorDocBeforeFailover.tenantId, donorDocAfterFailover.tenantId); + assert.eq(donorDocBeforeFailover.donorCertificateForRecipient, + donorDocAfterFailover.donorCertificateForRecipient); + assert.eq(donorDocBeforeFailover.recipientCertificateForDonor, + donorDocAfterFailover.recipientCertificateForDonor); + assert.eq(donorDocBeforeFailover.migrationStart, donorDocAfterFailover.migrationStart); + + tenantMigrationTest.stop(); + donorRst.stopSet(); +} + +(() => { + jsTest.log("Test that the migration resumes on stepup"); + testDonorStartMigrationInterrupt((donorRst) => { + // Force the primary to step down but make it likely to step back up. + const donorPrimary = donorRst.getPrimary(); + assert.commandWorked( + donorPrimary.adminCommand({replSetStepDown: ReplSetTest.kForeverSecs, force: true})); + assert.commandWorked(donorPrimary.adminCommand({replSetFreeze: 0})); + }, {donorRestarted: false}); +})(); + +(() => { + jsTest.log("Test that the migration resumes after restart"); + testDonorStartMigrationInterrupt((donorRst) => { + // Skip validation on shutdown because the full validation can conflict with the tenant + // migration and cause it to fail. 
+ donorRst.stopSet(null /* signal */, true /*forRestart */, {skipValidation: true}); + donorRst.startSet({restart: true}); + }, {donorRestarted: true, disableForShardMerge: true}); +})(); + +(() => { + jsTest.log("Test that the donorForgetMigration command can be retried on stepup"); + testDonorForgetMigrationInterrupt((donorRst) => { + // Force the primary to step down but make it likely to step back up. + const donorPrimary = donorRst.getPrimary(); + assert.commandWorked( + donorPrimary.adminCommand({replSetStepDown: ReplSetTest.kForeverSecs, force: true})); + assert.commandWorked(donorPrimary.adminCommand({replSetFreeze: 0})); + }); +})(); + +(() => { + jsTest.log("Test that the donorForgetMigration command can be retried after restart"); + testDonorForgetMigrationInterrupt((donorRst) => { + // Skip validation on shutdown because the full validation can conflict with the tenant + // migration and cause it to fail. + donorRst.stopSet(null /* signal */, true /*forRestart */, {skipValidation: true}); + donorRst.startSet({restart: true}); + }); +})(); + +(() => { + jsTest.log("Test that the donorAbortMigration command can be retried after restart"); + + kMigrationFpNames.forEach(fpName => { + if (!fpName) { + jsTest.log("Testing without setting a failpoint."); + } else { + jsTest.log("Testing with failpoint: " + fpName); + } + + testDonorAbortMigrationInterrupt((donorRst) => { + // Skip validation on shutdown because the full validation can conflict with the tenant + // migration and cause it to fail. + donorRst.stopSet(null /* signal */, true /*forRestart */, {skipValidation: true}); + donorRst.startSet({restart: true}); + }, fpName, {isShutdown: true}); + }); +})(); + +(() => { + jsTest.log( + "Test that the donorAbortMigration command fails if issued after state == kCommitted"); + + testDonorAbortMigrationInterrupt((donorRst) => {}, + "pauseTenantMigrationAfterUpdatingToCommittedState", + {fpWaitBeforeAbort: true}); +})(); + +(() => { + jsTest.log("Test that the donorAbortMigration command can be retried on stepup"); + kMigrationFpNames.forEach(fpName => { + if (!fpName) { + jsTest.log("Testing without setting a failpoint."); + } else { + jsTest.log("Testing with failpoint: " + fpName); + } + + testDonorAbortMigrationInterrupt((donorRst) => { + // Force the primary to step down but make it likely to step back up. + const donorPrimary = donorRst.getPrimary(); + assert.commandWorked(donorPrimary.adminCommand( + {replSetStepDown: ReplSetTest.kForeverSecs, force: true})); + assert.commandWorked(donorPrimary.adminCommand({replSetFreeze: 0})); + }, fpName); + }); +})(); + +(() => { + jsTest.log("Test stateDoc data persistence on restart."); + kMigrationFpNames.forEach(fpName => { + if (!fpName) { + jsTest.log("Testing without setting a failpoint."); + } else { + jsTest.log("Testing with failpoint: " + fpName); + } + + testStateDocPersistenceOnFailover((donorRst) => { + // Skip validation on shutdown because the full validation can conflict with the tenant + // migration and cause it to fail. 
+ donorRst.stopSet(null /* signal */, true /*forRestart */, {skipValidation: true}); + donorRst.startSet({restart: true}); + }, fpName, true); + }); +})(); + +(() => { + jsTest.log("Test stateDoc data persistence on stepup."); + kMigrationFpNames.forEach(fpName => { + if (!fpName) { + jsTest.log("Testing without setting a failpoint."); + } else { + jsTest.log("Testing with failpoint: " + fpName); + } + + testStateDocPersistenceOnFailover((donorRst) => { + // Force the primary to step down but make it likely to step back up. + const donorPrimary = donorRst.getPrimary(); + assert.commandWorked(donorPrimary.adminCommand( + {replSetStepDown: ReplSetTest.kForeverSecs, force: true})); + assert.commandWorked(donorPrimary.adminCommand({replSetFreeze: 0})); + }, fpName); + }); +})(); diff --git a/jstests/replsets/tenant_migration_external_keys_ttl.js b/jstests/replsets/tenant_migration_external_keys_ttl.js index 172600f1d2a..28611f9abaf 100644 --- a/jstests/replsets/tenant_migration_external_keys_ttl.js +++ b/jstests/replsets/tenant_migration_external_keys_ttl.js @@ -286,39 +286,38 @@ function makeTestParams() { teardown(); } - // TODO SERVER-76128: Tenant Migrations are not robust to recipient failover. - // jsTestLog("Recipient failover before receiving forgetMigration"); - // { - // const {tmt, teardown} = setup(); - // const [tenantId, migrationId, migrationOpts] = makeTestParams(); - // const recipientPrimary = tmt.getRecipientPrimary(); - // const fp = configureFailPoint(recipientPrimary, - // "fpAfterConnectingTenantMigrationRecipientInstance", - // {action: "hang"}); - - // assert.commandWorked(tmt.startMigration(migrationOpts)); - // fp.wait(); - - // assert.commandWorked(recipientPrimary.adminCommand( - // {replSetStepDown: ReplSetTest.kForeverSecs, force: true})); - // assert.commandWorked(recipientPrimary.adminCommand({replSetFreeze: 0})); - // fp.off(); - - // TenantMigrationTest.assertCommitted( - // tmt.waitForMigrationToComplete(migrationOpts, true /* retryOnRetryableErrors */)); - - // // The keys should have been created without a TTL deadline. - // verifyExternalKeys(tmt.getDonorPrimary(), {migrationId, expectTTLValue: false}); - // verifyExternalKeys(tmt.getRecipientPrimary(), {migrationId, expectTTLValue: false}); - - // assert.commandWorked(tmt.forgetMigration(migrationOpts.migrationIdString)); - - // // After running donorForgetMigration, the TTL value should be updated. The default TTL - // // buffer is 1 day so the keys will not have been deleted. - // verifyExternalKeys(tmt.getDonorPrimary(), {migrationId, expectTTLValue: true}); - // verifyExternalKeys(tmt.getRecipientPrimary(), {migrationId, expectTTLValue: true}); - // teardown(); - // } + jsTestLog("Recipient failover before receiving forgetMigration"); + { + const {tmt, teardown} = setup(); + const [tenantId, migrationId, migrationOpts] = makeTestParams(); + const recipientPrimary = tmt.getRecipientPrimary(); + const fp = configureFailPoint(recipientPrimary, + "fpAfterConnectingTenantMigrationRecipientInstance", + {action: "hang"}); + + assert.commandWorked(tmt.startMigration(migrationOpts)); + fp.wait(); + + assert.commandWorked(recipientPrimary.adminCommand( + {replSetStepDown: ReplSetTest.kForeverSecs, force: true})); + assert.commandWorked(recipientPrimary.adminCommand({replSetFreeze: 0})); + fp.off(); + + TenantMigrationTest.assertCommitted( + tmt.waitForMigrationToComplete(migrationOpts, true /* retryOnRetryableErrors */)); + + // The keys should have been created without a TTL deadline. 
+ verifyExternalKeys(tmt.getDonorPrimary(), {migrationId, expectTTLValue: false}); + verifyExternalKeys(tmt.getRecipientPrimary(), {migrationId, expectTTLValue: false}); + + assert.commandWorked(tmt.forgetMigration(migrationOpts.migrationIdString)); + + // After running donorForgetMigration, the TTL value should be updated. The default TTL + // buffer is 1 day so the keys will not have been deleted. + verifyExternalKeys(tmt.getDonorPrimary(), {migrationId, expectTTLValue: true}); + verifyExternalKeys(tmt.getRecipientPrimary(), {migrationId, expectTTLValue: true}); + teardown(); + } jsTestLog( "Donor failover after receiving forgetMigration before marking keys garbage collectable"); @@ -356,42 +355,39 @@ function makeTestParams() { teardown(); } - // TODO SERVER-76128: Tenant Migrations are not robust to recipient failover. - // jsTestLog( - // "Recipient failover after receiving forgetMigration before marking keys garbage - // collectable"); - // { - // const {tmt, donorRst, teardown} = setup(); - // const [tenantId, migrationId, migrationOpts] = makeTestParams(); - // const recipientPrimary = tmt.getRecipientPrimary(); - - // assert.commandWorked(tmt.startMigration(migrationOpts)); - // TenantMigrationTest.assertCommitted( - // tmt.waitForMigrationToComplete(migrationOpts, true /* retryOnRetryableErrors */)); - - // // The keys should have been created without a TTL deadline. - // verifyExternalKeys(tmt.getDonorPrimary(), {migrationId, expectTTLValue: false}); - // verifyExternalKeys(tmt.getRecipientPrimary(), {migrationId, expectTTLValue: false}); - - // const fp = configureFailPoint( - // recipientPrimary, "pauseTenantMigrationBeforeMarkingExternalKeysGarbageCollectable"); - // const forgetMigrationThread = new Thread( - // forgetMigrationAsync, migrationOpts.migrationIdString, createRstArgs(donorRst), - // true); - // forgetMigrationThread.start(); - // fp.wait(); - - // assert.commandWorked(recipientPrimary.adminCommand( - // {replSetStepDown: ReplSetTest.kForeverSecs, force: true})); - // assert.commandWorked(recipientPrimary.adminCommand({replSetFreeze: 0})); - // fp.off(); - - // assert.commandWorked(forgetMigrationThread.returnData()); - - // verifyExternalKeys(tmt.getDonorPrimary(), {migrationId, expectTTLValue: true}); - // verifyExternalKeys(tmt.getRecipientPrimary(), {migrationId, expectTTLValue: true}); - // teardown(); - // } + jsTestLog( + "Recipient failover after receiving forgetMigration before marking keys garbage collectable"); + { + const {tmt, donorRst, teardown} = setup(); + const [tenantId, migrationId, migrationOpts] = makeTestParams(); + const recipientPrimary = tmt.getRecipientPrimary(); + + assert.commandWorked(tmt.startMigration(migrationOpts)); + TenantMigrationTest.assertCommitted( + tmt.waitForMigrationToComplete(migrationOpts, true /* retryOnRetryableErrors */)); + + // The keys should have been created without a TTL deadline. 
+ verifyExternalKeys(tmt.getDonorPrimary(), {migrationId, expectTTLValue: false}); + verifyExternalKeys(tmt.getRecipientPrimary(), {migrationId, expectTTLValue: false}); + + const fp = configureFailPoint( + recipientPrimary, "pauseTenantMigrationBeforeMarkingExternalKeysGarbageCollectable"); + const forgetMigrationThread = new Thread( + forgetMigrationAsync, migrationOpts.migrationIdString, createRstArgs(donorRst), true); + forgetMigrationThread.start(); + fp.wait(); + + assert.commandWorked(recipientPrimary.adminCommand( + {replSetStepDown: ReplSetTest.kForeverSecs, force: true})); + assert.commandWorked(recipientPrimary.adminCommand({replSetFreeze: 0})); + fp.off(); + + assert.commandWorked(forgetMigrationThread.returnData()); + + verifyExternalKeys(tmt.getDonorPrimary(), {migrationId, expectTTLValue: true}); + verifyExternalKeys(tmt.getRecipientPrimary(), {migrationId, expectTTLValue: true}); + teardown(); + } jsTestLog("Donor failover after receiving forgetMigration after updating keys."); { @@ -437,53 +433,49 @@ function makeTestParams() { teardown(); } - // TODO SERVER-76128: Tenant Migrations are not robust to recipient failover. - // jsTestLog("Recipient failover after receiving forgetMigration after updating keys."); - // { - // const {tmt, donorRst, recipientRst, teardown} = setup(); - // // this test expects the external keys to expire, so lower the expiration timeouts. - // const lowerExternalKeysBufferSecs = 5; - // const lowerStateDocExpirationMS = 500; - // for (let conn of [...donorRst.nodes, ...recipientRst.nodes]) { - // setTenantMigrationExpirationParams( - // conn, lowerStateDocExpirationMS, lowerExternalKeysBufferSecs); - // } - // const [tenantId, migrationId, migrationOpts] = makeTestParams(); - // const recipientPrimary = tmt.getRecipientPrimary(); - - // assert.commandWorked(tmt.startMigration(migrationOpts)); - // TenantMigrationTest.assertCommitted( - // tmt.waitForMigrationToComplete(migrationOpts, true /* retryOnRetryableErrors */)); - - // // The keys should have been created without a TTL deadline. - // verifyExternalKeys(tmt.getDonorPrimary(), {migrationId, expectTTLValue: false}); - // verifyExternalKeys(tmt.getRecipientPrimary(), {migrationId, expectTTLValue: false}); - - // const fp = configureFailPoint( - // recipientPrimary, "fpAfterReceivingRecipientForgetMigration", {action: "hang"}); - // const forgetMigrationThread = new Thread( - // forgetMigrationAsync, migrationOpts.migrationIdString, createRstArgs(donorRst), - // true); - // forgetMigrationThread.start(); - // fp.wait(); - - // // Let the keys expire on the donor before the state document is deleted to verify - // retrying - // // recipientForgetMigration can handle this case. The keys won't be deleted until the - // buffer - // // expires, so sleep to avoid wasted work. - // sleep((lowerExternalKeysBufferSecs * 1000) + lowerStateDocExpirationMS + 500); - // waitForExternalKeysToBeDeleted(tmt.getRecipientPrimary(), migrationId); - - // assert.commandWorked(recipientPrimary.adminCommand( - // {replSetStepDown: ReplSetTest.kForeverSecs, force: true})); - // assert.commandWorked(recipientPrimary.adminCommand({replSetFreeze: 0})); - // fp.off(); - - // assert.commandWorked(forgetMigrationThread.returnData()); - - // // Eventually the donor's keys should be deleted too. 
- // waitForExternalKeysToBeDeleted(tmt.getDonorPrimary(), migrationId); - // teardown(); - // } + jsTestLog("Recipient failover after receiving forgetMigration after updating keys."); + { + const {tmt, donorRst, recipientRst, teardown} = setup(); + // this test expects the external keys to expire, so lower the expiration timeouts. + const lowerExternalKeysBufferSecs = 5; + const lowerStateDocExpirationMS = 500; + for (let conn of [...donorRst.nodes, ...recipientRst.nodes]) { + setTenantMigrationExpirationParams( + conn, lowerStateDocExpirationMS, lowerExternalKeysBufferSecs); + } + const [tenantId, migrationId, migrationOpts] = makeTestParams(); + const recipientPrimary = tmt.getRecipientPrimary(); + + assert.commandWorked(tmt.startMigration(migrationOpts)); + TenantMigrationTest.assertCommitted( + tmt.waitForMigrationToComplete(migrationOpts, true /* retryOnRetryableErrors */)); + + // The keys should have been created without a TTL deadline. + verifyExternalKeys(tmt.getDonorPrimary(), {migrationId, expectTTLValue: false}); + verifyExternalKeys(tmt.getRecipientPrimary(), {migrationId, expectTTLValue: false}); + + const fp = configureFailPoint( + recipientPrimary, "fpAfterReceivingRecipientForgetMigration", {action: "hang"}); + const forgetMigrationThread = new Thread( + forgetMigrationAsync, migrationOpts.migrationIdString, createRstArgs(donorRst), true); + forgetMigrationThread.start(); + fp.wait(); + + // Let the keys expire on the donor before the state document is deleted to verify retrying + // recipientForgetMigration can handle this case. The keys won't be deleted until the buffer + // expires, so sleep to avoid wasted work. + sleep((lowerExternalKeysBufferSecs * 1000) + lowerStateDocExpirationMS + 500); + waitForExternalKeysToBeDeleted(tmt.getRecipientPrimary(), migrationId); + + assert.commandWorked(recipientPrimary.adminCommand( + {replSetStepDown: ReplSetTest.kForeverSecs, force: true})); + assert.commandWorked(recipientPrimary.adminCommand({replSetFreeze: 0})); + fp.off(); + + assert.commandWorked(forgetMigrationThread.returnData()); + + // Eventually the donor's keys should be deleted too. + waitForExternalKeysToBeDeleted(tmt.getDonorPrimary(), migrationId); + teardown(); + } })(); diff --git a/jstests/replsets/tenant_migration_recipient_failover_before_creating_oplog_buffer.js b/jstests/replsets/tenant_migration_recipient_failover_before_creating_oplog_buffer.js new file mode 100644 index 00000000000..4cd9ae5a415 --- /dev/null +++ b/jstests/replsets/tenant_migration_recipient_failover_before_creating_oplog_buffer.js @@ -0,0 +1,54 @@ +/** + * Tests whether the recipient returns an appropriate error code to the donor when the recipient + * primary is made to step down before creating the oplog buffer collection. + * + * @tags: [ + * incompatible_with_macos, + * incompatible_with_shard_merge, + * incompatible_with_windows_tls, + * requires_persistence, + * requires_replication, + * serverless, + * ] + */ + +import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js"; +load("jstests/libs/uuid_util.js"); // For extractUUIDFromObject(). +load("jstests/libs/fail_point_util.js"); // For configureFailPoint(). 
+ +const tenantMigrationTest = + new TenantMigrationTest({name: jsTestName(), sharedOptions: {nodes: 2}}); + +const kMigrationId = UUID(); +const kTenantId = ObjectId().str; +const kReadPreference = { + mode: "primary" +}; +const migrationOpts = { + migrationIdString: extractUUIDFromObject(kMigrationId), + tenantId: kTenantId, + readPreference: kReadPreference +}; + +const fpBeforeCreatingOplogBuffer = + configureFailPoint(tenantMigrationTest.getRecipientPrimary(), + "fpAfterRetrievingStartOpTimesMigrationRecipientInstance", + {action: "hang"}); + +jsTestLog("Starting tenant migration with migrationId: " + kMigrationId + + ", tenantId: " + kTenantId); +assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts)); + +jsTestLog("Waiting until the recipient primary is about to create an oplog buffer collection."); +fpBeforeCreatingOplogBuffer.wait(); + +jsTestLog("Stepping a new primary up."); +tenantMigrationTest.getRecipientRst().stepUp( + tenantMigrationTest.getRecipientRst().getSecondaries()[0]); + +fpBeforeCreatingOplogBuffer.off(); + +jsTestLog("Waiting for migration to complete."); +TenantMigrationTest.assertCommitted(tenantMigrationTest.waitForMigrationToComplete(migrationOpts)); + +tenantMigrationTest.stop(); diff --git a/jstests/replsets/tenant_migration_recipient_initial_sync_cloning.js b/jstests/replsets/tenant_migration_recipient_initial_sync_cloning.js new file mode 100644 index 00000000000..b0c58169b01 --- /dev/null +++ b/jstests/replsets/tenant_migration_recipient_initial_sync_cloning.js @@ -0,0 +1,199 @@ +/** + * Tests that during tenant migration, a new recipient node's state document and in-memory state is + * initialized after initial sync, when 1) the node hasn't begun cloning data yet, 2) is cloning + * data, and 3) is in the tenant oplog application phase. + * + * @tags: [ + * incompatible_with_macos, + * incompatible_with_shard_merge, + * incompatible_with_windows_tls, + * requires_majority_read_concern, + * requires_persistence, + * serverless, + * incompatible_with_shard_merge, + * ] + */ + +import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js"; +import {makeX509OptionsForTest} from "jstests/replsets/libs/tenant_migration_util.js"; + +load("jstests/libs/fail_point_util.js"); +load("jstests/libs/uuid_util.js"); +load('jstests/replsets/rslib.js'); // for waitForNewlyAddedRemovalForNodeToBeCommitted + +const migrationX509Options = makeX509OptionsForTest(); + +const testDBName = 'testDB'; +const testCollName = 'testColl'; + +// Restarts a node, allows the node to go through initial sync, and then makes sure its state +// matches up with the primary's. Returns the initial sync node. +function restartNodeAndCheckState(tenantId, tenantMigrationTest, checkMtab) { + // Restart a node and allow it to complete initial sync. + const recipientRst = tenantMigrationTest.getRecipientRst(); + const originalRecipientPrimary = recipientRst.getPrimary(); + + jsTestLog("Restarting a node from the recipient replica set."); + let initialSyncNode = recipientRst.getSecondaries()[0]; + initialSyncNode = + recipientRst.restart(initialSyncNode, {startClean: true, skipValidation: true}); + + // Allow the new node to finish initial sync. + waitForNewlyAddedRemovalForNodeToBeCommitted(originalRecipientPrimary, + recipientRst.getNodeId(initialSyncNode)); + recipientRst.awaitSecondaryNodes(); + recipientRst.awaitReplication(); + + jsTestLog("Ensure that the new node's state matches up with the primary's."); + // Make sure the new node's state makes sense. 
+ let recipientDocOnPrimary = undefined; + let recipientDocOnNewNode = undefined; + assert.soon( + () => { + recipientDocOnPrimary = + originalRecipientPrimary.getCollection(TenantMigrationTest.kConfigRecipientsNS) + .findOne({tenantId}); + recipientDocOnNewNode = + initialSyncNode.getCollection(TenantMigrationTest.kConfigRecipientsNS) + .findOne({tenantId}); + + return recipientDocOnPrimary.state == recipientDocOnNewNode.state; + }, + `States never matched, primary: ${recipientDocOnPrimary}, on new node: ${ + recipientDocOnNewNode}`); + + if (checkMtab) { + jsTestLog("Ensuring TenantMigrationAccessBlocker states match."); + const primaryMtab = tenantMigrationTest.getTenantMigrationAccessBlocker( + {recipientNode: originalRecipientPrimary, tenantId}); + const newNodeMtab = tenantMigrationTest.getTenantMigrationAccessBlocker( + {recipientNode: initialSyncNode, tenantId}); + + assert.eq(primaryMtab.recipient.state, + newNodeMtab.recipient.state, + `Mtab didn't match, primary: ${primaryMtab}, on new node: ${newNodeMtab}`); + } + + return initialSyncNode; +} + +// Restarts a node without tenant oplog application. Ensures its state matches up with the +// primary's, and then steps it up. +function restartNodeAndCheckStateWithoutOplogApplication( + tenantId, tenantMigrationTest, checkMtab, fpOnRecipient) { + fpOnRecipient.wait(); + + const initialSyncNode = restartNodeAndCheckState(tenantId, tenantMigrationTest, checkMtab); + + jsTestLog("Stepping up the new node."); + // Now step up the new node + tenantMigrationTest.getRecipientRst().stepUp(initialSyncNode); + fpOnRecipient.off(); +} + +// Pauses the recipient before the tenant oplog application phase, and inserts documents on the +// donor that the recipient tenant oplog applier must apply. Then restarts node, allows initial +// sync, and steps the restarted node up. +function restartNodeAndCheckStateDuringOplogApplication( + tenantId, tenantMigrationTest, checkMtab, fpOnRecipient) { + fpOnRecipient.wait(); + + // Pause the tenant oplog applier before applying a batch. + const originalRecipientPrimary = tenantMigrationTest.getRecipientPrimary(); + const fpPauseOplogApplierOnBatch = + configureFailPoint(originalRecipientPrimary, "fpBeforeTenantOplogApplyingBatch"); + + // Insert documents into the donor after data cloning but before tenant oplog application, so + // that the recipient has entries to apply during tenant oplog application. + tenantMigrationTest.insertDonorDB( + tenantMigrationTest.tenantDB(tenantId, testDBName), + testCollName, + [...Array(30).keys()].map((i) => ({a: i, b: "George Harrison - All Things Must Pass"}))); + + // Wait until the oplog applier has started and is trying to apply a batch. Then restart a node. + fpPauseOplogApplierOnBatch.wait(); + const initialSyncNode = restartNodeAndCheckState(tenantId, tenantMigrationTest, checkMtab); + + jsTestLog("Stepping up the new node."); + // Now step up the new node + tenantMigrationTest.getRecipientRst().stepUp(initialSyncNode); + fpPauseOplogApplierOnBatch.off(); + fpOnRecipient.off(); +} + +// This function does the following: +// 1. Configures a failpoint on the recipient primary, depending on the 'recipientFailpoint' that is +// passed into the function. +// 2. Starts a tenant migration. +// 3. Waits for the recipient failpoint to be hit. Restarts a node, to make it go through initial +// sync. +// 4. Makes sure the restarted node's state is as expected. +// 5. 
Steps up the restarted node as the recipient primary, lifts the recipient failpoint, and +// allows the migration to complete. +function runTestCase(recipientFailpoint, checkMtab, restartNodeAndCheckStateFunction) { + const tenantId = ObjectId().str; + const donorRst = new ReplSetTest({ + name: "donorRst", + nodes: 1, + serverless: true, + nodeOptions: Object.assign(migrationX509Options.donor, { + setParameter: { + // Allow non-timestamped reads on donor after migration completes for testing. + 'failpoint.tenantMigrationDonorAllowsNonTimestampedReads': + tojson({mode: 'alwaysOn'}), + } + }) + }); + donorRst.startSet(); + donorRst.initiate(); + + const tenantMigrationTest = new TenantMigrationTest({ + name: jsTestName(), + donorRst, + sharedOptions: {setParameter: {tenantApplierBatchSizeOps: 2}} + }); + + const migrationOpts = {migrationIdString: extractUUIDFromObject(UUID()), tenantId}; + const dbName = tenantMigrationTest.tenantDB(tenantId, testDBName); + const originalRecipientPrimary = tenantMigrationTest.getRecipientPrimary(); + + const fpOnRecipient = + configureFailPoint(originalRecipientPrimary, recipientFailpoint, {action: "hang"}); + tenantMigrationTest.insertDonorDB(dbName, testCollName); + + jsTestLog(`Starting a tenant migration with migrationID ${ + migrationOpts.migrationIdString}, and tenantId ${tenantId}`); + assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts)); + + restartNodeAndCheckStateFunction(tenantId, tenantMigrationTest, checkMtab, fpOnRecipient); + + // Allow the migration to run to completion. + jsTestLog("Allowing migration to run to completion."); + TenantMigrationTest.assertCommitted( + tenantMigrationTest.waitForMigrationToComplete(migrationOpts)); + + assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString)); + + tenantMigrationTest.stop(); + donorRst.stopSet(); +} + +// These two test cases are for before the mtab is created, and before the oplog applier has been +// started. +runTestCase("fpAfterStartingOplogFetcherMigrationRecipientInstance", + false /* checkMtab */, + restartNodeAndCheckStateWithoutOplogApplication); +runTestCase("tenantCollectionClonerHangAfterCreateCollection", + false /* checkMtab */, + restartNodeAndCheckStateWithoutOplogApplication); + +// Test case to initial sync a node while the recipient is in the oplog application phase. +runTestCase("fpBeforeFulfillingDataConsistentPromise", + true /* checkMtab */, + restartNodeAndCheckStateDuringOplogApplication); + +// A case after data consistency so that the mtab exists. We do not care about the oplog applier in +// this case. +runTestCase("fpAfterWaitForRejectReadsBeforeTimestamp", + true /* checkMtab */, + restartNodeAndCheckStateWithoutOplogApplication); diff --git a/jstests/replsets/tenant_migration_recipient_resume_on_stepup_and_restart.js b/jstests/replsets/tenant_migration_recipient_resume_on_stepup_and_restart.js new file mode 100644 index 00000000000..f00a4c4f083 --- /dev/null +++ b/jstests/replsets/tenant_migration_recipient_resume_on_stepup_and_restart.js @@ -0,0 +1,211 @@ +/** + * Tests that tenant migrations resume successfully on recipient stepup and restart. + * + * @tags: [ + * incompatible_with_macos, + * incompatible_with_windows_tls, + * incompatible_with_shard_merge, + * # Some tenant migration statistics field names were changed in 6.1. 
+ * requires_fcv_61, + * requires_majority_read_concern, + * requires_persistence, + * serverless, + * ] + */ + +import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js"; +import { + forgetMigrationAsync, + makeX509OptionsForTest, + runMigrationAsync, +} from "jstests/replsets/libs/tenant_migration_util.js"; + +load("jstests/libs/fail_point_util.js"); +load("jstests/libs/parallelTester.js"); +load("jstests/libs/uuid_util.js"); +load('jstests/replsets/rslib.js'); // 'createRstArgs' + +const kMaxSleepTimeMS = 100; +const kTenantId = ObjectId().str; + +// Set the delay before a state doc is garbage collected to be short to speed up the test but long +// enough for the state doc to still be around after stepup or restart. +const kGarbageCollectionDelayMS = 30 * 1000; + +// Set the TTL monitor to run at a smaller interval to speed up the test. +const kTTLMonitorSleepSecs = 1; + +const migrationX509Options = makeX509OptionsForTest(); + +/** + * Runs the donorStartMigration command to start a migration, and interrupts the migration on the + * recipient using the 'interruptFunc' after the migration starts on the recipient side, and + * asserts that migration eventually commits. + * @param {recipientRestarted} bool is needed to properly assert the tenant migrations stat count. + */ +function testRecipientSyncDataInterrupt(interruptFunc, recipientRestarted) { + const recipientRst = new ReplSetTest({ + nodes: 3, + name: "recipientRst", + serverless: true, + nodeOptions: migrationX509Options.recipient + }); + recipientRst.startSet(); + recipientRst.initiate(); + + const tenantMigrationTest = new TenantMigrationTest({name: jsTestName(), recipientRst}); + + const donorRst = tenantMigrationTest.getDonorRst(); + const donorPrimary = tenantMigrationTest.getDonorPrimary(); + let recipientPrimary = tenantMigrationTest.getRecipientPrimary(); + + const migrationId = UUID(); + const migrationOpts = { + migrationIdString: extractUUIDFromObject(migrationId), + tenantId: kTenantId, + recipientConnString: tenantMigrationTest.getRecipientConnString(), + }; + const donorRstArgs = createRstArgs(donorRst); + + const runMigrationThread = new Thread(runMigrationAsync, migrationOpts, donorRstArgs); + runMigrationThread.start(); + + // Wait for recipientSyncData command to start. + assert.soon( + () => recipientPrimary.adminCommand({currentOp: true, desc: "tenant recipient migration"}) + .inprog.length > 0); + + sleep(Math.random() * kMaxSleepTimeMS); + interruptFunc(recipientRst); + + TenantMigrationTest.assertCommitted(runMigrationThread.returnData()); + tenantMigrationTest.waitForDonorNodesToReachState(donorRst.nodes, + migrationId, + migrationOpts.tenantId, + TenantMigrationTest.DonorState.kCommitted); + assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString)); + + const donorStats = tenantMigrationTest.getTenantMigrationStats(donorPrimary); + assert.eq(1, donorStats.totalMigrationDonationsCommitted); + + tenantMigrationTest.stop(); + recipientRst.stopSet(); +} + +/** + * Starts a migration and waits for it to commit, then runs the donorForgetMigration, and interrupts + * the recipient using the 'interruptFunc', and asserts that the migration state is eventually + * garbage collected. 
+ */ +function testRecipientForgetMigrationInterrupt(interruptFunc) { + const donorRst = new ReplSetTest({ + nodes: 1, + name: "donorRst", + serverless: true, + nodeOptions: Object.assign({}, migrationX509Options.donor, { + setParameter: { + tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS, + ttlMonitorSleepSecs: kTTLMonitorSleepSecs, + } + }) + }); + const recipientRst = new ReplSetTest({ + nodes: 3, + name: "recipientRst", + serverless: true, + nodeOptions: Object.assign({}, migrationX509Options.recipient, { + setParameter: { + tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS, + ttlMonitorSleepSecs: kTTLMonitorSleepSecs, + } + }) + }); + + donorRst.startSet(); + donorRst.initiate(); + + recipientRst.startSet(); + recipientRst.initiate(); + + const tenantMigrationTest = + new TenantMigrationTest({name: jsTestName(), donorRst, recipientRst}); + const recipientPrimary = tenantMigrationTest.getRecipientPrimary(); + + const migrationId = UUID(); + const migrationOpts = { + migrationIdString: extractUUIDFromObject(migrationId), + tenantId: kTenantId, + recipientConnString: recipientRst.getURL(), + }; + const donorRstArgs = createRstArgs(donorRst); + + TenantMigrationTest.assertCommitted( + tenantMigrationTest.runMigration(migrationOpts, {automaticForgetMigration: false})); + const forgetMigrationThread = new Thread(forgetMigrationAsync, + migrationOpts.migrationIdString, + donorRstArgs, + false /* retryOnRetryableErrors */); + forgetMigrationThread.start(); + + // Wait for recipientForgetMigration command to start. + assert.soon(() => { + const res = assert.commandWorked( + recipientPrimary.adminCommand({currentOp: true, desc: "tenant recipient migration"})); + return res.inprog[0].expireAt != null; + }); + sleep(Math.random() * kMaxSleepTimeMS); + interruptFunc(recipientRst); + + assert.commandWorkedOrFailedWithCode( + tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString), + ErrorCodes.NoSuchTenantMigration); + + assert.commandWorked(forgetMigrationThread.returnData()); + tenantMigrationTest.waitForMigrationGarbageCollection(migrationId, migrationOpts.tenantId); + + tenantMigrationTest.stop(); + donorRst.stopSet(); + recipientRst.stopSet(); +} + +(() => { + jsTest.log("Test that the migration resumes on stepup"); + testRecipientSyncDataInterrupt((recipientRst) => { + // Force the primary to step down but make it likely to step back up. + const recipientPrimary = recipientRst.getPrimary(); + assert.commandWorked(recipientPrimary.adminCommand( + {replSetStepDown: ReplSetTest.kForeverSecs, force: true})); + assert.commandWorked(recipientPrimary.adminCommand({replSetFreeze: 0})); + }, false); +})(); + +(() => { + jsTest.log("Test that the migration resumes after restart"); + testRecipientSyncDataInterrupt((recipientRst) => { + recipientRst.stopSet(null /* signal */, true /*forRestart */); + recipientRst.startSet({restart: true}); + recipientRst.awaitSecondaryNodes(); + recipientRst.getPrimary(); + }, true); +})(); + +(() => { + jsTest.log("Test that the recipientForgetMigration command can be retried on stepup"); + testRecipientForgetMigrationInterrupt((recipientRst) => { + // Force the primary to step down but make it likely to step back up. 
+ const recipientPrimary = recipientRst.getPrimary(); + assert.commandWorked(recipientPrimary.adminCommand( + {replSetStepDown: ReplSetTest.kForeverSecs, force: true})); + assert.commandWorked(recipientPrimary.adminCommand({replSetFreeze: 0})); + }); +})(); + +(() => { + jsTest.log("Test that the recipientForgetMigration command can be retried after restart"); + testRecipientForgetMigrationInterrupt((recipientRst) => { + recipientRst.stopSet(null /* signal */, true /*forRestart */); + recipientRst.startSet({restart: true}); + recipientRst.awaitSecondaryNodes(); + recipientRst.getPrimary(); + }); +})(); diff --git a/jstests/replsets/tenant_migration_recipient_retryable_writes_failover.js b/jstests/replsets/tenant_migration_recipient_retryable_writes_failover.js new file mode 100644 index 00000000000..03b5f7eb972 --- /dev/null +++ b/jstests/replsets/tenant_migration_recipient_retryable_writes_failover.js @@ -0,0 +1,110 @@ +/** + * Tests whether the recipient correctly clears its oplog buffer if the recipient primary + * fails over while fetching retryable writes oplog entries from the donor. + * + * @tags: [ + * incompatible_with_macos, + * incompatible_with_shard_merge, + * incompatible_with_windows_tls, + * requires_majority_read_concern, + * requires_persistence, + * serverless, + * ] + */ + +import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js"; +load("jstests/libs/uuid_util.js"); // For extractUUIDFromObject(). +load("jstests/libs/fail_point_util.js"); // For configureFailPoint(). + +const tenantMigrationTest = + new TenantMigrationTest({name: jsTestName(), sharedOptions: {nodes: 2}}); + +const kMigrationId = UUID(); +const kTenantId = ObjectId().str; +const kDbName = tenantMigrationTest.tenantDB(kTenantId, "testDb"); +const kCollName = "testColl"; +const migrationOpts = { + migrationIdString: extractUUIDFromObject(kMigrationId), + tenantId: kTenantId, +}; + +const donorRst = tenantMigrationTest.getDonorRst(); +const donorPrimary = tenantMigrationTest.getDonorPrimary(); +const rsConn = new Mongo(donorRst.getURL()); +const recipientPrimary = tenantMigrationTest.getRecipientPrimary(); + +const session = rsConn.startSession({retryWrites: true}); +const sessionColl = session.getDatabase(kDbName)[kCollName]; + +const session2 = rsConn.startSession({retryWrites: true}); +const sessionColl2 = session2.getDatabase(kDbName)[kCollName]; + +jsTestLog("Run retryable writes prior to the migration."); +assert.commandWorked(sessionColl.insert({_id: "retryableWrite1"})); +assert.commandWorked(sessionColl2.insert({_id: "retryableWrite2"})); + +jsTestLog("Setting up failpoints."); +// Use `pauseAfterRetrievingRetryableWritesBatch` to hang after inserting the first batch of results +// from the aggregation request into the oplog buffer. +const fpPauseAfterRetrievingRetryableWritesBatch = + configureFailPoint(recipientPrimary, "pauseAfterRetrievingRetryableWritesBatch"); + +// Set aggregation request batch size to 1 so that we can failover in between batches. 
+const fpSetSmallAggregationBatchSize =
+    configureFailPoint(recipientPrimary, "fpSetSmallAggregationBatchSize");
+
+jsTestLog("Starting tenant migration with migrationId: " + kMigrationId +
+          ", tenantId: " + kTenantId);
+assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts));
+
+jsTestLog("Waiting until the recipient primary fetches a batch of retryable writes oplog entries.");
+fpSetSmallAggregationBatchSize.wait();
+fpPauseAfterRetrievingRetryableWritesBatch.wait();
+
+// Check that the oplog buffer is correctly populated.
+const kOplogBufferNS = "repl.migration.oplog_" + migrationOpts.migrationIdString;
+let recipientOplogBuffer = recipientPrimary.getDB("config")[kOplogBufferNS];
+// We expect to have only retryableWrite1 since the cursor batch size is 1 and we paused after
+// inserting the first batch of results from the aggregation request.
+let cursor = recipientOplogBuffer.find();
+assert.eq(cursor.itcount(), 1, "Incorrect number of oplog entries in buffer: " + cursor.toArray());
+
+// Check that we haven't completed the retryable writes fetching stage yet.
+let recipientConfigColl = recipientPrimary.getCollection(TenantMigrationTest.kConfigRecipientsNS);
+let recipientDoc = recipientConfigColl.find({"_id": kMigrationId}).toArray();
+assert.eq(recipientDoc.length, 1);
+assert.eq(recipientDoc[0].completedFetchingRetryableWritesBeforeStartOpTime, false);
+
+jsTestLog("Stepping a new primary up.");
+const recipientRst = tenantMigrationTest.getRecipientRst();
+const recipientSecondary = recipientRst.getSecondary();
+// Use `fpAfterFetchingRetryableWritesEntriesBeforeStartOpTime` to hang after populating the oplog
+// buffer with retryable writes entries. Set this before stepping up instead of after so that the
+// new primary will not be able to pass this stage without the failpoint being set.
+const fpAfterFetchingRetryableWritesEntries = configureFailPoint(
+    recipientSecondary, "fpAfterFetchingRetryableWritesEntriesBeforeStartOpTime", {action: "hang"});
+
+recipientRst.stepUp(recipientSecondary);
+
+fpPauseAfterRetrievingRetryableWritesBatch.off();
+const newRecipientPrimary = recipientRst.getPrimary();
+
+fpAfterFetchingRetryableWritesEntries.wait();
+// The new primary should have cleared its oplog buffer and refetched both retryableWrite1 and
+// retryableWrite2. Otherwise, we will invariant when trying to add those entries.
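The re-checks below mirror the pre-failover checks above; as a read-only debugging aid they could be bundled into a small helper along these lines (a sketch only; the helper name is illustrative and not part of this patch):

function logRetryableWritesFetchState(node, migrationIdString, migrationId) {
    // Dump the recipient's oplog buffer for this migration.
    const buffer = node.getDB("config")["repl.migration.oplog_" + migrationIdString];
    jsTestLog("Oplog buffer entries: " + tojson(buffer.find().toArray()));
    // Report whether the retryable-writes fetching stage is marked complete in the recipient
    // state document.
    const stateDoc =
        node.getCollection(TenantMigrationTest.kConfigRecipientsNS).findOne({_id: migrationId});
    jsTestLog("completedFetchingRetryableWritesBeforeStartOpTime: " +
              tojson(stateDoc ? stateDoc.completedFetchingRetryableWritesBeforeStartOpTime
                              : "no state doc"));
}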
+recipientOplogBuffer = newRecipientPrimary.getDB("config")[kOplogBufferNS]; +cursor = recipientOplogBuffer.find(); +assert.eq(cursor.itcount(), 2, "Incorrect number of oplog entries in buffer: " + cursor.toArray()); + +recipientConfigColl = newRecipientPrimary.getCollection(TenantMigrationTest.kConfigRecipientsNS); +recipientDoc = recipientConfigColl.find({"_id": kMigrationId}).toArray(); +assert.eq(recipientDoc.length, 1); +assert.eq(recipientDoc[0].completedFetchingRetryableWritesBeforeStartOpTime, true); + +fpAfterFetchingRetryableWritesEntries.off(); +fpSetSmallAggregationBatchSize.off(); + +jsTestLog("Waiting for migration to complete."); +TenantMigrationTest.assertCommitted(tenantMigrationTest.waitForMigrationToComplete(migrationOpts)); + +tenantMigrationTest.stop(); diff --git a/jstests/replsets/tenant_migration_recipient_rollback_recovery.js b/jstests/replsets/tenant_migration_recipient_rollback_recovery.js new file mode 100644 index 00000000000..ffb2cc4919e --- /dev/null +++ b/jstests/replsets/tenant_migration_recipient_rollback_recovery.js @@ -0,0 +1,335 @@ +/** + * Tests that tenant migrations that go through recipient rollback are recovered correctly. + * + * @tags: [ + * incompatible_with_macos, + * incompatible_with_shard_merge, + * incompatible_with_windows_tls, + * requires_majority_read_concern, + * requires_persistence, + * serverless, + * ] + */ + +import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js"; +import { + forgetMigrationAsync, + makeX509OptionsForTest, + runMigrationAsync, +} from "jstests/replsets/libs/tenant_migration_util.js"; + +load("jstests/libs/fail_point_util.js"); +load("jstests/libs/uuid_util.js"); +load("jstests/libs/parallelTester.js"); +load("jstests/replsets/libs/rollback_test.js"); +load("jstests/replsets/rslib.js"); // 'createRstArgs' + +const kTenantId = ObjectId().str; + +const kMaxSleepTimeMS = 250; + +// Set the delay before a state doc is garbage collected to be short to speed up the test but long +// enough for the state doc to still be around after the recipient is back in the replication steady +// state. +const kGarbageCollectionDelayMS = 30 * 1000; + +const migrationX509Options = makeX509OptionsForTest(); + +function makeMigrationOpts(tenantMigrationTest, migrationId, tenantId) { + return { + migrationIdString: extractUUIDFromObject(migrationId), + tenantId: tenantId, + recipientConnString: tenantMigrationTest.getRecipientConnString(), + readPreference: {mode: "primary"}, + }; +} + +/** + * Starts a recipient ReplSetTest and creates a TenantMigrationTest for it. Runs 'setUpFunc' after + * initiating the recipient. Then, runs 'rollbackOpsFunc' while replication is disabled on the + * secondaries, shuts down the primary and restarts it after re-election to force the operations in + * 'rollbackOpsFunc' to be rolled back. Finally, runs 'steadyStateFunc' after it is back in the + * replication steady state. 
+ */ +function testRollBack(setUpFunc, rollbackOpsFunc, steadyStateFunc) { + const donorRst = new ReplSetTest({ + name: "donorRst", + nodes: 1, + serverless: true, + nodeOptions: Object.assign({}, migrationX509Options.donor, { + setParameter: { + tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS, + ttlMonitorSleepSecs: 1, + } + }) + }); + donorRst.startSet(); + donorRst.initiate(); + + const donorRstArgs = createRstArgs(donorRst); + + const recipientRst = new ReplSetTest({ + name: "recipientRst", + nodes: 3, + serverless: true, + nodeOptions: Object.assign({}, migrationX509Options.recipient, { + setParameter: { + tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS, + ttlMonitorSleepSecs: 1, + } + }) + }); + recipientRst.startSet(); + recipientRst.initiate(); + + const tenantMigrationTest = + new TenantMigrationTest({name: jsTestName(), donorRst, recipientRst}); + setUpFunc(tenantMigrationTest, donorRstArgs); + + let originalRecipientPrimary = recipientRst.getPrimary(); + const originalRecipientSecondaries = recipientRst.getSecondaries(); + // The default WC is majority and stopServerReplication will prevent satisfying any majority + // writes. + assert.commandWorked(originalRecipientPrimary.adminCommand( + {setDefaultRWConcern: 1, defaultWriteConcern: {w: 1}, writeConcern: {w: "majority"}})); + recipientRst.awaitLastOpCommitted(); + + // Disable replication on the secondaries so that writes during this step will be rolled back. + stopServerReplication(originalRecipientSecondaries); + rollbackOpsFunc(tenantMigrationTest, donorRstArgs); + + // Shut down the primary and re-enable replication to allow one of the secondaries to get + // elected, and make the writes above get rolled back on the original primary when it comes + // back up. + recipientRst.stop(originalRecipientPrimary); + restartServerReplication(originalRecipientSecondaries); + const newRecipientPrimary = recipientRst.getPrimary(); + assert.neq(originalRecipientPrimary, newRecipientPrimary); + + // Restart the original primary. + originalRecipientPrimary = + recipientRst.start(originalRecipientPrimary, {waitForConnect: true}, true /* restart */); + originalRecipientPrimary.setSecondaryOk(); + recipientRst.awaitReplication(); + + steadyStateFunc(tenantMigrationTest); + + donorRst.stopSet(); + recipientRst.stopSet(); +} + +/** + * Starts a migration and waits for the recipient's primary to insert the recipient's state doc. + * Forces the write to be rolled back. After the replication steady state is reached, asserts that + * recipientSyncData can restart the migration on the new primary. + */ +function testRollbackInitialState() { + const migrationId = UUID(); + let migrationOpts; + let migrationThread; + + let setUpFunc = (tenantMigrationTest, donorRstArgs) => {}; + + let rollbackOpsFunc = (tenantMigrationTest, donorRstArgs) => { + const recipientPrimary = tenantMigrationTest.getRecipientPrimary(); + + // Start the migration asynchronously and wait for the primary to insert the state doc. + migrationOpts = makeMigrationOpts(tenantMigrationTest, migrationId, ObjectId().str); + migrationThread = new Thread(runMigrationAsync, migrationOpts, donorRstArgs); + migrationThread.start(); + assert.soon(() => { + return 1 === + recipientPrimary.getCollection(TenantMigrationTest.kConfigRecipientsNS).count({ + _id: migrationId + }); + }); + }; + + let steadyStateFunc = (tenantMigrationTest) => { + // Verify that the migration restarted successfully on the new primary despite rollback. 
+ TenantMigrationTest.assertCommitted(migrationThread.returnData()); + tenantMigrationTest.assertRecipientNodesInExpectedState({ + nodes: tenantMigrationTest.getRecipientRst().nodes, + migrationId: migrationId, + tenantId: migrationOpts.tenantId, + expectedState: TenantMigrationTest.RecipientState.kConsistent, + expectedAccessState: TenantMigrationTest.RecipientAccessState.kRejectBefore + }); + assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString)); + }; + + testRollBack(setUpFunc, rollbackOpsFunc, steadyStateFunc); +} + +/** + * Starts a migration after enabling 'pauseFailPoint' (must pause the migration) and + * 'setUpFailPoints' on the recipient's primary. Waits for the primary to do the write to transition + * to 'nextState' after reaching 'pauseFailPoint' (i.e. the state doc matches 'query'), then forces + * the write to be rolled back. After the replication steady state is reached, asserts that the + * migration is resumed successfully by new primary regardless of what the rolled back state + * transition is. + */ +function testRollBackStateTransition(pauseFailPoint, setUpFailPoints, nextState, query) { + jsTest.log(`Test roll back the write to transition to state "${ + nextState}" after reaching failpoint "${pauseFailPoint}"`); + + const migrationId = UUID(); + let migrationOpts; + let migrationThread, pauseFp; + + let setUpFunc = (tenantMigrationTest, donorRstArgs) => { + const recipientPrimary = tenantMigrationTest.getRecipientPrimary(); + setUpFailPoints.forEach(failPoint => configureFailPoint(recipientPrimary, failPoint)); + pauseFp = configureFailPoint(recipientPrimary, pauseFailPoint, {action: "hang"}); + + migrationOpts = makeMigrationOpts(tenantMigrationTest, migrationId, ObjectId().str); + migrationThread = new Thread(runMigrationAsync, migrationOpts, donorRstArgs); + migrationThread.start(); + pauseFp.wait(); + }; + + let rollbackOpsFunc = (tenantMigrationTest, donorRstArgs) => { + const recipientPrimary = tenantMigrationTest.getRecipientPrimary(); + // Resume the migration and wait for the primary to do the write for the state transition. + pauseFp.off(); + assert.soon(() => { + return 1 === + recipientPrimary.getCollection(TenantMigrationTest.kConfigRecipientsNS) + .count(Object.assign({_id: migrationId}, query)); + }); + }; + + let steadyStateFunc = (tenantMigrationTest) => { + // Verify that the migration resumed successfully on the new primary despite the rollback. + TenantMigrationTest.assertCommitted(migrationThread.returnData()); + tenantMigrationTest.waitForRecipientNodesToReachState( + tenantMigrationTest.getRecipientRst().nodes, + migrationId, + migrationOpts.tenantId, + TenantMigrationTest.RecipientState.kConsistent, + TenantMigrationTest.RecipientAccessState.kRejectBefore); + assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString)); + }; + + testRollBack(setUpFunc, rollbackOpsFunc, steadyStateFunc); +} + +/** + * Runs donorForgetMigration after completing a migration. Waits for the recipient's primary to + * mark the recipient's state doc as garbage collectable, then forces the write to be rolled back. + * After the replication steady state is reached, asserts that recipientForgetMigration can be + * retried on the new primary and that the state doc is eventually garbage collected. 
+ */ +function testRollBackMarkingStateGarbageCollectable() { + const migrationId = UUID(); + let migrationOpts; + let forgetMigrationThread; + + let setUpFunc = (tenantMigrationTest, donorRstArgs) => { + migrationOpts = makeMigrationOpts(tenantMigrationTest, migrationId, ObjectId().str); + TenantMigrationTest.assertCommitted( + tenantMigrationTest.runMigration(migrationOpts, {automaticForgetMigration: false})); + }; + + let rollbackOpsFunc = (tenantMigrationTest, donorRstArgs) => { + const recipientPrimary = tenantMigrationTest.getRecipientPrimary(); + // Run donorForgetMigration and wait for the primary to do the write to mark the state doc + // as garbage collectable. + forgetMigrationThread = new Thread(forgetMigrationAsync, + migrationOpts.migrationIdString, + donorRstArgs, + false /* retryOnRetryableErrors */); + forgetMigrationThread.start(); + assert.soon(() => { + return 1 === + recipientPrimary.getCollection(TenantMigrationTest.kConfigRecipientsNS) + .count({_id: migrationId, expireAt: {$exists: 1}}); + }); + }; + + let steadyStateFunc = (tenantMigrationTest) => { + // Verify that the migration state got garbage collected successfully despite the rollback. + assert.commandWorked(forgetMigrationThread.returnData()); + tenantMigrationTest.waitForMigrationGarbageCollection( + migrationId, + migrationOpts.tenantId, + tenantMigrationTest.getDonorRst().nodes, + tenantMigrationTest.getRecipientRst().nodes); + }; + + testRollBack(setUpFunc, rollbackOpsFunc, steadyStateFunc); +} + +/** + * Starts a migration and forces the recipient's primary to go through rollback after a random + * amount of time. After the replication steady state is reached, asserts that the migration is + * resumed successfully. + */ +function testRollBackRandom() { + const migrationId = UUID(); + let migrationOpts; + let migrationThread; + + let setUpFunc = (tenantMigrationTest, donorRstArgs) => { + migrationOpts = makeMigrationOpts(tenantMigrationTest, migrationId, ObjectId().str); + migrationThread = new Thread(async (donorRstArgs, migrationOpts) => { + const {runMigrationAsync, forgetMigrationAsync} = + await import("jstests/replsets/libs/tenant_migration_util.js"); + assert.commandWorked(await runMigrationAsync(migrationOpts, donorRstArgs)); + assert.commandWorked(await forgetMigrationAsync( + migrationOpts.migrationIdString, donorRstArgs, false /* retryOnRetryableErrors */)); + }, donorRstArgs, migrationOpts); + + // Start the migration and wait for a random amount of time before transitioning to the + // rollback operations state. + migrationThread.start(); + sleep(Math.random() * kMaxSleepTimeMS); + }; + + let rollbackOpsFunc = (tenantMigrationTest, donorRstArgs) => { + // Let the migration run in the rollback operations state for a random amount of time. + sleep(Math.random() * kMaxSleepTimeMS); + }; + + let steadyStateFunc = (tenantMigrationTest) => { + // Verify that the migration completed and was garbage collected successfully despite the + // rollback. 
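        // (Garbage collection here is driven by the TTL monitor: once the state doc's 'expireAt'
        // time passes, the doc is removed, which is why the setup above lowers
        // tenantMigrationGarbageCollectionDelayMS and ttlMonitorSleepSecs to keep this wait short.)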
+ migrationThread.join(); + tenantMigrationTest.waitForRecipientNodesToReachState( + tenantMigrationTest.getRecipientRst().nodes, + migrationId, + migrationOpts.tenantId, + TenantMigrationTest.RecipientState.kDone, + TenantMigrationTest.RecipientAccessState.kRejectBefore); + tenantMigrationTest.waitForMigrationGarbageCollection( + migrationId, + migrationOpts.tenantId, + tenantMigrationTest.getDonorRst().nodes, + tenantMigrationTest.getRecipientRst().nodes); + }; + + testRollBack(setUpFunc, rollbackOpsFunc, steadyStateFunc); +} + +jsTest.log("Test roll back recipient's state doc insert"); +testRollbackInitialState(); + +jsTest.log("Test roll back recipient's state doc update"); +[{ + pauseFailPoint: "fpBeforeMarkingCloneSuccess", + nextState: "reject", + query: {dataConsistentStopDonorOpTime: {$exists: 1}} +}, + { + pauseFailPoint: "fpBeforePersistingRejectReadsBeforeTimestamp", + nextState: "rejectBefore", + query: {rejectReadsBeforeTimestamp: {$exists: 1}} + }].forEach(({pauseFailPoint, setUpFailPoints = [], nextState, query}) => { + testRollBackStateTransition(pauseFailPoint, setUpFailPoints, nextState, query); +}); + +jsTest.log("Test roll back marking the donor's state doc as garbage collectable"); +testRollBackMarkingStateGarbageCollectable(); + +jsTest.log("Test roll back random"); +testRollBackRandom(); diff --git a/jstests/replsets/tenant_migration_recipient_sync_donor_timestamp.js b/jstests/replsets/tenant_migration_recipient_sync_donor_timestamp.js new file mode 100644 index 00000000000..81422e28454 --- /dev/null +++ b/jstests/replsets/tenant_migration_recipient_sync_donor_timestamp.js @@ -0,0 +1,94 @@ +/** + * Exercises the code path for the recipientSyncData command that waits until a timestamp provided + * by the donor is majority committed: make sure that in this code path, when the recipient is + * interrupted by a primary step down, the recipient properly swaps the error code to the true code + * (like primary step down) that the donor can retry on. + * + * @tags: [ + * incompatible_with_macos, + * incompatible_with_shard_merge, + * incompatible_with_windows_tls, + * requires_persistence, + * requires_replication, + * serverless, + * ] + */ + +import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js"; +load("jstests/libs/fail_point_util.js"); +load("jstests/libs/uuid_util.js"); // For extractUUIDFromObject() + +// Make the batch size small so that we can pause before all the batches are applied. +const tenantMigrationTest = new TenantMigrationTest( + {name: jsTestName(), sharedOptions: {setParameter: {tenantApplierBatchSizeOps: 2}}}); + +const kMigrationId = UUID(); +const kTenantId = ObjectId().str; +const kReadPreference = { + mode: "primary" +}; +const migrationOpts = { + migrationIdString: extractUUIDFromObject(kMigrationId), + tenantId: kTenantId, + readPreference: kReadPreference +}; + +const dbName = tenantMigrationTest.tenantDB(kTenantId, "testDB"); +const collName = jsTestName() + "_collection"; + +const recipientRst = tenantMigrationTest.getRecipientRst(); +const recipientPrimary = recipientRst.getPrimary(); + +// FailPoint to pause right before the data consistent promise is fulfilled. 
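configureFailPoint() from fail_point_util.js is a thin wrapper over the server's configureFailPoint admin command; roughly, enabling and later clearing a hang failpoint by hand would look like the sketch below (illustrative only, and it omits the wait() support the real helper also provides):

function enableHangFailPoint(conn, fpName) {
    // Turn the failpoint on, passing the 'hang' action as failpoint data.
    assert.commandWorked(conn.adminCommand(
        {configureFailPoint: fpName, mode: "alwaysOn", data: {action: "hang"}}));
    // Return a handle whose off() restores normal behavior.
    return {
        off: () =>
            assert.commandWorked(conn.adminCommand({configureFailPoint: fpName, mode: "off"})),
    };
}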
+const fpBeforeDataConsistent = configureFailPoint( + recipientPrimary, "fpBeforeFulfillingDataConsistentPromise", {action: "hang"}); +const fpBeforeApplierFutureCalled = + configureFailPoint(recipientPrimary, "fpWaitUntilTimestampMajorityCommitted"); + +tenantMigrationTest.insertDonorDB(dbName, collName); + +jsTestLog("Starting migration."); +// Start the migration, and allow it to progress to the point where the _dataConsistentPromise has +// been fulfilled. +tenantMigrationTest.startMigration(migrationOpts); + +jsTestLog("Waiting for data consistent promise."); +// Pause right before the _dataConsistentPromise is fulfilled. Therefore, the applier has +// finished applying entries at least until dataConsistentStopDonorOpTime. +fpBeforeDataConsistent.wait(); + +jsTestLog("Pausing the tenant oplog applier."); +// Pause the applier now. All the entries that the applier cannot process now are past the +// dataConsistentStopDonorOpTime. +const fpPauseOplogApplier = + configureFailPoint(recipientPrimary, "fpBeforeTenantOplogApplyingBatch"); + +jsTestLog("Writing to donor db."); +// Send writes to the donor. The applier will not be able to process these as it is paused. +const docsToApply = [...Array(10).keys()].map((i) => ({a: i})); +tenantMigrationTest.insertDonorDB(dbName, collName, docsToApply); + +jsTestLog("Waiting to hit failpoint in tenant oplog applier."); +fpPauseOplogApplier.wait(); + +jsTestLog("Allowing recipient to respond."); +// Allow the recipient to respond to the donor for the recipientSyncData command that waits on the +// fulfillment of the _dataConsistentPromise. The donor will then send another recipientSyncData +// command that waits on the provided donor timestamp to be majority committed. +fpBeforeDataConsistent.off(); + +jsTestLog("Reach the point where we are waiting for the tenant oplog applier to catch up."); +fpBeforeApplierFutureCalled.wait(); +fpBeforeApplierFutureCalled.off(); + +jsTestLog("Stepping another node up."); +// Make a new recipient primary step up. This will ask the applier to shutdown. +recipientRst.stepUp(recipientRst.getSecondaries()[0]); + +jsTestLog("Release the tenant oplog applier failpoint."); +fpPauseOplogApplier.off(); + +jsTestLog("Waiting for migration to complete."); +TenantMigrationTest.assertCommitted(tenantMigrationTest.waitForMigrationToComplete(migrationOpts)); + +tenantMigrationTest.stop(); diff --git a/jstests/replsets/tenant_migration_recipient_sync_source_reconnect_delayed_secondary.js b/jstests/replsets/tenant_migration_recipient_sync_source_reconnect_delayed_secondary.js new file mode 100644 index 00000000000..c95fb70d474 --- /dev/null +++ b/jstests/replsets/tenant_migration_recipient_sync_source_reconnect_delayed_secondary.js @@ -0,0 +1,66 @@ +/** + * Tests that a migration will continuously retry sync source selection when there are no available + * donor hosts. Also checks that a donor host is considered an uneligible sync source when it has a + * majority OpTime earlier than the recipient's stored 'startApplyingDonorOpTime'. + * + * Tests that if the stale donor host advances its majority OpTime to 'startApplyingDonorOpTime' + * or later, the recipient will successfully choose that donor as sync source and resume the + * migration. + * + * @tags: [ + * incompatible_with_macos, + * incompatible_with_shard_merge, + * incompatible_with_windows_tls, + * requires_majority_read_concern, + * requires_persistence, + * # The currentOp output field 'dataSyncCompleted' was renamed to 'migrationCompleted'. 
+ * requires_fcv_70, + * serverless, + * ] + */ + +import { + setUpMigrationSyncSourceTest +} from "jstests/replsets/libs/tenant_migration_recipient_sync_source.js"; +import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js"; + +// After this setUp() call, we should have a migration with 'secondary' read preference. The +// recipient should be continuously retrying sync source selection, unable to choose +// 'delayedSecondary' because it is too stale and 'donorSecondary' because it is down. +const { + tenantMigrationTest, + migrationOpts, + donorSecondary, + delayedSecondary, + hangAfterCreatingConnections +} = setUpMigrationSyncSourceTest(); + +if (!tenantMigrationTest) { + // Feature flag was not enabled. + quit(); +} + +jsTestLog("Restarting replication on 'delayedSecondary'"); +restartServerReplication(delayedSecondary); + +// The recipient should eventually be able to connect to the lagged secondary, after the secondary +// has caught up and the exclude timeout has expired. +hangAfterCreatingConnections.wait(); + +const recipientPrimary = tenantMigrationTest.getRecipientPrimary(); +const res = recipientPrimary.adminCommand({currentOp: true, desc: "tenant recipient migration"}); +const currOp = res.inprog[0]; +assert.eq(delayedSecondary.host, + currOp.donorSyncSource, + `the recipient should only be able to choose 'delayedSecondary' as sync source`); + +hangAfterCreatingConnections.off(); + +TenantMigrationTest.assertCommitted(tenantMigrationTest.waitForMigrationToComplete(migrationOpts)); +assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString)); + +// Remove 'donorSecondary' so that the test can complete properly. +const donorRst = tenantMigrationTest.getDonorRst(); +donorRst.remove(donorSecondary); +donorRst.stopSet(); +tenantMigrationTest.stop(); diff --git a/jstests/replsets/tenant_migration_recipient_sync_source_restart_donor_secondary.js b/jstests/replsets/tenant_migration_recipient_sync_source_restart_donor_secondary.js new file mode 100644 index 00000000000..10ded49a34b --- /dev/null +++ b/jstests/replsets/tenant_migration_recipient_sync_source_restart_donor_secondary.js @@ -0,0 +1,67 @@ +/** + * Tests that a migration will continuously retry sync source selection when there are no available + * donor hosts. Also checks that a donor host is considered an uneligible sync source when it has a + * majority OpTime earlier than the recipient's stored 'startApplyingDonorOpTime'. + * + * Tests that if a donor host becomes available, the recipient will successfully choose it as a + * sync source and resume the migration. + * + * @tags: [ + * incompatible_with_macos, + * incompatible_with_shard_merge, + * incompatible_with_windows_tls, + * requires_majority_read_concern, + * requires_persistence, + * # The currentOp output field 'dataSyncCompleted' was renamed to 'migrationCompleted'. + * requires_fcv_70, + * serverless, + * ] + */ + +import { + setUpMigrationSyncSourceTest +} from "jstests/replsets/libs/tenant_migration_recipient_sync_source.js"; +import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js"; + +// After this setUp() call, we should have a migration with 'secondary' read preference. The +// recipient should be continuously retrying sync source selection, unable to choose +// 'delayedSecondary' because it is too stale and 'donorSecondary' because it is down. 
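Several of these sync-source tests read the recipient's progress the same way; the pattern used below can be captured in a small read-only accessor (the helper name is assumed and not part of this patch):

function getRecipientMigrationCurrOp(node) {
    // currentOp filtered down to the tenant migration recipient instance; fields such as
    // 'donorSyncSource' are what the assertions below inspect.
    const res = assert.commandWorked(
        node.adminCommand({currentOp: true, desc: "tenant recipient migration"}));
    return res.inprog[0];
}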
+const { + tenantMigrationTest, + migrationOpts, + donorSecondary, + delayedSecondary, + hangAfterCreatingConnections +} = setUpMigrationSyncSourceTest(); + +if (!tenantMigrationTest) { + // Feature flag was not enabled. + quit(); +} + +const donorRst = tenantMigrationTest.getDonorRst(); + +jsTestLog("Restarting 'donorSecondary'"); +donorRst.start(donorSecondary, null /* options */, true /* restart */); + +// The recipient should eventually be able to connect to the donor secondary, after the node reaches +// 'secondary' state. +hangAfterCreatingConnections.wait(); + +const recipientPrimary = tenantMigrationTest.getRecipientPrimary(); +const res = recipientPrimary.adminCommand({currentOp: true, desc: "tenant recipient migration"}); +const currOp = res.inprog[0]; +// 'donorSecondary' should always be the chosen sync source, since read preference is 'secondary' +// and 'delayedSecondary' cannot be chosen because it is too stale. +assert.eq(donorSecondary.host, + currOp.donorSyncSource, + `the recipient should only be able to choose 'donorSecondary' as sync source`); + +hangAfterCreatingConnections.off(); +restartServerReplication(delayedSecondary); + +TenantMigrationTest.assertCommitted(tenantMigrationTest.waitForMigrationToComplete(migrationOpts)); +assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString)); + +donorRst.stopSet(); +tenantMigrationTest.stop(); diff --git a/jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover.js b/jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover.js new file mode 100644 index 00000000000..ea9654d97ac --- /dev/null +++ b/jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover.js @@ -0,0 +1,132 @@ +/** + * Tests that in tenant migration, the recipient set can resume collection cloning from the last + * document cloned after a failover. + * @tags: [ + * incompatible_with_macos, + * incompatible_with_shard_merge, + * incompatible_with_windows_tls, + * requires_majority_read_concern, + * requires_persistence, + * serverless, + * ] + */ + +import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js"; +import { + checkTenantDBHashes, + makeX509OptionsForTest, +} from "jstests/replsets/libs/tenant_migration_util.js"; + +load("jstests/libs/fail_point_util.js"); +load("jstests/libs/uuid_util.js"); // for 'extractUUIDFromObject' + +const tenantMigrationFailoverTest = function(isTimeSeries, createCollFn, docs) { + const batchSize = 2; + const recipientRst = new ReplSetTest({ + nodes: 2, + name: jsTestName() + "_recipient", + serverless: true, + nodeOptions: Object.assign(makeX509OptionsForTest().recipient, { + setParameter: { + // Use a batch size of 2 so that collection cloner requires more than a single + // batch to complete. + collectionClonerBatchSize: batchSize, + // Allow reads on recipient before migration completes for testing. 
+ 'failpoint.tenantMigrationRecipientNotRejectReads': tojson({mode: 'alwaysOn'}), + } + }) + }); + + recipientRst.startSet(); + recipientRst.initiate(); + + const tenantMigrationTest = + new TenantMigrationTest({name: jsTestName(), recipientRst: recipientRst}); + const donorPrimary = tenantMigrationTest.getDonorPrimary(); + + const tenantId = ObjectId().str; + const dbName = tenantMigrationTest.tenantDB(tenantId, "testDB"); + const donorDB = donorPrimary.getDB(dbName); + const collName = "testColl"; + + const recipientPrimary = tenantMigrationTest.getRecipientPrimary(); + + // Create collection and insert documents. + assert.commandWorked(createCollFn(donorDB, collName)); + tenantMigrationTest.insertDonorDB(dbName, collName, docs); + + const migrationId = UUID(); + const migrationIdString = extractUUIDFromObject(migrationId); + const migrationOpts = { + migrationIdString: migrationIdString, + recipientConnString: tenantMigrationTest.getRecipientConnString(), + tenantId, + }; + + // Configure a fail point to have the recipient primary hang after cloning 2 documents. + const recipientDb = recipientPrimary.getDB(dbName); + let recipientColl = isTimeSeries ? recipientDb.getCollection("system.buckets." + collName) + : recipientDb.getCollection(collName); + + const hangDuringCollectionClone = + configureFailPoint(recipientDb, + "tenantMigrationHangCollectionClonerAfterHandlingBatchResponse", + {nss: recipientColl.getFullName()}); + + // Start a migration and wait for recipient to hang after cloning 2 documents. + assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts)); + hangDuringCollectionClone.wait(); + assert.soon(() => recipientColl.find().itcount() === batchSize); + + // Insert some documents that will be fetched by the recipient. This is to test that on + // failover, the fetcher will resume fetching from where it left off. The system is expected + // to crash if the recipient fetches a duplicate oplog entry upon resuming the migration. + tenantMigrationTest.insertDonorDB(dbName, "aNewColl", [{_id: "docToBeFetched"}]); + assert.soon(() => { + const configDb = recipientPrimary.getDB("config"); + const oplogBuffer = configDb.getCollection("repl.migration.oplog_" + migrationIdString); + return oplogBuffer.find({"entry.o._id": "docToBeFetched"}).count() === 1; + }); + + // Step up a new node in the recipient set and trigger a failover. The new primary should resume + // cloning starting from the third document. + const newRecipientPrimary = recipientRst.getSecondaries()[0]; + recipientRst.stepUp(newRecipientPrimary); + hangDuringCollectionClone.off(); + recipientRst.getPrimary(); + + // The migration should go through after recipient failover. + TenantMigrationTest.assertCommitted( + tenantMigrationTest.waitForMigrationToComplete(migrationOpts)); + + // Check that recipient has cloned all documents in the collection. 
+ recipientColl = newRecipientPrimary.getDB(dbName).getCollection(collName); + assert.eq(docs.length, recipientColl.find().itcount()); + assert.docEq(docs, recipientColl.find().sort({_id: 1}).toArray()); + checkTenantDBHashes({ + donorRst: tenantMigrationTest.getDonorRst(), + recipientRst: tenantMigrationTest.getRecipientRst(), + tenantId + }); + + tenantMigrationTest.stop(); + recipientRst.stopSet(); +}; + +jsTestLog("Running tenant migration test for time-series collection"); +tenantMigrationFailoverTest(true, + (db, collName) => db.createCollection( + collName, {timeseries: {timeField: "time", metaField: "bucket"}}), + [ + // Group each document in its own bucket in order to work with the + // collectionClonerBatchSize we set at the recipient replSet. + {_id: 1, time: ISODate(), bucket: "a"}, + {_id: 2, time: ISODate(), bucket: "b"}, + {_id: 3, time: ISODate(), bucket: "c"}, + {_id: 4, time: ISODate(), bucket: "d"} + ]); + +jsTestLog("Running tenant migration test for regular collection"); +tenantMigrationFailoverTest(false, + (db, collName) => db.createCollection(collName), + [{_id: 0}, {_id: "string"}, {_id: UUID()}, {_id: new Date()}]); diff --git a/jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover_with_dropped_views.js b/jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover_with_dropped_views.js new file mode 100644 index 00000000000..262e40edf00 --- /dev/null +++ b/jstests/replsets/tenant_migration_resume_collection_cloner_after_recipient_failover_with_dropped_views.js @@ -0,0 +1,133 @@ +/** + * Tests that in tenant migration, the collection recreated on a dropped view namespace is handled + * correctly on resuming the logical tenant collection cloning phase due to recipient failover. + * @tags: [ + * incompatible_with_macos, + * incompatible_with_shard_merge, + * incompatible_with_windows_tls, + * requires_majority_read_concern, + * requires_persistence, + * serverless, + * ] + */ + +import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js"; +import {makeX509OptionsForTest} from "jstests/replsets/libs/tenant_migration_util.js"; + +load("jstests/libs/fail_point_util.js"); +load("jstests/libs/uuid_util.js"); // for 'extractUUIDFromObject' + +const tenantMigrationFailoverTest = function(isTimeSeries, createCollFn) { + const recipientRst = new ReplSetTest({ + nodes: 2, + name: jsTestName() + "_recipient", + serverless: true, + nodeOptions: Object.assign(makeX509OptionsForTest().recipient, { + setParameter: { + // Allow reads on recipient before migration completes for testing. + 'failpoint.tenantMigrationRecipientNotRejectReads': tojson({mode: 'alwaysOn'}), + } + }) + }); + + recipientRst.startSet(); + recipientRst.initiate(); + + const tenantMigrationTest = + new TenantMigrationTest({name: jsTestName(), recipientRst: recipientRst}); + + const donorRst = tenantMigrationTest.getDonorRst(); + const donorPrimary = donorRst.getPrimary(); + + const tenantId = ObjectId().str; + const dbName = tenantMigrationTest.tenantDB(tenantId, "testDB"); + const donorDB = donorPrimary.getDB(dbName); + const collName = "testColl"; + const donorColl = donorDB[collName]; + + let getCollectionInfo = function(conn) { + return conn.getDB(dbName).getCollectionInfos().filter(coll => { + return coll.name === collName; + }); + }; + + // Create a timeseries collection or a regular view. 
+    assert.commandWorked(createCollFn(donorDB, collName));
+    donorRst.awaitReplication();
+
+    const migrationId = UUID();
+    const migrationIdString = extractUUIDFromObject(migrationId);
+    const migrationOpts = {
+        migrationIdString: migrationIdString,
+        recipientConnString: tenantMigrationTest.getRecipientConnString(),
+        tenantId,
+    };
+
+    const recipientPrimary = recipientRst.getPrimary();
+    const recipientDb = recipientPrimary.getDB(dbName);
+    const recipientSystemViewsColl = recipientDb.getCollection("system.views");
+
+    // Configure a fail point to have the recipient primary hang after cloning
+    // "<OID>_testDB.system.views" collection.
+    const hangDuringCollectionClone =
+        configureFailPoint(recipientPrimary,
+                           "tenantMigrationHangCollectionClonerAfterHandlingBatchResponse",
+                           {nss: recipientSystemViewsColl.getFullName()});
+
+    // Start the migration and wait for the migration to hang after cloning
+    // "<OID>_testDB.system.views" collection.
+    assert.commandWorked(tenantMigrationTest.startMigration(migrationOpts));
+    hangDuringCollectionClone.wait();
+
+    assert.soon(() => recipientSystemViewsColl.find().itcount() >= 1);
+    recipientRst.awaitLastOpCommitted();
+    const newRecipientPrimary = recipientRst.getSecondaries()[0];
+
+    // Verify that a view has been registered for "<OID>_testDB.testColl" on the new
+    // recipient primary.
+    let collectionInfo = getCollectionInfo(newRecipientPrimary);
+    assert.eq(1, collectionInfo.length);
+    assert(collectionInfo[0].type === (isTimeSeries ? "timeseries" : "view"),
+           "data store type mismatch: " + tojson(collectionInfo[0]));
+
+    // Drop the view and create a regular collection with the same namespace as the
+    // dropped view on the donor.
+    assert(donorColl.drop());
+    assert.commandWorked(donorDB.createCollection(collName));
+
+    // We need to skip TenantDatabaseCloner::listExistingCollectionsStage() to make sure
+    // the recipient always clones the above newly created regular collection after the failover.
+    // Currently, we restart cloning after a failover only from the collection whose UUID is
+    // greater than or equal to the last collection we have on disk.
+    const skiplistExistingCollectionsStage =
+        configureFailPoint(newRecipientPrimary, "skiplistExistingCollectionsStage");
+
+    // Step up a new node in the recipient set and trigger a failover.
+    recipientRst.stepUp(newRecipientPrimary);
+    hangDuringCollectionClone.off();
+
+    // The migration should go through after recipient failover.
+    TenantMigrationTest.assertCommitted(
+        tenantMigrationTest.waitForMigrationToComplete(migrationOpts));
+
+    // Check that the recipient has dropped the view and re-created the regular collection as part
+    // of the migration oplog catchup phase.
+    collectionInfo = getCollectionInfo(newRecipientPrimary);
+    assert.eq(1, collectionInfo.length);
+    assert(collectionInfo[0].type === "collection",
+           "data store type mismatch: " + tojson(collectionInfo[0]));
+
+    tenantMigrationTest.stop();
+    recipientRst.stopSet();
+};
+
+jsTestLog("Running tenant migration test for time-series collection");
+// Creating a timeseries collection implicitly creates a view on the 'collName' collection
+// namespace.
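For context, that implicit view and its backing buckets collection are both visible through the catalog; a read-only sketch (the helper name is assumed and not part of this patch) using only calls this test already relies on:

function logTimeseriesCatalog(db, collName) {
    // The user-facing namespace appears as a catalog entry of type "timeseries"...
    jsTestLog("Catalog entry: " +
              tojson(db.getCollectionInfos().filter((coll) => coll.name === collName)));
    // ...while the documents live in the backing system.buckets collection, which is what the
    // cloner actually copies.
    jsTestLog("Bucket count: " + db.getCollection("system.buckets." + collName).find().itcount());
}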
+tenantMigrationFailoverTest(true, + (db, collName) => db.createCollection( + collName, {timeseries: {timeField: "time", metaField: "bucket"}})); + +jsTestLog("Running tenant migration test for regular view"); +tenantMigrationFailoverTest(false, + (db, collName) => db.createView(collName, "sourceCollection", [])); diff --git a/jstests/replsets/tenant_migration_resume_collection_cloner_after_rename.js b/jstests/replsets/tenant_migration_resume_collection_cloner_after_rename.js new file mode 100644 index 00000000000..0919240cf24 --- /dev/null +++ b/jstests/replsets/tenant_migration_resume_collection_cloner_after_rename.js @@ -0,0 +1,124 @@ +/** + * Tests that in tenant migration, the recipient set can resume collection cloning from the last + * document cloned after a failover even if the collection has been renamed on the donor. + * @tags: [ + * incompatible_with_macos, + * incompatible_with_shard_merge, + * incompatible_with_windows_tls, + * requires_majority_read_concern, + * requires_persistence, + * serverless, + * ] + */ + +import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js"; +import { + checkTenantDBHashes, + makeX509OptionsForTest, + runMigrationAsync +} from "jstests/replsets/libs/tenant_migration_util.js"; + +load("jstests/libs/fail_point_util.js"); +load("jstests/libs/uuid_util.js"); // for 'extractUUIDFromObject' +load("jstests/libs/parallelTester.js"); // for 'Thread' +load('jstests/replsets/rslib.js'); // 'createRstArgs' + +const recipientRst = new ReplSetTest({ + nodes: 2, + name: jsTestName() + "_recipient", + serverless: true, + nodeOptions: Object.assign(makeX509OptionsForTest().recipient, { + setParameter: { + // Use a batch size of 2 so that collection cloner requires more than a single batch to + // complete. + collectionClonerBatchSize: 2, + // Allow reads on recipient before migration completes for testing. + 'failpoint.tenantMigrationRecipientNotRejectReads': tojson({mode: 'alwaysOn'}), + } + }) +}); + +recipientRst.startSet(); +recipientRst.initiate(); + +const tenantMigrationTest = + new TenantMigrationTest({name: jsTestName(), recipientRst: recipientRst}); +const tenantId = ObjectId().str; +const dbName = tenantMigrationTest.tenantDB(tenantId, "testDB"); +const collName = "testColl"; + +const recipientPrimary = tenantMigrationTest.getRecipientPrimary(); +const donorPrimary = tenantMigrationTest.getDonorPrimary(); + +// Test _id with mixed bson types. +const docs = [{_id: 0}, {_id: "string"}, {_id: UUID()}, {_id: new Date()}]; +tenantMigrationTest.insertDonorDB(dbName, collName, docs); + +const migrationId = UUID(); +const migrationIdString = extractUUIDFromObject(migrationId); +const migrationOpts = { + migrationIdString: migrationIdString, + recipientConnString: tenantMigrationTest.getRecipientConnString(), + tenantId, +}; + +// Configure a fail point to have the recipient primary hang after cloning 2 documents. +const recipientDb = recipientPrimary.getDB(dbName); +let recipientColl = recipientDb.getCollection(collName); +const hangDuringCollectionClone = + configureFailPoint(recipientDb, + "tenantMigrationHangCollectionClonerAfterHandlingBatchResponse", + {nss: recipientColl.getFullName()}); + +// Start a migration and wait for recipient to hang after cloning 2 documents. 
+const donorRstArgs = createRstArgs(tenantMigrationTest.getDonorRst()); +const migrationThread = new Thread(runMigrationAsync, migrationOpts, donorRstArgs); +migrationThread.start(); +hangDuringCollectionClone.wait(); +assert.soon(() => recipientColl.find().itcount() === 2); + +// Insert some documents that will be fetched by the recipient. This is to test that on failover, +// the fetcher will resume fetching from where it left off. The system is expected to crash if +// the recipient fetches a duplicate oplog entry upon resuming the migration. +tenantMigrationTest.insertDonorDB(dbName, "aNewColl", [{_id: "docToBeFetched"}]); +assert.soon(() => { + const configDb = recipientPrimary.getDB("config"); + const oplogBuffer = configDb.getCollection("repl.migration.oplog_" + migrationIdString); + return oplogBuffer.find({"entry.o._id": "docToBeFetched"}).count() === 1; +}); + +recipientRst.awaitLastOpCommitted(); + +// Set a failpoint to prevent the new recipient primary from completing the migration before the +// donor renames the collection. +const newRecipientPrimary = recipientRst.getSecondaries()[0]; +const fpPauseAtStartOfMigration = + configureFailPoint(newRecipientPrimary, "pauseAfterRunTenantMigrationRecipientInstance"); + +// Step up a new node in the recipient set and trigger a failover. The new primary should resume +// cloning starting from the third document. +recipientRst.stepUp(newRecipientPrimary); +hangDuringCollectionClone.off(); +recipientRst.getPrimary(); + +// Rename the collection on the donor. +const donorColl = donorPrimary.getDB(dbName).getCollection(collName); +const collNameRenamed = collName + "_renamed"; +assert.commandWorked(donorColl.renameCollection(collNameRenamed)); + +// The migration should go through after recipient failover. +fpPauseAtStartOfMigration.off(); +TenantMigrationTest.assertCommitted(migrationThread.returnData()); + +// Check that recipient has cloned all documents in the renamed collection. +recipientColl = newRecipientPrimary.getDB(dbName).getCollection(collNameRenamed); +assert.eq(4, recipientColl.find().itcount()); +assert.eq(recipientColl.find().sort({_id: 1}).toArray(), docs); +checkTenantDBHashes({ + donorRst: tenantMigrationTest.getDonorRst(), + recipientRst: tenantMigrationTest.getRecipientRst(), + tenantId +}); + +tenantMigrationTest.stop(); +recipientRst.stopSet(); diff --git a/jstests/replsets/tenant_migration_resume_oplog_application.js b/jstests/replsets/tenant_migration_resume_oplog_application.js new file mode 100644 index 00000000000..70849750914 --- /dev/null +++ b/jstests/replsets/tenant_migration_resume_oplog_application.js @@ -0,0 +1,119 @@ +/** + * Tests that in a tenant migration, the recipient primary will resume oplog application on + * failover. + * @tags: [ + * incompatible_with_macos, + * incompatible_with_shard_merge, + * incompatible_with_windows_tls, + * requires_majority_read_concern, + * requires_persistence, + * serverless, + * ] + */ + +import {TenantMigrationTest} from "jstests/replsets/libs/tenant_migration_test.js"; +import { + checkTenantDBHashes, + makeX509OptionsForTest, + runMigrationAsync, +} from "jstests/replsets/libs/tenant_migration_util.js"; + +load("jstests/libs/fail_point_util.js"); +load("jstests/libs/uuid_util.js"); // for 'extractUUIDFromObject' +load("jstests/libs/parallelTester.js"); // for 'Thread' +load("jstests/libs/write_concern_util.js"); // for 'stopReplicationOnSecondaries' +load("jstests/aggregation/extras/utils.js"); // For assertArrayEq. 
+load('jstests/replsets/rslib.js'); // For 'createRstArgs' + +const recipientRst = new ReplSetTest({ + nodes: 3, + name: jsTestName() + "_recipient", + serverless: true, + // Use a batch size of 2 so that we can hang in the middle of tenant oplog application. + nodeOptions: Object.assign(makeX509OptionsForTest().recipient, + {setParameter: {tenantApplierBatchSizeOps: 2}}) +}); + +recipientRst.startSet(); +recipientRst.initiate(); + +const tenantMigrationTest = + new TenantMigrationTest({name: jsTestName(), recipientRst: recipientRst}); + +const tenantId = ObjectId().str; +const dbName = tenantMigrationTest.tenantDB(tenantId, "testDB"); +const collName = "testColl"; + +const donorPrimary = tenantMigrationTest.getDonorPrimary(); +const recipientPrimary = tenantMigrationTest.getRecipientPrimary(); +const donorRst = tenantMigrationTest.getDonorRst(); +const donorTestColl = donorPrimary.getDB(dbName).getCollection(collName); + +// Populate the donor replica set with some initial data and make sure it is majority committed. +const majorityCommittedDocs = [{_id: 0, x: 0}, {_id: 1, x: 1}]; +assert.commandWorked(donorTestColl.insert(majorityCommittedDocs, {writeConcern: {w: "majority"}})); +assert.eq(2, donorTestColl.find().readConcern("majority").itcount()); + +const migrationId = UUID(); +const migrationOpts = { + migrationIdString: extractUUIDFromObject(migrationId), + recipientConnString: tenantMigrationTest.getRecipientConnString(), + tenantId, +}; + +// Configure fail point to have the recipient primary hang after the cloner completes and the oplog +// applier has started. +let waitAfterDatabaseClone = configureFailPoint( + recipientPrimary, "fpAfterStartingOplogApplierMigrationRecipientInstance", {action: "hang"}); +// Configure fail point to hang the tenant oplog applier after it applies the first batch. +let waitInOplogApplier = configureFailPoint(recipientPrimary, "hangInTenantOplogApplication"); + +// Start a migration and wait for recipient to hang in the tenant database cloner. +const donorRstArgs = createRstArgs(donorRst); +const migrationThread = new Thread(runMigrationAsync, migrationOpts, donorRstArgs); +migrationThread.start(); +waitAfterDatabaseClone.wait(); + +// Insert some writes that will eventually be picked up by the tenant oplog applier on the +// recipient. +const docsToApply = [{_id: 2, x: 2}, {_id: 3, x: 3}, {_id: 4, x: 4}]; +tenantMigrationTest.insertDonorDB(dbName, collName, docsToApply); + +// Wait for the applied oplog batch to be replicated. +waitInOplogApplier.wait(); +recipientRst.awaitReplication(); +let local = recipientPrimary.getDB("local"); +let appliedNoOps = local.oplog.rs.find({fromTenantMigration: migrationId, op: "n"}); +let resultsArr = appliedNoOps.toArray(); +// It is possible that the first batch applied includes a resume no-op token. We do not write no-op +// entries for resume token entries in tenant migrations. +assert.gt(appliedNoOps.count(), 0, resultsArr); +assert.lte(appliedNoOps.count(), 2, resultsArr); +assert.eq(docsToApply[0], resultsArr[0].o2.o, resultsArr); +if (appliedNoOps.count() === 2) { + assert.eq(docsToApply[1], resultsArr[1].o2.o, resultsArr); +} +// Step up a new node in the recipient set and trigger a failover. The new primary should resume +// fetching starting from the unapplied documents. 
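// (ReplSetTest.stepUp() triggers an election for the given node and only returns once that node
// has become the writable primary.)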
+const newRecipientPrimary = recipientRst.getSecondaries()[0]; +recipientRst.stepUp(newRecipientPrimary); +waitAfterDatabaseClone.off(); +waitInOplogApplier.off(); +recipientRst.getPrimary(); + +// The migration should go through after recipient failover. +TenantMigrationTest.assertCommitted(migrationThread.returnData()); +// Validate that the last no-op entry is applied. +local = newRecipientPrimary.getDB("local"); +appliedNoOps = local.oplog.rs.find({fromTenantMigration: migrationId, op: "n"}); +resultsArr = appliedNoOps.toArray(); +assert.eq(3, appliedNoOps.count(), appliedNoOps); +assert.eq(docsToApply[2], resultsArr[2].o2.o, resultsArr); + +checkTenantDBHashes({ + donorRst: tenantMigrationTest.getDonorRst(), + recipientRst: tenantMigrationTest.getRecipientRst(), + tenantId +}); +tenantMigrationTest.stop(); +recipientRst.stopSet(); diff --git a/jstests/replsets/tenant_migration_retryable_write_retry_on_recipient.js b/jstests/replsets/tenant_migration_retryable_write_retry_on_recipient.js index a9ee61f0993..245673daa62 100644 --- a/jstests/replsets/tenant_migration_retryable_write_retry_on_recipient.js +++ b/jstests/replsets/tenant_migration_retryable_write_retry_on_recipient.js @@ -186,7 +186,6 @@ assert.commandWorked( jsTest.log("Waiting for migration to complete"); waitBeforeFetchingTransactions.off(); TenantMigrationTest.assertCommitted(migrationThread.returnData()); -tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString); // Print the no-op oplog entries for debugging purposes. jsTestLog("Recipient oplog migration entries."); @@ -263,7 +262,6 @@ function testRecipientRetryableWrites(db, writes) { jsTestLog("Run retryable write on primary after the migration"); testRecipientRetryableWrites(recipientDb, beforeWrites); testRecipientRetryableWrites(recipientDb, duringWrites); - jsTestLog("Step up secondary"); const recipientRst = tenantMigrationTest.getRecipientRst(); recipientRst.stepUp(recipientRst.getSecondary()); @@ -271,6 +269,8 @@ jsTestLog("Run retryable write on secondary after the migration"); testRecipientRetryableWrites(recipientRst.getPrimary().getDB(kDbName), beforeWrites); testRecipientRetryableWrites(recipientRst.getPrimary().getDB(kDbName), duringWrites); +tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString); + jsTestLog("Trying a back-to-back migration"); const tenantMigrationTest2 = new TenantMigrationTest( {name: jsTestName() + "2", donorRst: tenantMigrationTest.getRecipientRst()}); diff --git a/jstests/replsets/tenant_migration_timeseries_retryable_write_retry_on_recipient.js b/jstests/replsets/tenant_migration_timeseries_retryable_write_retry_on_recipient.js index b5a466046f8..4a3475178f5 100644 --- a/jstests/replsets/tenant_migration_timeseries_retryable_write_retry_on_recipient.js +++ b/jstests/replsets/tenant_migration_timeseries_retryable_write_retry_on_recipient.js @@ -105,7 +105,6 @@ function testRetryOnRecipient(ordered) { jsTest.log("Waiting for migration to complete"); pauseTenantMigrationBeforeLeavingDataSyncState.off(); TenantMigrationTest.assertCommitted(migrationThread.returnData()); - tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString); // Print the no-op oplog entries for debugging purposes. 
jsTestLog("Recipient oplog migration entries."); @@ -133,6 +132,8 @@ function testRetryOnRecipient(ordered) { testRecipientRetryableWrites(recipientRst.getPrimary().getDB(kDbName), beforeWrites); testRecipientRetryableWrites(recipientRst.getPrimary().getDB(kDbName), duringWrites); + tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString); + jsTestLog("Trying a back-to-back migration"); const tenantMigrationTest2 = new TenantMigrationTest( {name: jsTestName() + "2", donorRst: tenantMigrationTest.getRecipientRst()}); |