diff options
9 files changed, 88 insertions, 35 deletions
diff --git a/jstests/replsets/libs/tenant_migration_test.js b/jstests/replsets/libs/tenant_migration_test.js index b7b81e3f01d..b12512b2b8d 100644 --- a/jstests/replsets/libs/tenant_migration_test.js +++ b/jstests/replsets/libs/tenant_migration_test.js @@ -38,6 +38,7 @@ function TenantMigrationTest({ initiateRstWithHighElectionTimeout = true, quickGarbageCollection = false, insertDataForTenant, + optimizeMigrations = true, }) { const donorPassedIn = (donorRst !== undefined); const recipientPassedIn = (recipientRst !== undefined); @@ -47,9 +48,15 @@ function TenantMigrationTest({ const nodes = sharedOptions.nodes || 2; const setParameterOpts = sharedOptions.setParameter || {}; + if (optimizeMigrations) { + // A tenant migration recipient's `OplogFetcher` uses aggregation which does not support + // tailable awaitdata cursors. For aggregation commands `OplogFetcher` will default to half + // the election timeout (e.g: 5 seconds) between getMores. That wait is largely unnecessary. + setParameterOpts["failpoint.setSmallOplogGetMoreMaxTimeMS"] = tojson({"mode": "alwaysOn"}); + } if (quickGarbageCollection) { - setParameterOpts.tenantMigrationGarbageCollectionDelayMS = 3 * 1000; - setParameterOpts.ttlMonitorSleepSecs = 3; + setParameterOpts.tenantMigrationGarbageCollectionDelayMS = 0; + setParameterOpts.ttlMonitorSleepSecs = 1; } donorRst = donorPassedIn ? 
donorRst : performSetUp(true /* isDonor */); diff --git a/jstests/replsets/tenant_migration_collection_ttl.js b/jstests/replsets/tenant_migration_collection_ttl.js index 13adf04efa7..df5c44557e0 100644 --- a/jstests/replsets/tenant_migration_collection_ttl.js +++ b/jstests/replsets/tenant_migration_collection_ttl.js @@ -32,8 +32,14 @@ const garbageCollectionOpts = { 'failpoint.tenantMigrationDonorAllowsNonTimestampedReads': tojson({mode: 'alwaysOn'}), }; -const tenantMigrationTest = new TenantMigrationTest( - {name: jsTestName(), sharedOptions: {setParameter: garbageCollectionOpts}}); +const tenantMigrationTest = new TenantMigrationTest({ + name: jsTestName(), + sharedOptions: {setParameter: garbageCollectionOpts}, + // This test relies on ttl monitor deletion to be delayed long enough to observe documents prior + // to being deleted. That result is unintuitively achieved better with a large awaitData timeout + // than a slow ttl monitor. + optimizeMigrations: false +}); const collName = "testColl"; diff --git a/jstests/replsets/tenant_migration_commit_transaction_retry.js b/jstests/replsets/tenant_migration_commit_transaction_retry.js index e124868af42..edf72d5a85b 100644 --- a/jstests/replsets/tenant_migration_commit_transaction_retry.js +++ b/jstests/replsets/tenant_migration_commit_transaction_retry.js @@ -18,15 +18,6 @@ load("jstests/replsets/libs/tenant_migration_util.js"); load("jstests/replsets/rslib.js"); load("jstests/libs/uuid_util.js"); -const kGarbageCollectionParams = { - // Set the delay before a donor state doc is garbage collected to be short to speed up - // the test. - tenantMigrationGarbageCollectionDelayMS: 3 * 1000, - - // Set the TTL monitor to run at a smaller interval to speed up the test. 
- ttlMonitorSleepSecs: 1, -}; - const tenantMigrationTest = new TenantMigrationTest( {name: jsTestName(), sharedOptions: {nodes: 1}, quickGarbageCollection: true}); @@ -95,7 +86,11 @@ pauseTenantMigrationBeforeLeavingDataSyncState.off(); waitInOplogApplier.off(); TenantMigrationTest.assertCommitted(tenantMigrationTest.waitForMigrationToComplete(migrationOpts)); -assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString)); +// With `quickGarbageCollection` it's likely that forgetting the migration will race with its +// natural destruction. +assert.commandWorkedOrFailedWithCode( + tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString), + [ErrorCodes.NoSuchTenantMigration]); tenantMigrationTest.waitForMigrationGarbageCollection(migrationId, kTenantId); // Test the client can retry commitTransaction against the recipient for transactions that committed @@ -114,7 +109,8 @@ jsTestLog("Running a back-to-back migration"); const tenantMigrationTest2 = new TenantMigrationTest({ name: jsTestName() + "2", donorRst: tenantMigrationTest.getRecipientRst(), - sharedOptions: {nodes: 1, setParameter: kGarbageCollectionParams} + sharedOptions: {nodes: 1}, + quickGarbageCollection: true, }); const migrationId2 = UUID(); const migrationOpts2 = { @@ -131,7 +127,11 @@ donorTxnEntries.forEach((txnEntry) => { assert.commandWorked(recipientPrimary2.adminCommand( {commitTransaction: 1, lsid: txnEntry._id, txnNumber: txnEntry.txnNum, autocommit: false})); }); -assert.commandWorked(tenantMigrationTest2.forgetMigration(migrationOpts2.migrationIdString)); +// With `quickGarbageCollection` it's likely that forgetting the migration will race with its +// natural destruction. 
+assert.commandWorkedOrFailedWithCode( + tenantMigrationTest2.forgetMigration(migrationOpts2.migrationIdString), + [ErrorCodes.NoSuchTenantMigration]); tenantMigrationTest2.waitForMigrationGarbageCollection(migrationId2, kTenantId); tenantMigrationTest2.stop(); diff --git a/jstests/replsets/tenant_migration_concurrent_reads_on_recipient.js b/jstests/replsets/tenant_migration_concurrent_reads_on_recipient.js index 2915eebc65f..71d12c1f044 100644 --- a/jstests/replsets/tenant_migration_concurrent_reads_on_recipient.js +++ b/jstests/replsets/tenant_migration_concurrent_reads_on_recipient.js @@ -90,6 +90,7 @@ function testRejectAllReadsAfterCloningDone({testCase, dbName, collName, tenantM beforeFetchingTransactionsFp.off(); TenantMigrationTest.assertCommitted(runMigrationThread.returnData()); assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString)); + tenantMigrationTest.waitForMigrationGarbageCollection(migrationOpts.migrationIdString); } /** @@ -169,6 +170,7 @@ function testRejectOnlyReadsWithAtClusterTimeLessThanRejectReadsBeforeTimestamp( }); assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString)); + tenantMigrationTest.waitForMigrationGarbageCollection(migrationOpts.migrationIdString); } /** @@ -221,6 +223,7 @@ function testDoNotRejectReadsAfterMigrationAbortedBeforeReachingRejectReadsBefor runCommand(db, testCase.command(collName), null); } }); + tenantMigrationTest.waitForMigrationGarbageCollection(migrationOpts.migrationIdString); } /** @@ -412,25 +415,34 @@ const testFuncs = { testDoNotRejectReadsAfterMigrationAbortedAfterReachingRejectReadsBeforeTimestamp }; +const tenantMigrationTest = new TenantMigrationTest({ + name: jsTestName(), + quickGarbageCollection: true, +}); for (const [testName, testFunc] of Object.entries(testFuncs)) { for (const [testCaseName, testCase] of Object.entries(testCases)) { jsTest.log("Testing " + testName + " with testCase " + testCaseName); let tenantId = 
`${testCaseName}-${testName}`; + let migrationDb = `${tenantId}_test`; + tenantMigrationTest.insertDonorDB(migrationDb, "test"); let dbName = `${tenantId}_${kTenantDefinedDbName}`; - const tenantMigrationTest = new TenantMigrationTest({ - name: jsTestName(), - quickGarbageCollection: true, - insertDataForTenant: tenantId, - }); - // Force the recipient to preserve all snapshot history to ensure that snapshot reads do not - // fail with SnapshotTooOld due to snapshot being unavailable. + // Force the recipient to preserve all snapshot history to ensure that snapshot reads do + // not fail with SnapshotTooOld due to snapshot being unavailable. tenantMigrationTest.getRecipientRst().nodes.forEach(node => { configureFailPoint(node, "WTPreserveSnapshotHistoryIndefinitely"); }); testFunc({testCase, dbName, collName: kCollName, tenantMigrationTest}); - tenantMigrationTest.stop(); + + // ShardMerge is not robust to migrating twice in quick succession. We drop the data + // files to ensure a subsequent tenant migration will avoid trying to merge files from the + // previous migration. 
+ assert.commandWorked( + tenantMigrationTest.getDonorRst().getPrimary().getDB(migrationDb).dropDatabase()); + assert.commandWorked( + tenantMigrationTest.getRecipientRst().getPrimary().getDB(migrationDb).dropDatabase()); } } +tenantMigrationTest.stop(); })(); diff --git a/jstests/replsets/tenant_migration_donor_retry.js b/jstests/replsets/tenant_migration_donor_retry.js index 7ac00d9471e..6229c81b153 100644 --- a/jstests/replsets/tenant_migration_donor_retry.js +++ b/jstests/replsets/tenant_migration_donor_retry.js @@ -43,8 +43,11 @@ function setup() { donorRst.startSet(); donorRst.initiate(); - const tenantMigrationTest = new TenantMigrationTest( - {name: jsTestName(), donorRst: donorRst, quickGarbageCollection: true}); + const tenantMigrationTest = new TenantMigrationTest({ + name: jsTestName(), + donorRst: donorRst, + sharedOptions: {setParameter: garbageCollectionOpts}, + }); return { tenantMigrationTest, teardown: function() { diff --git a/jstests/replsets/tenant_migration_donor_rollback_recovery.js b/jstests/replsets/tenant_migration_donor_rollback_recovery.js index eeb1516bf7a..a3f082e8f26 100644 --- a/jstests/replsets/tenant_migration_donor_rollback_recovery.js +++ b/jstests/replsets/tenant_migration_donor_rollback_recovery.js @@ -236,10 +236,20 @@ function testRollBackMarkingStateGarbageCollectable() { true /* retryOnRetryableErrors */); forgetMigrationThread.start(); assert.soon(() => { + let docs = + donorPrimary.getCollection(TenantMigrationTest.kConfigDonorsNS).find().toArray(); + // There is a ttl index on `expireAt`. Thus we know the state doc is marked as garbage + // collectible either when: + // + // 1) It has an `expireAt`. + // 2) The document is deleted/the collection is empty. 
return 1 === donorPrimary.getCollection(TenantMigrationTest.kConfigDonorsNS).count({ _id: migrationId, expireAt: {$exists: 1} - }); + }) || + donorPrimary.getCollection(TenantMigrationTest.kConfigDonorsNS).count({ + _id: migrationId + }) === 0; }); }; diff --git a/jstests/replsets/tenant_migration_recipient_current_op.js b/jstests/replsets/tenant_migration_recipient_current_op.js index a2ce3cb4a2f..19d1bbdbd6d 100644 --- a/jstests/replsets/tenant_migration_recipient_current_op.js +++ b/jstests/replsets/tenant_migration_recipient_current_op.js @@ -24,7 +24,12 @@ load("jstests/libs/parallelTester.js"); // For the Thread(). load("jstests/replsets/libs/tenant_migration_test.js"); load("jstests/replsets/libs/tenant_migration_util.js"); -const tenantMigrationTest = new TenantMigrationTest({name: jsTestName()}); +const tenantMigrationTest = new TenantMigrationTest({ + name: jsTestName(), + // This test relies on a large awaitData timeout keeping a window open such that failpoints + // configured for hanging are hit. + optimizeMigrations: false, +}); const kMigrationId = UUID(); const kTenantId = 'testTenantId'; @@ -325,4 +330,4 @@ forgetMigrationThread.start(); } tenantMigrationTest.stop(); -})();
\ No newline at end of file +})(); diff --git a/jstests/replsets/tenant_migration_recipient_ttl.js b/jstests/replsets/tenant_migration_recipient_ttl.js index 5ab8658a444..7112c8f58ed 100644 --- a/jstests/replsets/tenant_migration_recipient_ttl.js +++ b/jstests/replsets/tenant_migration_recipient_ttl.js @@ -17,8 +17,18 @@ load("jstests/libs/uuid_util.js"); // For extractUUIDFromObject(). load("jstests/replsets/libs/tenant_migration_test.js"); load("jstests/replsets/libs/tenant_migration_util.js"); -const tenantMigrationTest = - new TenantMigrationTest({name: jsTestName(), quickGarbageCollection: true}); +const kGarbageCollectionParams = { + // Set the delay to 20s so that we can see the `expireAt` set prior to the document vanishing. + tenantMigrationGarbageCollectionDelayMS: 20 * 1000, + + // Set the TTL monitor to run at a smaller interval to speed up the test. + ttlMonitorSleepSecs: 1 +}; + +const tenantMigrationTest = new TenantMigrationTest({ + name: jsTestName(), + sharedOptions: {setParameter: kGarbageCollectionParams}, +}); const kRecipientTTLIndexName = "TenantMigrationRecipientTTLIndex"; @@ -70,8 +80,7 @@ assert(stateDocQuery[0].hasOwnProperty("expireAt"), tojson(stateDocQuery)); // Sleep past the garbage collection delay time, and then make sure the state document for our // migration does not exist. -jsTestLog("Sleeping and then expecting the state document to have been deleted."); -sleep(30000); // The garbage collection delay is 30s. 
+jsTestLog("Waiting for the state document to have been deleted."); tenantMigrationTest.waitForMigrationGarbageCollection(kMigrationId, kTenantId); tenantMigrationTest.stop(); diff --git a/jstests/replsets/tenant_migration_shard_merge_recipient_current_op.js b/jstests/replsets/tenant_migration_shard_merge_recipient_current_op.js index 5b3908b659f..977a0340534 100644 --- a/jstests/replsets/tenant_migration_shard_merge_recipient_current_op.js +++ b/jstests/replsets/tenant_migration_shard_merge_recipient_current_op.js @@ -106,6 +106,9 @@ jsTestLog("Starting tenant migration with migrationId: " + kMigrationId + assert.commandWorked( tenantMigrationTest.startMigration(migrationOpts, {enableDonorStartMigrationFsync: true})); +const fpBeforePersistingRejectReadsBeforeTimestamp = configureFailPoint( + recipientPrimary, "fpBeforePersistingRejectReadsBeforeTimestamp", {action: "hang"}); + { // Wait until a current operation corresponding to "tenant recipient migration" with state // kStarted is visible on the recipientPrimary. @@ -164,8 +167,6 @@ assert.commandWorked( // Wait for the "kConsistent" state to be reached. jsTestLog("Waiting for the kConsistent state to be reached."); fpAfterDataConsistent.wait(); - const fpBeforePersistingRejectReadsBeforeTimestamp = configureFailPoint( - recipientPrimary, "fpBeforePersistingRejectReadsBeforeTimestamp", {action: "hang"}); let res = recipientPrimary.adminCommand({currentOp: true, desc: "tenant recipient migration"}); checkStandardFieldsOK(res); |