diff options
author | Daniel Gottlieb <daniel.gottlieb@mongodb.com> | 2022-05-10 10:52:20 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-07-27 01:26:47 +0000 |
commit | 14abee8f34e26c5f60b363e36b4fc7812b4f8901 (patch) | |
tree | 28dbb196e5a27ad5d78973241509b146206b2df0 | |
parent | a867b133ed409fddee89a037c44cb2589b510e12 (diff) | |
download | mongo-14abee8f34e26c5f60b363e36b4fc7812b4f8901.tar.gz |
SERVER-66027: Speed up tenant migration tests.
(cherry picked from commit 15aa9218aaed0b522b6c672cac6324c2a15458f8)
8 files changed, 84 insertions, 32 deletions
diff --git a/jstests/replsets/libs/tenant_migration_test.js b/jstests/replsets/libs/tenant_migration_test.js index b7b81e3f01d..b12512b2b8d 100644 --- a/jstests/replsets/libs/tenant_migration_test.js +++ b/jstests/replsets/libs/tenant_migration_test.js @@ -38,6 +38,7 @@ function TenantMigrationTest({ initiateRstWithHighElectionTimeout = true, quickGarbageCollection = false, insertDataForTenant, + optimizeMigrations = true, }) { const donorPassedIn = (donorRst !== undefined); const recipientPassedIn = (recipientRst !== undefined); @@ -47,9 +48,15 @@ function TenantMigrationTest({ const nodes = sharedOptions.nodes || 2; const setParameterOpts = sharedOptions.setParameter || {}; + if (optimizeMigrations) { + // A tenant migration recipient's `OplogFetcher` uses aggregation which does not support + // tailable awaitdata cursors. For aggregation commands `OplogFetcher` will default to half + // the election timeout (e.g: 5 seconds) between getMores. That wait is largely unnecessary. + setParameterOpts["failpoint.setSmallOplogGetMoreMaxTimeMS"] = tojson({"mode": "alwaysOn"}); + } if (quickGarbageCollection) { - setParameterOpts.tenantMigrationGarbageCollectionDelayMS = 3 * 1000; - setParameterOpts.ttlMonitorSleepSecs = 3; + setParameterOpts.tenantMigrationGarbageCollectionDelayMS = 0; + setParameterOpts.ttlMonitorSleepSecs = 1; } donorRst = donorPassedIn ? 
donorRst : performSetUp(true /* isDonor */); diff --git a/jstests/replsets/tenant_migration_collection_ttl.js b/jstests/replsets/tenant_migration_collection_ttl.js index 1f4515f568e..def0c2da923 100644 --- a/jstests/replsets/tenant_migration_collection_ttl.js +++ b/jstests/replsets/tenant_migration_collection_ttl.js @@ -33,8 +33,14 @@ const garbageCollectionOpts = { 'failpoint.tenantMigrationDonorAllowsNonTimestampedReads': tojson({mode: 'alwaysOn'}), }; -const tenantMigrationTest = new TenantMigrationTest( - {name: jsTestName(), sharedOptions: {setParameter: garbageCollectionOpts}}); +const tenantMigrationTest = new TenantMigrationTest({ + name: jsTestName(), + sharedOptions: {setParameter: garbageCollectionOpts}, + // This test relies on ttl monitor deletion to be delayed long enough to observe documents prior + // to being deleted. That result is unintuitively achieved better with a large awaitData timeout + // than a slow ttl monitor. + optimizeMigrations: false +}); const collName = "testColl"; diff --git a/jstests/replsets/tenant_migration_commit_transaction_retry.js b/jstests/replsets/tenant_migration_commit_transaction_retry.js index 2d2eedb4bfe..89f0bb1a04c 100644 --- a/jstests/replsets/tenant_migration_commit_transaction_retry.js +++ b/jstests/replsets/tenant_migration_commit_transaction_retry.js @@ -19,15 +19,6 @@ load("jstests/replsets/libs/tenant_migration_util.js"); load("jstests/replsets/rslib.js"); load("jstests/libs/uuid_util.js"); -const kGarbageCollectionParams = { - // Set the delay before a donor state doc is garbage collected to be short to speed up - // the test. - tenantMigrationGarbageCollectionDelayMS: 3 * 1000, - - // Set the TTL monitor to run at a smaller interval to speed up the test. 
- ttlMonitorSleepSecs: 1, -}; - const tenantMigrationTest = new TenantMigrationTest( {name: jsTestName(), sharedOptions: {nodes: 1}, quickGarbageCollection: true}); @@ -96,7 +87,11 @@ waitAfterStartingOplogApplier.off(); waitInOplogApplier.off(); TenantMigrationTest.assertCommitted(tenantMigrationTest.waitForMigrationToComplete(migrationOpts)); -assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString)); +// With `quickGarbageCollection` it's likely that forgetting the migration will race with its +// natural destruction. +assert.commandWorkedOrFailedWithCode( + tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString), + [ErrorCodes.NoSuchTenantMigration]); tenantMigrationTest.waitForMigrationGarbageCollection(migrationId, kTenantId); // Test the client can retry commitTransaction against the recipient for transactions that committed @@ -115,7 +110,8 @@ jsTestLog("Running a back-to-back migration"); const tenantMigrationTest2 = new TenantMigrationTest({ name: jsTestName() + "2", donorRst: tenantMigrationTest.getRecipientRst(), - sharedOptions: {nodes: 1, setParameter: kGarbageCollectionParams} + sharedOptions: {nodes: 1}, + quickGarbageCollection: true, }); const migrationId2 = UUID(); const migrationOpts2 = { @@ -132,7 +128,11 @@ donorTxnEntries.forEach((txnEntry) => { assert.commandWorked(recipientPrimary2.adminCommand( {commitTransaction: 1, lsid: txnEntry._id, txnNumber: txnEntry.txnNum, autocommit: false})); }); -assert.commandWorked(tenantMigrationTest2.forgetMigration(migrationOpts2.migrationIdString)); +// With `quickGarbageCollection` it's likely that forgetting the migration will race with its +// natural destruction. 
+assert.commandWorkedOrFailedWithCode( + tenantMigrationTest2.forgetMigration(migrationOpts2.migrationIdString), + [ErrorCodes.NoSuchTenantMigration]); tenantMigrationTest2.waitForMigrationGarbageCollection(migrationId2, kTenantId); tenantMigrationTest2.stop(); diff --git a/jstests/replsets/tenant_migration_concurrent_reads_on_recipient.js b/jstests/replsets/tenant_migration_concurrent_reads_on_recipient.js index 761e5967446..70f82a3a2db 100644 --- a/jstests/replsets/tenant_migration_concurrent_reads_on_recipient.js +++ b/jstests/replsets/tenant_migration_concurrent_reads_on_recipient.js @@ -91,6 +91,7 @@ function testRejectAllReadsAfterCloningDone({testCase, dbName, collName, tenantM beforeFetchingTransactionsFp.off(); TenantMigrationTest.assertCommitted(runMigrationThread.returnData()); assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString)); + tenantMigrationTest.waitForMigrationGarbageCollection(migrationOpts.migrationIdString); } /** @@ -170,6 +171,7 @@ function testRejectOnlyReadsWithAtClusterTimeLessThanRejectReadsBeforeTimestamp( }); assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString)); + tenantMigrationTest.waitForMigrationGarbageCollection(migrationOpts.migrationIdString); } /** @@ -222,6 +224,7 @@ function testDoNotRejectReadsAfterMigrationAbortedBeforeReachingRejectReadsBefor runCommand(db, testCase.command(collName), null); } }); + tenantMigrationTest.waitForMigrationGarbageCollection(migrationOpts.migrationIdString); } /** @@ -413,25 +416,34 @@ const testFuncs = { testDoNotRejectReadsAfterMigrationAbortedAfterReachingRejectReadsBeforeTimestamp }; +const tenantMigrationTest = new TenantMigrationTest({ + name: jsTestName(), + quickGarbageCollection: true, +}); for (const [testName, testFunc] of Object.entries(testFuncs)) { for (const [testCaseName, testCase] of Object.entries(testCases)) { jsTest.log("Testing " + testName + " with testCase " + testCaseName); let tenantId = 
`${testCaseName}-${testName}`; + let migrationDb = `${tenantId}_test`; + tenantMigrationTest.insertDonorDB(migrationDb, "test"); let dbName = `${tenantId}_${kTenantDefinedDbName}`; - const tenantMigrationTest = new TenantMigrationTest({ - name: jsTestName(), - quickGarbageCollection: true, - insertDataForTenant: tenantId, - }); - // Force the recipient to preserve all snapshot history to ensure that snapshot reads do not - // fail with SnapshotTooOld due to snapshot being unavailable. + // Force the recipient to preserve all snapshot history to ensure that snapshot reads do + // not fail with SnapshotTooOld due to snapshot being unavailable. tenantMigrationTest.getRecipientRst().nodes.forEach(node => { configureFailPoint(node, "WTPreserveSnapshotHistoryIndefinitely"); }); testFunc({testCase, dbName, collName: kCollName, tenantMigrationTest}); - tenantMigrationTest.stop(); + + // ShardMerge is not robust to migrating twice in quick succession. We drop the data + // files to ensure a subsequent tenant migration will avoid trying to merge files from the + // previous migration. 
+ assert.commandWorked( + tenantMigrationTest.getDonorRst().getPrimary().getDB(migrationDb).dropDatabase()); + assert.commandWorked( + tenantMigrationTest.getRecipientRst().getPrimary().getDB(migrationDb).dropDatabase()); } } +tenantMigrationTest.stop(); })(); diff --git a/jstests/replsets/tenant_migration_donor_retry.js b/jstests/replsets/tenant_migration_donor_retry.js index 281c7a22412..35e705331d5 100644 --- a/jstests/replsets/tenant_migration_donor_retry.js +++ b/jstests/replsets/tenant_migration_donor_retry.js @@ -44,8 +44,11 @@ function setup() { donorRst.startSet(); donorRst.initiate(); - const tenantMigrationTest = new TenantMigrationTest( - {name: jsTestName(), donorRst: donorRst, quickGarbageCollection: true}); + const tenantMigrationTest = new TenantMigrationTest({ + name: jsTestName(), + donorRst: donorRst, + sharedOptions: {setParameter: garbageCollectionOpts}, + }); return { tenantMigrationTest, teardown: function() { diff --git a/jstests/replsets/tenant_migration_donor_rollback_recovery.js b/jstests/replsets/tenant_migration_donor_rollback_recovery.js index bccd2cd0c5f..543b3f424c5 100644 --- a/jstests/replsets/tenant_migration_donor_rollback_recovery.js +++ b/jstests/replsets/tenant_migration_donor_rollback_recovery.js @@ -237,10 +237,20 @@ function testRollBackMarkingStateGarbageCollectable() { true /* retryOnRetryableErrors */); forgetMigrationThread.start(); assert.soon(() => { + let docs = + donorPrimary.getCollection(TenantMigrationTest.kConfigDonorsNS).find().toArray(); + // There is a ttl index on `expireAt`. Thus we know the state doc is marked as garbage + // collectible either when: + // + // 1) It has an `expireAt`. + // 2) The document is deleted/the collection is empty. 
return 1 === donorPrimary.getCollection(TenantMigrationTest.kConfigDonorsNS).count({ _id: migrationId, expireAt: {$exists: 1} - }); + }) || + donorPrimary.getCollection(TenantMigrationTest.kConfigDonorsNS).count({ + _id: migrationId + }) === 0; }); }; diff --git a/jstests/replsets/tenant_migration_recipient_current_op.js b/jstests/replsets/tenant_migration_recipient_current_op.js index c5d88734a00..5dabe5dad5e 100644 --- a/jstests/replsets/tenant_migration_recipient_current_op.js +++ b/jstests/replsets/tenant_migration_recipient_current_op.js @@ -24,7 +24,12 @@ load("jstests/libs/parallelTester.js"); // For the Thread(). load("jstests/replsets/libs/tenant_migration_test.js"); load("jstests/replsets/libs/tenant_migration_util.js"); -const tenantMigrationTest = new TenantMigrationTest({name: jsTestName()}); +const tenantMigrationTest = new TenantMigrationTest({ + name: jsTestName(), + // This test relies on a large awaitData timeout keeping a window open such that failpoints + // configured for hanging are hit. + optimizeMigrations: false, +}); const kMigrationId = UUID(); const kTenantId = 'testTenantId'; diff --git a/jstests/replsets/tenant_migration_recipient_ttl.js b/jstests/replsets/tenant_migration_recipient_ttl.js index f0f2de2281f..304ddd1fdab 100644 --- a/jstests/replsets/tenant_migration_recipient_ttl.js +++ b/jstests/replsets/tenant_migration_recipient_ttl.js @@ -18,8 +18,18 @@ load("jstests/libs/uuid_util.js"); // For extractUUIDFromObject(). load("jstests/replsets/libs/tenant_migration_test.js"); load("jstests/replsets/libs/tenant_migration_util.js"); -const tenantMigrationTest = - new TenantMigrationTest({name: jsTestName(), quickGarbageCollection: true}); +const kGarbageCollectionParams = { + // Set the delay to 20s so that we can see the `expireAt` set prior to the document vanishing. + tenantMigrationGarbageCollectionDelayMS: 20 * 1000, + + // Set the TTL monitor to run at a smaller interval to speed up the test. 
+ ttlMonitorSleepSecs: 1 +}; + +const tenantMigrationTest = new TenantMigrationTest({ + name: jsTestName(), + sharedOptions: {setParameter: kGarbageCollectionParams}, +}); const kRecipientTTLIndexName = "TenantMigrationRecipientTTLIndex"; @@ -71,8 +81,7 @@ assert(stateDocQuery[0].hasOwnProperty("expireAt"), tojson(stateDocQuery)); // Sleep past the garbage collection delay time, and then make sure the state document for our // migration does not exist. -jsTestLog("Sleeping and then expecting the state document to have been deleted."); -sleep(30000); // The garbage collection delay is 30s. +jsTestLog("Waiting for the state document to have been deleted."); tenantMigrationTest.waitForMigrationGarbageCollection(kMigrationId, kTenantId); tenantMigrationTest.stop(); |