diff options
4 files changed, 98 insertions, 85 deletions
diff --git a/jstests/replsets/libs/tenant_migration_test.js b/jstests/replsets/libs/tenant_migration_test.js index 296b9eb0cd1..8fd5409b13b 100644 --- a/jstests/replsets/libs/tenant_migration_test.js +++ b/jstests/replsets/libs/tenant_migration_test.js @@ -437,7 +437,7 @@ function TenantMigrationTest({ */ this.isDonorNodeInExpectedState = function(node, migrationId, tenantId, expectedState) { const configDonorsColl = - this.getDonorPrimary().getCollection("config.tenantMigrationDonors"); + this.getDonorPrimary().getCollection(TenantMigrationTest.kConfigDonorsNS); if (configDonorsColl.findOne({_id: migrationId}).state !== expectedState) { return false; } diff --git a/jstests/replsets/libs/tenant_migration_util.js b/jstests/replsets/libs/tenant_migration_util.js index dc3f5e33d50..de4ca29b447 100644 --- a/jstests/replsets/libs/tenant_migration_util.js +++ b/jstests/replsets/libs/tenant_migration_util.js @@ -3,6 +3,7 @@ */ var TenantMigrationUtil = (function() { const kExternalKeysNs = "config.external_validation_keys"; + const kCreateRstRetryIntervalMS = 100; /** * Returns the external keys for the given migration id. @@ -95,7 +96,7 @@ var TenantMigrationUtil = (function() { */ function runMigrationAsync(migrationOpts, donorRstArgs, retryOnRetryableErrors = false) { load("jstests/replsets/libs/tenant_migration_util.js"); - const donorRst = new ReplSetTest({rstArgs: donorRstArgs}); + const donorRst = TenantMigrationUtil.createRst(donorRstArgs, retryOnRetryableErrors); const migrationCertificates = TenantMigrationUtil.makeMigrationCertificatesForTest(); const cmdObj = { @@ -126,7 +127,7 @@ var TenantMigrationUtil = (function() { */ function forgetMigrationAsync(migrationIdString, donorRstArgs, retryOnRetryableErrors = false) { load("jstests/replsets/libs/tenant_migration_util.js"); - const donorRst = new ReplSetTest({rstArgs: donorRstArgs}); + const donorRst = TenantMigrationUtil.createRst(donorRstArgs, retryOnRetryableErrors); const cmdObj = {donorForgetMigration: 1, migrationId: UUID(migrationIdString)}; return TenantMigrationUtil.runTenantMigrationCommand( cmdObj, donorRst, retryOnRetryableErrors); @@ -145,7 +146,7 @@ var TenantMigrationUtil = (function() { */ function tryAbortMigrationAsync(migrationOpts, donorRstArgs, retryOnRetryableErrors = false) { load("jstests/replsets/libs/tenant_migration_util.js"); - const donorRst = new ReplSetTest({rstArgs: donorRstArgs}); + const donorRst = TenantMigrationUtil.createRst(donorRstArgs, retryOnRetryableErrors); const cmdObj = { donorAbortMigration: 1, migrationId: UUID(migrationOpts.migrationIdString), @@ -203,6 +204,27 @@ var TenantMigrationUtil = (function() { } /** + * Returns a new ReplSetTest created based on the given 'rstArgs'. If 'retryOnRetryableErrors' + * is true, retries on retryable errors (e.g. errors caused by shutdown). + */ + function createRst(rstArgs, retryOnRetryableErrors) { + while (true) { + try { + return new ReplSetTest({rstArgs: rstArgs}); + } catch (e) { + if (retryOnRetryableErrors && isNetworkError(e)) { + jsTest.log(`Failed to create ReplSetTest for ${ + rstArgs.name} inside tenant migration thread: ${tojson(e)}. Retrying in ${ + kCreateRstRetryIntervalMS}ms.`); + sleep(kCreateRstRetryIntervalMS); + continue; + } + throw e; + } + } + } + + /** * Returns the TenantMigrationAccessBlocker serverStatus output for the migration for the given * tenant if there one. */ @@ -317,6 +339,7 @@ var TenantMigrationUtil = (function() { forgetMigrationAsync, tryAbortMigrationAsync, createRstArgs, + createRst, runTenantMigrationCommand, isFeatureFlagEnabled, getCertificateAndPrivateKey, diff --git a/jstests/replsets/tenant_migration_donor_rollback_recovery.js b/jstests/replsets/tenant_migration_donor_rollback_recovery.js index 01dc59ed0bf..3c69a1373fb 100644 --- a/jstests/replsets/tenant_migration_donor_rollback_recovery.js +++ b/jstests/replsets/tenant_migration_donor_rollback_recovery.js @@ -32,7 +32,6 @@ const recipientRst = new ReplSetTest({ setParameter: { tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS, ttlMonitorSleepSecs: 1, - tenantMigrationDisableX509Auth: true, } }) }); @@ -54,58 +53,56 @@ function makeMigrationOpts(migrationId, tenantId) { } /** - * Starts a donor ReplSetTest and creates a TenantMigrationTest for it. Runs 'setUpFunc' and then - * starts a RollbackTest from the donor ReplSetTest. Runs 'rollbackOpsFunc' while it is in rollback - * operations state (operations run in this state will be rolled back). Finally, runs - * 'steadyStateFunc' after it is back in the replication steady state. - * - * See rollback_test.js for more information about RollbackTest. + * Starts a donor ReplSetTest and creates a TenantMigrationTest for it. Runs 'setUpFunc' after + * initiating the donor. Then, runs 'rollbackOpsFunc' while replication is disabled on the + * secondaries, shuts down the primary and restarts it after re-election to force the operations in + * 'rollbackOpsFunc' to be rolled back. Finally, runs 'steadyStateFunc' after it is back in the + * replication steady state. */ function testRollBack(setUpFunc, rollbackOpsFunc, steadyStateFunc) { const donorRst = new ReplSetTest({ name: "donorRst", nodes: 3, - useBridge: true, - settings: {chainingAllowed: false}, nodeOptions: Object.assign(migrationX509Options.donor, { setParameter: { tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS, ttlMonitorSleepSecs: 1, - tenantMigrationDisableX509Auth: true, } }) }); donorRst.startSet(); - let config = donorRst.getReplSetConfig(); - config.members[2].priority = 0; - donorRst.initiateWithHighElectionTimeout(config); + donorRst.initiate(); const tenantMigrationTest = - new TenantMigrationTest({name: jsTestName(), recipientRst, donorRst}); + new TenantMigrationTest({name: jsTestName(), donorRst, recipientRst}); const donorRstArgs = TenantMigrationUtil.createRstArgs(donorRst); setUpFunc(tenantMigrationTest, donorRstArgs); - const donorRollbackTest = new RollbackTest("donorRst", donorRst); - let donorPrimary = donorRollbackTest.getPrimary(); - donorRollbackTest.awaitLastOpCommitted(); + let originalDonorPrimary = donorRst.getPrimary(); + const originalDonorSecondaries = donorRst.getSecondaries(); + donorRst.awaitLastOpCommitted(); - // Writes during this state will be rolled back. - donorRollbackTest.transitionToRollbackOperations(); + // Disable replication on the secondaries so that writes during this step will be rolled back. + stopServerReplication(originalDonorSecondaries); rollbackOpsFunc(tenantMigrationTest, donorRstArgs); - // Transition to replication steady state. - donorRollbackTest.transitionToSyncSourceOperationsBeforeRollback(); - donorRollbackTest.transitionToSyncSourceOperationsDuringRollback(); - donorRollbackTest.transitionToSteadyStateOperations(); + // Shut down the primary and re-enable replication to allow one of the secondaries to get + // elected, and make the writes above get rolled back on the original primary when it comes + // back up. + donorRst.stop(originalDonorPrimary); + restartServerReplication(originalDonorSecondaries); + const newDonorPrimary = donorRst.getPrimary(); + assert.neq(originalDonorPrimary, newDonorPrimary); + + // Restart the original primary. + originalDonorPrimary = + donorRst.start(originalDonorPrimary, {waitForConnect: true}, true /* restart */); + originalDonorPrimary.setSecondaryOk(); + donorRst.awaitReplication(); - // Get the correct primary and secondary after the topology changes. The donor replica set - // contains 3 nodes, and replication is disabled on the tiebreaker node. So there is only one - // secondary that the primary replicates data onto. - donorPrimary = donorRollbackTest.getPrimary(); - let donorSecondary = donorRollbackTest.getSecondary(); - steadyStateFunc(tenantMigrationTest, donorPrimary, donorSecondary); + steadyStateFunc(tenantMigrationTest); - donorRollbackTest.stop(); + donorRst.stopSet(); } /** @@ -136,12 +133,12 @@ function testRollbackInitialState() { }); }; - let steadyStateFunc = (tenantMigrationTest, donorPrimary, donorSecondary) => { + let steadyStateFunc = (tenantMigrationTest) => { // Verify that the migration restarted successfully on the new primary despite rollback. const stateRes = assert.commandWorked(migrationThread.returnData()); assert.eq(stateRes.state, TenantMigrationTest.DonorState.kCommitted); tenantMigrationTest.assertDonorNodesInExpectedState( - [donorPrimary, donorSecondary], + tenantMigrationTest.getDonorRst().nodes, migrationId, migrationOpts.tenantId, TenantMigrationTest.DonorState.kCommitted); @@ -191,12 +188,12 @@ function testRollBackStateTransition(pauseFailPoint, setUpFailPoints, nextState) }); }; - let steadyStateFunc = (tenantMigrationTest, donorPrimary, donorSecondary) => { + let steadyStateFunc = (tenantMigrationTest) => { // Verify that the migration resumed successfully on the new primary despite the rollback. const stateRes = assert.commandWorked(migrationThread.returnData()); assert.eq(stateRes.state, TenantMigrationTest.DonorState.kCommitted); tenantMigrationTest.waitForDonorNodesToReachState( - [donorPrimary, donorSecondary], + tenantMigrationTest.getDonorRst().nodes, migrationId, migrationOpts.tenantId, TenantMigrationTest.DonorState.kCommitted); @@ -243,11 +240,10 @@ function testRollBackMarkingStateGarbageCollectable() { }); }; - let steadyStateFunc = (tenantMigrationTest, donorPrimary, donorSecondary) => { + let steadyStateFunc = (tenantMigrationTest) => { // Verify that the migration state got garbage collected successfully despite the rollback. assert.commandWorked(forgetMigrationThread.returnData()); - tenantMigrationTest.waitForMigrationGarbageCollection( - migrationId, migrationOpts.tenantId, [donorPrimary, donorSecondary]); + tenantMigrationTest.waitForMigrationGarbageCollection(migrationId, migrationOpts.tenantId); }; testRollBack(setUpFunc, rollbackOpsFunc, steadyStateFunc); @@ -283,17 +279,16 @@ function testRollBackRandom() { sleep(Math.random() * kMaxSleepTimeMS); }; - let steadyStateFunc = (tenantMigrationTest, donorPrimary, donorSecondary) => { + let steadyStateFunc = (tenantMigrationTest) => { // Verify that the migration completed and was garbage collected successfully despite the // rollback. migrationThread.join(); tenantMigrationTest.waitForDonorNodesToReachState( - [donorPrimary, donorSecondary], + tenantMigrationTest.getDonorRst().nodes, migrationId, migrationOpts.tenantId, TenantMigrationTest.DonorState.kCommitted); - tenantMigrationTest.waitForMigrationGarbageCollection( - migrationId, migrationOpts.tenantId, [donorPrimary, donorSecondary]); + tenantMigrationTest.waitForMigrationGarbageCollection(migrationId, migrationOpts.tenantId); }; testRollBack(setUpFunc, rollbackOpsFunc, steadyStateFunc); diff --git a/jstests/replsets/tenant_migration_recipient_rollback_recovery.js b/jstests/replsets/tenant_migration_recipient_rollback_recovery.js index 99aa9047676..ec5bd9a0401 100644 --- a/jstests/replsets/tenant_migration_recipient_rollback_recovery.js +++ b/jstests/replsets/tenant_migration_recipient_rollback_recovery.js @@ -32,9 +32,6 @@ const donorRst = new ReplSetTest({ setParameter: { tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS, ttlMonitorSleepSecs: 1, - // TODO (SERVER-54893): Make tenant_migration_recipient_rollback_recovery.js not use - // RollbackTest. - tenantMigrationDisableX509Auth: true } }) }); @@ -58,57 +55,55 @@ function makeMigrationOpts(tenantMigrationTest, migrationId, tenantId) { } /** - * Starts a recipient ReplSetTest and creates a TenantMigrationTest for it. Runs 'setUpFunc' and - * then starts a RollbackTest from the recipient ReplSetTest. Runs 'rollbackOpsFunc' while it is in - * rollback operations state (operations run in this state will be rolled back). Finally, runs - * 'steadyStateFunc' after it is back in the replication steady state. - * - * See rollback_test.js for more information about RollbackTest. + * Starts a recipient ReplSetTest and creates a TenantMigrationTest for it. Runs 'setUpFunc' after + * initiating the recipient. Then, runs 'rollbackOpsFunc' while replication is disabled on the + * secondaries, shuts down the primary and restarts it after re-election to force the operations in + * 'rollbackOpsFunc' to be rolled back. Finally, runs 'steadyStateFunc' after it is back in the + * replication steady state. */ function testRollBack(setUpFunc, rollbackOpsFunc, steadyStateFunc) { const recipientRst = new ReplSetTest({ name: "recipientRst", nodes: 3, - useBridge: true, - settings: {chainingAllowed: false}, nodeOptions: Object.assign(migrationX509Options.recipient, { setParameter: { tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS, ttlMonitorSleepSecs: 1, - tenantMigrationDisableX509Auth: true } }) }); recipientRst.startSet(); - let config = recipientRst.getReplSetConfig(); - config.members[2].priority = 0; - recipientRst.initiateWithHighElectionTimeout(config); + recipientRst.initiate(); const tenantMigrationTest = new TenantMigrationTest({name: jsTestName(), donorRst, recipientRst}); setUpFunc(tenantMigrationTest, donorRstArgs); - const recipientRollbackTest = new RollbackTest("recipientRst", recipientRst); - let recipientPrimary = recipientRollbackTest.getPrimary(); - recipientRollbackTest.awaitLastOpCommitted(); + let originalRecipientPrimary = recipientRst.getPrimary(); + const originalRecipientSecondaries = recipientRst.getSecondaries(); + recipientRst.awaitLastOpCommitted(); - // Writes during this state will be rolled back. - recipientRollbackTest.transitionToRollbackOperations(); + // Disable replication on the secondaries so that writes during this step will be rolled back. + stopServerReplication(originalRecipientSecondaries); rollbackOpsFunc(tenantMigrationTest, donorRstArgs); - // Transition to replication steady state. - recipientRollbackTest.transitionToSyncSourceOperationsBeforeRollback(); - recipientRollbackTest.transitionToSyncSourceOperationsDuringRollback(); - recipientRollbackTest.transitionToSteadyStateOperations(); + // Shut down the primary and re-enable replication to allow one of the secondaries to get + // elected, and make the writes above get rolled back on the original primary when it comes + // back up. + recipientRst.stop(originalRecipientPrimary); + restartServerReplication(originalRecipientSecondaries); + const newRecipientPrimary = recipientRst.getPrimary(); + assert.neq(originalRecipientPrimary, newRecipientPrimary); + + // Restart the original primary. + originalRecipientPrimary = + recipientRst.start(originalRecipientPrimary, {waitForConnect: true}, true /* restart */); + originalRecipientPrimary.setSecondaryOk(); + recipientRst.awaitReplication(); - // Get the correct primary and secondary after the topology changes. The recipient replica set - // contains 3 nodes, and replication is disabled on the tiebreaker node. So there is only one - // secondary that the primary replicates data onto. - recipientPrimary = recipientRollbackTest.getPrimary(); - let recipientSecondary = recipientRollbackTest.getSecondary(); - steadyStateFunc(tenantMigrationTest, recipientPrimary, recipientSecondary); + steadyStateFunc(tenantMigrationTest); - recipientRollbackTest.stop(); + recipientRst.stopSet(); } /** @@ -141,12 +136,12 @@ function testRollbackInitialState() { }); }; - let steadyStateFunc = (tenantMigrationTest, recipientPrimary, recipientSecondary) => { + let steadyStateFunc = (tenantMigrationTest) => { // Verify that the migration restarted successfully on the new primary despite rollback. const stateRes = assert.commandWorked(migrationThread.returnData()); assert.eq(stateRes.state, TenantMigrationTest.DonorState.kCommitted); tenantMigrationTest.assertRecipientNodesInExpectedState( - [recipientPrimary, recipientSecondary], + tenantMigrationTest.getRecipientRst().nodes, migrationId, migrationOpts.tenantId, TenantMigrationTest.RecipientState.kConsistent, @@ -199,12 +194,12 @@ function testRollBackStateTransition(pauseFailPoint, setUpFailPoints, nextState, }); }; - let steadyStateFunc = (tenantMigrationTest, recipientPrimary, recipientSecondary) => { + let steadyStateFunc = (tenantMigrationTest) => { // Verify that the migration resumed successfully on the new primary despite the rollback. const stateRes = assert.commandWorked(migrationThread.returnData()); assert.eq(stateRes.state, TenantMigrationTest.DonorState.kCommitted); tenantMigrationTest.waitForRecipientNodesToReachState( - [recipientPrimary, recipientSecondary], + tenantMigrationTest.getRecipientRst().nodes, migrationId, migrationOpts.tenantId, TenantMigrationTest.RecipientState.kConsistent, @@ -252,14 +247,14 @@ function testRollBackMarkingStateGarbageCollectable() { }); }; - let steadyStateFunc = (tenantMigrationTest, recipientPrimary, recipientSecondary) => { + let steadyStateFunc = (tenantMigrationTest) => { // Verify that the migration state got garbage collected successfully despite the rollback. assert.commandWorked(forgetMigrationThread.returnData()); tenantMigrationTest.waitForMigrationGarbageCollection( migrationId, migrationOpts.tenantId, tenantMigrationTest.getDonorRst().nodes, - [recipientPrimary, recipientSecondary]); + tenantMigrationTest.getRecipientRst().nodes); }; testRollBack(setUpFunc, rollbackOpsFunc, steadyStateFunc); @@ -296,12 +291,12 @@ function testRollBackRandom() { sleep(Math.random() * kMaxSleepTimeMS); }; - let steadyStateFunc = (tenantMigrationTest, recipientPrimary, recipientSecondary) => { + let steadyStateFunc = (tenantMigrationTest) => { // Verify that the migration completed and was garbage collected successfully despite the // rollback. migrationThread.join(); tenantMigrationTest.waitForRecipientNodesToReachState( - [recipientPrimary, recipientSecondary], + tenantMigrationTest.getRecipientRst().nodes, migrationId, migrationOpts.tenantId, TenantMigrationTest.RecipientState.kDone, @@ -310,7 +305,7 @@ function testRollBackRandom() { migrationId, migrationOpts.tenantId, tenantMigrationTest.getDonorRst().nodes, - [recipientPrimary, recipientSecondary]); + tenantMigrationTest.getRecipientRst().nodes); }; testRollBack(setUpFunc, rollbackOpsFunc, steadyStateFunc); |