diff options
author | Cheahuychou Mao <mao.cheahuychou@gmail.com> | 2020-10-06 21:35:01 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-10-15 23:18:05 +0000 |
commit | b69f93de867415ea1e700fbea462399ed63b309b (patch) | |
tree | a1a7aeeaf08f4df1b19daf69398d066559654b57 | |
parent | 0c904d2a3d9028d171961d88178e3f15c2d5a1e0 (diff) | |
download | mongo-b69f93de867415ea1e700fbea462399ed63b309b.tar.gz |
SERVER-51332 tenant_migration_donor_failover_and_shutdown.js should wait for the primary to receive the command before shutting down the node
-rw-r--r-- | jstests/replsets/tenant_migration_donor_interrupt_on_stepdown_and_shutdown.js (renamed from jstests/replsets/tenant_migration_donor_failover_and_shutdown.js) | 112 | ||||
-rw-r--r-- | jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js | 213 | ||||
-rw-r--r-- | src/mongo/db/repl/tenant_migration_donor_service.cpp | 2 |
3 files changed, 234 insertions, 93 deletions
diff --git a/jstests/replsets/tenant_migration_donor_failover_and_shutdown.js b/jstests/replsets/tenant_migration_donor_interrupt_on_stepdown_and_shutdown.js index 36b3a0e9e6c..d79de93afbf 100644 --- a/jstests/replsets/tenant_migration_donor_failover_and_shutdown.js +++ b/jstests/replsets/tenant_migration_donor_interrupt_on_stepdown_and_shutdown.js @@ -1,5 +1,5 @@ /** - * Tests that the migration is interrupted successfully on stepdown and shutdown. + * Tests that tenant migrations are interrupted successfully on stepdown and shutdown. * * @tags: [requires_fcv_47, requires_majority_read_concern, requires_persistence, * incompatible_with_eft] @@ -12,28 +12,17 @@ load("jstests/libs/parallelTester.js"); load("jstests/libs/uuid_util.js"); load("jstests/replsets/libs/tenant_migration_util.js"); -const kMaxSleepTimeMS = 1000; -const kConfigDonorsNS = "config.tenantMigrationDonors"; +const kMaxSleepTimeMS = 100; const kTenantId = "testTenantId"; -// Set the delay before a donor state doc is garbage collected to be short to speed up the test. -const kGarbageCollectionDelayMS = 30 * 1000; - -// Set the TTL monitor to run at a smaller interval to speed up the test. -const kTTLMonitorSleepSecs = 1; - /** * Runs the donorStartMigration command to start a migration, and interrupts the migration on the * donor using the 'interruptFunc', and verifies the command response using the * 'verifyCmdResponseFunc'. 
*/ -function testDonorStartMigrationInterrupt( - interruptFunc, verifyCmdResponseFunc, numDonorRsNodes = 1) { - const donorRst = new ReplSetTest({ - nodes: numDonorRsNodes, - name: "donorRst", - nodeOptions: {setParameter: {enableTenantMigrations: true}} - }); +function testDonorStartMigrationInterrupt(interruptFunc, verifyCmdResponseFunc) { + const donorRst = new ReplSetTest( + {nodes: 1, name: "donorRst", nodeOptions: {setParameter: {enableTenantMigrations: true}}}); const recipientRst = new ReplSetTest({ nodes: 1, name: "recipientRst", @@ -59,6 +48,11 @@ function testDonorStartMigrationInterrupt( let migrationThread = new Thread(TenantMigrationUtil.startMigration, donorPrimary.host, migrationOpts); migrationThread.start(); + + // Wait for to donorStartMigration command to start. + assert.soon(() => donorPrimary.adminCommand({currentOp: true, desc: "tenant donor migration"}) + .inprog.length > 0); + sleep(Math.random() * kMaxSleepTimeMS); interruptFunc(donorRst, migrationId, migrationOpts.tenantId); verifyCmdResponseFunc(migrationThread); @@ -72,16 +66,13 @@ function testDonorStartMigrationInterrupt( * the donor using the 'interruptFunc', and verifies the command response using the * 'verifyCmdResponseFunc'. 
*/ -function testDonorForgetMigrationInterrupt( - interruptFunc, verifyCmdResponseFunc, numDonorRsNodes = 1) { +function testDonorForgetMigrationInterrupt(interruptFunc, verifyCmdResponseFunc) { const donorRst = new ReplSetTest({ - nodes: numDonorRsNodes, + nodes: 1, name: "donorRst", nodeOptions: { setParameter: { enableTenantMigrations: true, - tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS, - ttlMonitorSleepSecs: kTTLMonitorSleepSecs, } } }); @@ -91,8 +82,6 @@ function testDonorForgetMigrationInterrupt( nodeOptions: { setParameter: { enableTenantMigrations: true, - tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS, - ttlMonitorSleepSecs: kTTLMonitorSleepSecs, } } }); @@ -113,12 +102,18 @@ function testDonorForgetMigrationInterrupt( readPreference: {mode: "primary"}, }; - donorPrimary.getCollection(kConfigDonorsNS).createIndex({expireAt: 1}, {expireAfterSeconds: 0}); - assert.commandWorked(TenantMigrationUtil.startMigration(donorPrimary.host, migrationOpts)); let forgetMigrationThread = new Thread( TenantMigrationUtil.forgetMigration, donorPrimary.host, migrationOpts.migrationIdString); forgetMigrationThread.start(); + + // Wait for to donorForgetMigration command to start. + assert.soon(() => { + const res = assert.commandWorked( + donorPrimary.adminCommand({currentOp: true, desc: "tenant donor migration"})); + return res.inprog[0].expireAt != null; + }); + sleep(Math.random() * kMaxSleepTimeMS); interruptFunc(donorRst, migrationId, migrationOpts.tenantId); verifyCmdResponseFunc(forgetMigrationThread); @@ -152,19 +147,6 @@ function assertCmdSucceededOrInterruptedDueToShutDown(cmdThread) { } } -/** - * If the donor state doc for the migration 'migrationId' exists on the donor (i.e. the donor's - * primary stepped down or shut down after inserting the doc), asserts that the migration - * eventually commits. 
- */ -function testMigrationCommitsIfDurableStateExists(donorRst, migrationId, tenantId) { - const donorPrimary = donorRst.getPrimary(); - const configDonorsColl = donorPrimary.getCollection(kConfigDonorsNS); - if (configDonorsColl.count({_id: migrationId}) > 0) { - TenantMigrationUtil.waitForMigrationToCommit(donorRst.nodes, migrationId, tenantId); - } -} - (() => { jsTest.log("Test that the donorStartMigration command is interrupted successfully on stepdown"); testDonorStartMigrationInterrupt((donorRst) => { @@ -194,58 +176,4 @@ function testMigrationCommitsIfDurableStateExists(donorRst, migrationId, tenantI donorRst.stopSet(); }, assertCmdSucceededOrInterruptedDueToShutDown); })(); - -(() => { - jsTest.log("Test that the migration resumes on stepup"); - testDonorStartMigrationInterrupt((donorRst, migrationId, tenantId) => { - // Use a short replSetStepDown seconds to make it more likely for the old primary to - // step back up. - assert.commandWorked(donorRst.getPrimary().adminCommand({replSetStepDown: 1, force: true})); - - testMigrationCommitsIfDurableStateExists(donorRst, migrationId, tenantId); - }, assertCmdSucceededOrInterruptedDueToStepDown, 3 /* numDonorRsNodes */); -})(); - -(() => { - jsTest.log("Test that the migration resumes after restart"); - testDonorStartMigrationInterrupt((donorRst, migrationId, tenantId) => { - donorRst.stopSet(null /* signal */, true /*forRestart */); - donorRst.startSet({restart: true, setParameter: {enableTenantMigrations: true}}); - - testMigrationCommitsIfDurableStateExists(donorRst, migrationId, tenantId); - }, assertCmdSucceededOrInterruptedDueToShutDown, 3 /* numDonorRsNodes */); -})(); - -(() => { - jsTest.log("Test that the donorForgetMigration command can be retried on stepup"); - testDonorForgetMigrationInterrupt((donorRst, migrationId, tenantId) => { - let donorPrimary = donorRst.getPrimary(); - - // Use a short replSetStepDown seconds to make it more likely for the old primary to - // step back up. 
- assert.commandWorked(donorRst.getPrimary().adminCommand({replSetStepDown: 1, force: true})); - - donorPrimary = donorRst.getPrimary(); - assert.commandWorked(TenantMigrationUtil.forgetMigration( - donorPrimary.host, extractUUIDFromObject(migrationId))); - - TenantMigrationUtil.waitForMigrationGarbageCollection( - donorRst.nodes, migrationId, tenantId); - }, assertCmdSucceededOrInterruptedDueToStepDown, 3 /* numDonorRsNodes */); -})(); - -(() => { - jsTest.log("Test that the donorForgetMigration command can be retried after restart"); - testDonorForgetMigrationInterrupt((donorRst, migrationId, tenantId) => { - donorRst.stopSet(null /* signal */, true /*forRestart */); - donorRst.startSet({restart: true, setParameter: {enableTenantMigrations: true}}); - - let donorPrimary = donorRst.getPrimary(); - assert.commandWorked(TenantMigrationUtil.forgetMigration( - donorPrimary.host, extractUUIDFromObject(migrationId))); - - TenantMigrationUtil.waitForMigrationGarbageCollection( - donorRst.nodes, migrationId, tenantId); - }, assertCmdSucceededOrInterruptedDueToShutDown, 3 /* numDonorRsNodes */); -})(); })(); diff --git a/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js b/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js new file mode 100644 index 00000000000..19aaaf8398e --- /dev/null +++ b/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js @@ -0,0 +1,213 @@ +/** + * Tests that tenant migrations resume successfully on stepup and restart. 
+ * + * @tags: [requires_fcv_47, requires_majority_read_concern, requires_persistence, + * incompatible_with_eft] + */ + +(function() { +"use strict"; + +load("jstests/libs/parallelTester.js"); +load("jstests/libs/uuid_util.js"); +load("jstests/replsets/libs/tenant_migration_util.js"); + +const kMaxSleepTimeMS = 100; +const kConfigDonorsNS = "config.tenantMigrationDonors"; +const kTenantId = "testTenantId"; + +// Set the delay before a donor state doc is garbage collected to be short to speed up the test. +const kGarbageCollectionDelayMS = 30 * 1000; + +// Set the TTL monitor to run at a smaller interval to speed up the test. +const kTTLMonitorSleepSecs = 1; + +/** + * If the donor state doc for the migration 'migrationId' exists on the donor (i.e. the donor's + * primary stepped down or shut down after inserting the doc), asserts that the migration + * eventually commits. + */ +function assertMigrationCommitsIfDurableStateExists(donorRst, migrationId, tenantId) { + const donorPrimary = donorRst.getPrimary(); + const configDonorsColl = donorPrimary.getCollection(kConfigDonorsNS); + if (configDonorsColl.count({_id: migrationId}) > 0) { + TenantMigrationUtil.waitForMigrationToCommit(donorRst.nodes, migrationId, tenantId); + } +} + +/** + * Runs the donorStartMigration command to start a migration, and interrupts the migration on the + * donor using the 'interruptFunc', and asserts that migration eventually commits. 
+ */ +function testDonorStartMigrationInterrupt(interruptFunc) { + const donorRst = new ReplSetTest( + {nodes: 3, name: "donorRst", nodeOptions: {setParameter: {enableTenantMigrations: true}}}); + const recipientRst = new ReplSetTest({ + nodes: 1, + name: "recipientRst", + nodeOptions: {setParameter: {enableTenantMigrations: true}} + }); + + donorRst.startSet(); + donorRst.initiate(); + + recipientRst.startSet(); + recipientRst.initiate(); + + const donorPrimary = donorRst.getPrimary(); + + const donorRstArgs = { + name: donorRst.name, + nodeHosts: donorRst.nodes.map(node => `127.0.0.1:${node.port}`), + nodeOptions: donorRst.nodeOptions, + keyFile: donorRst.keyFile, + host: donorRst.host, + waitForKeys: false, + }; + + const migrationId = UUID(); + const migrationOpts = { + migrationIdString: extractUUIDFromObject(migrationId), + recipientConnString: recipientRst.getURL(), + tenantId: kTenantId, + readPreference: {mode: "primary"}, + }; + + let migrationThread = new Thread( + TenantMigrationUtil.startMigrationRetryOnRetryableErrors, donorRstArgs, migrationOpts); + migrationThread.start(); + + // Wait for the donorStartMigration command to start. + assert.soon(() => donorPrimary.adminCommand({currentOp: true, desc: "tenant donor migration"}) + .inprog.length > 0); + + sleep(Math.random() * kMaxSleepTimeMS); + interruptFunc(donorRst); + + assert.commandWorked(migrationThread.returnData()); + assertMigrationCommitsIfDurableStateExists(donorRst, migrationId, migrationOpts.tenantId); + + donorRst.stopSet(); + recipientRst.stopSet(); +} + +/** + * Starts a migration and waits for it to commit, then runs the donorForgetMigration, and interrupts + * the donor using the 'interruptFunc', and asserts that the migration state is eventually garbage + * collected. 
+ */ +function testDonorForgetMigrationInterrupt(interruptFunc) { + const donorRst = new ReplSetTest({ + nodes: 3, + name: "donorRst", + nodeOptions: { + setParameter: { + enableTenantMigrations: true, + tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS, + ttlMonitorSleepSecs: kTTLMonitorSleepSecs, + } + } + }); + const recipientRst = new ReplSetTest({ + nodes: 1, + name: "recipientRst", + nodeOptions: { + setParameter: { + enableTenantMigrations: true, + tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS, + ttlMonitorSleepSecs: kTTLMonitorSleepSecs, + } + } + }); + + donorRst.startSet(); + donorRst.initiate(); + + recipientRst.startSet(); + recipientRst.initiate(); + + let donorPrimary = donorRst.getPrimary(); + + const donorRstArgs = { + name: donorRst.name, + nodeHosts: donorRst.nodes.map(node => `127.0.0.1:${node.port}`), + nodeOptions: donorRst.nodeOptions, + keyFile: donorRst.keyFile, + host: donorRst.host, + waitForKeys: false, + }; + + const migrationId = UUID(); + const migrationOpts = { + migrationIdString: extractUUIDFromObject(migrationId), + recipientConnString: recipientRst.getURL(), + tenantId: kTenantId, + readPreference: {mode: "primary"}, + }; + + donorPrimary.getCollection(kConfigDonorsNS).createIndex({expireAt: 1}, {expireAfterSeconds: 0}); + + assert.commandWorked(TenantMigrationUtil.startMigration(donorPrimary.host, migrationOpts)); + let forgetMigrationThread = + new Thread(TenantMigrationUtil.forgetMigrationRetryOnRetryableErrors, + donorRstArgs, + migrationOpts.migrationIdString); + forgetMigrationThread.start(); + + // Wait for the donorForgetMigration command to start. 
+ assert.soon(() => { + const res = assert.commandWorked( + donorPrimary.adminCommand({currentOp: true, desc: "tenant donor migration"})); + return res.inprog[0].expireAt != null; + }); + + sleep(Math.random() * kMaxSleepTimeMS); + interruptFunc(donorRst); + + donorPrimary = donorRst.getPrimary(); + assert.commandWorkedOrFailedWithCode( + TenantMigrationUtil.forgetMigration(donorPrimary.host, extractUUIDFromObject(migrationId)), + ErrorCodes.NoSuchTenantMigration); + + assert.commandWorked(forgetMigrationThread.returnData()); + TenantMigrationUtil.waitForMigrationGarbageCollection( + donorRst.nodes, migrationId, migrationOpts.tenantId); + + donorRst.stopSet(); + recipientRst.stopSet(); +} + +(() => { + jsTest.log("Test that the migration resumes on stepup"); + testDonorStartMigrationInterrupt((donorRst) => { + // Use a short replSetStepDown seconds to make it more likely for the old primary to + // step back up. + assert.commandWorked(donorRst.getPrimary().adminCommand({replSetStepDown: 1, force: true})); + }); +})(); + +(() => { + jsTest.log("Test that the migration resumes after restart"); + testDonorStartMigrationInterrupt((donorRst) => { + donorRst.stopSet(null /* signal */, true /*forRestart */); + donorRst.startSet({restart: true}); + }); +})(); + +(() => { + jsTest.log("Test that the donorForgetMigration command can be retried on stepup"); + testDonorForgetMigrationInterrupt((donorRst) => { + // Use a short replSetStepDown seconds to make it more likely for the old primary to + // step back up. 
+ assert.commandWorked(donorRst.getPrimary().adminCommand({replSetStepDown: 1, force: true})); + }); +})(); + +(() => { + jsTest.log("Test that the donorForgetMigration command can be retried after restart"); + testDonorForgetMigrationInterrupt((donorRst) => { + donorRst.stopSet(null /* signal */, true /*forRestart */); + donorRst.startSet({restart: true}); + }); +})(); +})(); diff --git a/src/mongo/db/repl/tenant_migration_donor_service.cpp b/src/mongo/db/repl/tenant_migration_donor_service.cpp index a2b2a4ed925..1478ad2cf9c 100644 --- a/src/mongo/db/repl/tenant_migration_donor_service.cpp +++ b/src/mongo/db/repl/tenant_migration_donor_service.cpp @@ -119,7 +119,7 @@ boost::optional<BSONObj> TenantMigrationDonorService::Instance::reportForCurrent bob.append("migrationCompleted", _completionPromise.getFuture().isReady()); bob.append("instanceID", _stateDoc.getId().toBSON()); bob.append("recipientConnectionString", _stateDoc.getRecipientConnectionString()); - bob.append("lastDurableState", _stateDoc.getState()); + bob.append("lastDurableState", _durableState.state); if (_stateDoc.getExpireAt()) { bob.append("expireAt", _stateDoc.getExpireAt()->toString()); } |