summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCheahuychou Mao <mao.cheahuychou@gmail.com>2020-10-06 21:35:01 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-10-15 23:18:05 +0000
commitb69f93de867415ea1e700fbea462399ed63b309b (patch)
treea1a7aeeaf08f4df1b19daf69398d066559654b57
parent0c904d2a3d9028d171961d88178e3f15c2d5a1e0 (diff)
downloadmongo-b69f93de867415ea1e700fbea462399ed63b309b.tar.gz
SERVER-51332 tenant_migration_donor_failover_and_shutdown.js should wait for the primary to receive the command before shutting down the node
-rw-r--r--jstests/replsets/tenant_migration_donor_interrupt_on_stepdown_and_shutdown.js (renamed from jstests/replsets/tenant_migration_donor_failover_and_shutdown.js)112
-rw-r--r--jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js213
-rw-r--r--src/mongo/db/repl/tenant_migration_donor_service.cpp2
3 files changed, 234 insertions, 93 deletions
diff --git a/jstests/replsets/tenant_migration_donor_failover_and_shutdown.js b/jstests/replsets/tenant_migration_donor_interrupt_on_stepdown_and_shutdown.js
index 36b3a0e9e6c..d79de93afbf 100644
--- a/jstests/replsets/tenant_migration_donor_failover_and_shutdown.js
+++ b/jstests/replsets/tenant_migration_donor_interrupt_on_stepdown_and_shutdown.js
@@ -1,5 +1,5 @@
/**
- * Tests that the migration is interrupted successfully on stepdown and shutdown.
+ * Tests that tenant migrations are interrupted successfully on stepdown and shutdown.
*
* @tags: [requires_fcv_47, requires_majority_read_concern, requires_persistence,
* incompatible_with_eft]
@@ -12,28 +12,17 @@ load("jstests/libs/parallelTester.js");
load("jstests/libs/uuid_util.js");
load("jstests/replsets/libs/tenant_migration_util.js");
-const kMaxSleepTimeMS = 1000;
-const kConfigDonorsNS = "config.tenantMigrationDonors";
+const kMaxSleepTimeMS = 100;
const kTenantId = "testTenantId";
-// Set the delay before a donor state doc is garbage collected to be short to speed up the test.
-const kGarbageCollectionDelayMS = 30 * 1000;
-
-// Set the TTL monitor to run at a smaller interval to speed up the test.
-const kTTLMonitorSleepSecs = 1;
-
/**
* Runs the donorStartMigration command to start a migration, and interrupts the migration on the
* donor using the 'interruptFunc', and verifies the command response using the
* 'verifyCmdResponseFunc'.
*/
-function testDonorStartMigrationInterrupt(
- interruptFunc, verifyCmdResponseFunc, numDonorRsNodes = 1) {
- const donorRst = new ReplSetTest({
- nodes: numDonorRsNodes,
- name: "donorRst",
- nodeOptions: {setParameter: {enableTenantMigrations: true}}
- });
+function testDonorStartMigrationInterrupt(interruptFunc, verifyCmdResponseFunc) {
+ const donorRst = new ReplSetTest(
+ {nodes: 1, name: "donorRst", nodeOptions: {setParameter: {enableTenantMigrations: true}}});
const recipientRst = new ReplSetTest({
nodes: 1,
name: "recipientRst",
@@ -59,6 +48,11 @@ function testDonorStartMigrationInterrupt(
let migrationThread =
new Thread(TenantMigrationUtil.startMigration, donorPrimary.host, migrationOpts);
migrationThread.start();
+
+ // Wait for the donorStartMigration command to start.
+ assert.soon(() => donorPrimary.adminCommand({currentOp: true, desc: "tenant donor migration"})
+ .inprog.length > 0);
+
sleep(Math.random() * kMaxSleepTimeMS);
interruptFunc(donorRst, migrationId, migrationOpts.tenantId);
verifyCmdResponseFunc(migrationThread);
@@ -72,16 +66,13 @@ function testDonorStartMigrationInterrupt(
* the donor using the 'interruptFunc', and verifies the command response using the
* 'verifyCmdResponseFunc'.
*/
-function testDonorForgetMigrationInterrupt(
- interruptFunc, verifyCmdResponseFunc, numDonorRsNodes = 1) {
+function testDonorForgetMigrationInterrupt(interruptFunc, verifyCmdResponseFunc) {
const donorRst = new ReplSetTest({
- nodes: numDonorRsNodes,
+ nodes: 1,
name: "donorRst",
nodeOptions: {
setParameter: {
enableTenantMigrations: true,
- tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS,
- ttlMonitorSleepSecs: kTTLMonitorSleepSecs,
}
}
});
@@ -91,8 +82,6 @@ function testDonorForgetMigrationInterrupt(
nodeOptions: {
setParameter: {
enableTenantMigrations: true,
- tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS,
- ttlMonitorSleepSecs: kTTLMonitorSleepSecs,
}
}
});
@@ -113,12 +102,18 @@ function testDonorForgetMigrationInterrupt(
readPreference: {mode: "primary"},
};
- donorPrimary.getCollection(kConfigDonorsNS).createIndex({expireAt: 1}, {expireAfterSeconds: 0});
-
assert.commandWorked(TenantMigrationUtil.startMigration(donorPrimary.host, migrationOpts));
let forgetMigrationThread = new Thread(
TenantMigrationUtil.forgetMigration, donorPrimary.host, migrationOpts.migrationIdString);
forgetMigrationThread.start();
+
+ // Wait for the donorForgetMigration command to start.
+ assert.soon(() => {
+ const res = assert.commandWorked(
+ donorPrimary.adminCommand({currentOp: true, desc: "tenant donor migration"}));
+ return res.inprog[0].expireAt != null;
+ });
+
sleep(Math.random() * kMaxSleepTimeMS);
interruptFunc(donorRst, migrationId, migrationOpts.tenantId);
verifyCmdResponseFunc(forgetMigrationThread);
@@ -152,19 +147,6 @@ function assertCmdSucceededOrInterruptedDueToShutDown(cmdThread) {
}
}
-/**
- * If the donor state doc for the migration 'migrationId' exists on the donor (i.e. the donor's
- * primary stepped down or shut down after inserting the doc), asserts that the migration
- * eventually commits.
- */
-function testMigrationCommitsIfDurableStateExists(donorRst, migrationId, tenantId) {
- const donorPrimary = donorRst.getPrimary();
- const configDonorsColl = donorPrimary.getCollection(kConfigDonorsNS);
- if (configDonorsColl.count({_id: migrationId}) > 0) {
- TenantMigrationUtil.waitForMigrationToCommit(donorRst.nodes, migrationId, tenantId);
- }
-}
-
(() => {
jsTest.log("Test that the donorStartMigration command is interrupted successfully on stepdown");
testDonorStartMigrationInterrupt((donorRst) => {
@@ -194,58 +176,4 @@ function testMigrationCommitsIfDurableStateExists(donorRst, migrationId, tenantI
donorRst.stopSet();
}, assertCmdSucceededOrInterruptedDueToShutDown);
})();
-
-(() => {
- jsTest.log("Test that the migration resumes on stepup");
- testDonorStartMigrationInterrupt((donorRst, migrationId, tenantId) => {
- // Use a short replSetStepDown seconds to make it more likely for the old primary to
- // step back up.
- assert.commandWorked(donorRst.getPrimary().adminCommand({replSetStepDown: 1, force: true}));
-
- testMigrationCommitsIfDurableStateExists(donorRst, migrationId, tenantId);
- }, assertCmdSucceededOrInterruptedDueToStepDown, 3 /* numDonorRsNodes */);
-})();
-
-(() => {
- jsTest.log("Test that the migration resumes after restart");
- testDonorStartMigrationInterrupt((donorRst, migrationId, tenantId) => {
- donorRst.stopSet(null /* signal */, true /*forRestart */);
- donorRst.startSet({restart: true, setParameter: {enableTenantMigrations: true}});
-
- testMigrationCommitsIfDurableStateExists(donorRst, migrationId, tenantId);
- }, assertCmdSucceededOrInterruptedDueToShutDown, 3 /* numDonorRsNodes */);
-})();
-
-(() => {
- jsTest.log("Test that the donorForgetMigration command can be retried on stepup");
- testDonorForgetMigrationInterrupt((donorRst, migrationId, tenantId) => {
- let donorPrimary = donorRst.getPrimary();
-
- // Use a short replSetStepDown seconds to make it more likely for the old primary to
- // step back up.
- assert.commandWorked(donorRst.getPrimary().adminCommand({replSetStepDown: 1, force: true}));
-
- donorPrimary = donorRst.getPrimary();
- assert.commandWorked(TenantMigrationUtil.forgetMigration(
- donorPrimary.host, extractUUIDFromObject(migrationId)));
-
- TenantMigrationUtil.waitForMigrationGarbageCollection(
- donorRst.nodes, migrationId, tenantId);
- }, assertCmdSucceededOrInterruptedDueToStepDown, 3 /* numDonorRsNodes */);
-})();
-
-(() => {
- jsTest.log("Test that the donorForgetMigration command can be retried after restart");
- testDonorForgetMigrationInterrupt((donorRst, migrationId, tenantId) => {
- donorRst.stopSet(null /* signal */, true /*forRestart */);
- donorRst.startSet({restart: true, setParameter: {enableTenantMigrations: true}});
-
- let donorPrimary = donorRst.getPrimary();
- assert.commandWorked(TenantMigrationUtil.forgetMigration(
- donorPrimary.host, extractUUIDFromObject(migrationId)));
-
- TenantMigrationUtil.waitForMigrationGarbageCollection(
- donorRst.nodes, migrationId, tenantId);
- }, assertCmdSucceededOrInterruptedDueToShutDown, 3 /* numDonorRsNodes */);
-})();
})();
diff --git a/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js b/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js
new file mode 100644
index 00000000000..19aaaf8398e
--- /dev/null
+++ b/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js
@@ -0,0 +1,213 @@
+/**
+ * Tests that tenant migrations resume successfully on stepup and restart.
+ *
+ * @tags: [requires_fcv_47, requires_majority_read_concern, requires_persistence,
+ * incompatible_with_eft]
+ */
+
+(function() {
+"use strict";
+
+load("jstests/libs/parallelTester.js");
+load("jstests/libs/uuid_util.js");
+load("jstests/replsets/libs/tenant_migration_util.js");
+
+const kMaxSleepTimeMS = 100;
+const kConfigDonorsNS = "config.tenantMigrationDonors";
+const kTenantId = "testTenantId";
+
+// Set the delay before a donor state doc is garbage collected to be short to speed up the test.
+const kGarbageCollectionDelayMS = 30 * 1000;
+
+// Set the TTL monitor to run at a smaller interval to speed up the test.
+const kTTLMonitorSleepSecs = 1;
+
+/**
+ * If the donor state doc for the migration 'migrationId' exists on the donor (i.e. the donor's
+ * primary stepped down or shut down after inserting the doc), asserts that the migration
+ * eventually commits.
+ */
+function assertMigrationCommitsIfDurableStateExists(donorRst, migrationId, tenantId) {
+ const donorPrimary = donorRst.getPrimary();
+ const configDonorsColl = donorPrimary.getCollection(kConfigDonorsNS);
+ if (configDonorsColl.count({_id: migrationId}) > 0) {
+ TenantMigrationUtil.waitForMigrationToCommit(donorRst.nodes, migrationId, tenantId);
+ }
+}
+
+/**
+ * Runs the donorStartMigration command to start a migration, and interrupts the migration on the
+ * donor using the 'interruptFunc', and asserts that the migration eventually commits.
+ */
+function testDonorStartMigrationInterrupt(interruptFunc) {
+ const donorRst = new ReplSetTest(
+ {nodes: 3, name: "donorRst", nodeOptions: {setParameter: {enableTenantMigrations: true}}});
+ const recipientRst = new ReplSetTest({
+ nodes: 1,
+ name: "recipientRst",
+ nodeOptions: {setParameter: {enableTenantMigrations: true}}
+ });
+
+ donorRst.startSet();
+ donorRst.initiate();
+
+ recipientRst.startSet();
+ recipientRst.initiate();
+
+ const donorPrimary = donorRst.getPrimary();
+
+ const donorRstArgs = {
+ name: donorRst.name,
+ nodeHosts: donorRst.nodes.map(node => `127.0.0.1:${node.port}`),
+ nodeOptions: donorRst.nodeOptions,
+ keyFile: donorRst.keyFile,
+ host: donorRst.host,
+ waitForKeys: false,
+ };
+
+ const migrationId = UUID();
+ const migrationOpts = {
+ migrationIdString: extractUUIDFromObject(migrationId),
+ recipientConnString: recipientRst.getURL(),
+ tenantId: kTenantId,
+ readPreference: {mode: "primary"},
+ };
+
+ let migrationThread = new Thread(
+ TenantMigrationUtil.startMigrationRetryOnRetryableErrors, donorRstArgs, migrationOpts);
+ migrationThread.start();
+
+ // Wait for the donorStartMigration command to start.
+ assert.soon(() => donorPrimary.adminCommand({currentOp: true, desc: "tenant donor migration"})
+ .inprog.length > 0);
+
+ sleep(Math.random() * kMaxSleepTimeMS);
+ interruptFunc(donorRst);
+
+ assert.commandWorked(migrationThread.returnData());
+ assertMigrationCommitsIfDurableStateExists(donorRst, migrationId, migrationOpts.tenantId);
+
+ donorRst.stopSet();
+ recipientRst.stopSet();
+}
+
+/**
+ * Starts a migration and waits for it to commit, then runs the donorForgetMigration, and interrupts
+ * the donor using the 'interruptFunc', and asserts that the migration state is eventually garbage
+ * collected.
+ */
+function testDonorForgetMigrationInterrupt(interruptFunc) {
+ const donorRst = new ReplSetTest({
+ nodes: 3,
+ name: "donorRst",
+ nodeOptions: {
+ setParameter: {
+ enableTenantMigrations: true,
+ tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS,
+ ttlMonitorSleepSecs: kTTLMonitorSleepSecs,
+ }
+ }
+ });
+ const recipientRst = new ReplSetTest({
+ nodes: 1,
+ name: "recipientRst",
+ nodeOptions: {
+ setParameter: {
+ enableTenantMigrations: true,
+ tenantMigrationGarbageCollectionDelayMS: kGarbageCollectionDelayMS,
+ ttlMonitorSleepSecs: kTTLMonitorSleepSecs,
+ }
+ }
+ });
+
+ donorRst.startSet();
+ donorRst.initiate();
+
+ recipientRst.startSet();
+ recipientRst.initiate();
+
+ let donorPrimary = donorRst.getPrimary();
+
+ const donorRstArgs = {
+ name: donorRst.name,
+ nodeHosts: donorRst.nodes.map(node => `127.0.0.1:${node.port}`),
+ nodeOptions: donorRst.nodeOptions,
+ keyFile: donorRst.keyFile,
+ host: donorRst.host,
+ waitForKeys: false,
+ };
+
+ const migrationId = UUID();
+ const migrationOpts = {
+ migrationIdString: extractUUIDFromObject(migrationId),
+ recipientConnString: recipientRst.getURL(),
+ tenantId: kTenantId,
+ readPreference: {mode: "primary"},
+ };
+
+ donorPrimary.getCollection(kConfigDonorsNS).createIndex({expireAt: 1}, {expireAfterSeconds: 0});
+
+ assert.commandWorked(TenantMigrationUtil.startMigration(donorPrimary.host, migrationOpts));
+ let forgetMigrationThread =
+ new Thread(TenantMigrationUtil.forgetMigrationRetryOnRetryableErrors,
+ donorRstArgs,
+ migrationOpts.migrationIdString);
+ forgetMigrationThread.start();
+
+ // Wait for the donorForgetMigration command to start.
+ assert.soon(() => {
+ const res = assert.commandWorked(
+ donorPrimary.adminCommand({currentOp: true, desc: "tenant donor migration"}));
+ return res.inprog[0].expireAt != null;
+ });
+
+ sleep(Math.random() * kMaxSleepTimeMS);
+ interruptFunc(donorRst);
+
+ donorPrimary = donorRst.getPrimary();
+ assert.commandWorkedOrFailedWithCode(
+ TenantMigrationUtil.forgetMigration(donorPrimary.host, extractUUIDFromObject(migrationId)),
+ ErrorCodes.NoSuchTenantMigration);
+
+ assert.commandWorked(forgetMigrationThread.returnData());
+ TenantMigrationUtil.waitForMigrationGarbageCollection(
+ donorRst.nodes, migrationId, migrationOpts.tenantId);
+
+ donorRst.stopSet();
+ recipientRst.stopSet();
+}
+
+(() => {
+ jsTest.log("Test that the migration resumes on stepup");
+ testDonorStartMigrationInterrupt((donorRst) => {
+ // Use a short replSetStepDown seconds to make it more likely for the old primary to
+ // step back up.
+ assert.commandWorked(donorRst.getPrimary().adminCommand({replSetStepDown: 1, force: true}));
+ });
+})();
+
+(() => {
+ jsTest.log("Test that the migration resumes after restart");
+ testDonorStartMigrationInterrupt((donorRst) => {
+ donorRst.stopSet(null /* signal */, true /*forRestart */);
+ donorRst.startSet({restart: true});
+ });
+})();
+
+(() => {
+ jsTest.log("Test that the donorForgetMigration command can be retried on stepup");
+ testDonorForgetMigrationInterrupt((donorRst) => {
+ // Use a short replSetStepDown seconds to make it more likely for the old primary to
+ // step back up.
+ assert.commandWorked(donorRst.getPrimary().adminCommand({replSetStepDown: 1, force: true}));
+ });
+})();
+
+(() => {
+ jsTest.log("Test that the donorForgetMigration command can be retried after restart");
+ testDonorForgetMigrationInterrupt((donorRst) => {
+ donorRst.stopSet(null /* signal */, true /*forRestart */);
+ donorRst.startSet({restart: true});
+ });
+})();
+})();
diff --git a/src/mongo/db/repl/tenant_migration_donor_service.cpp b/src/mongo/db/repl/tenant_migration_donor_service.cpp
index a2b2a4ed925..1478ad2cf9c 100644
--- a/src/mongo/db/repl/tenant_migration_donor_service.cpp
+++ b/src/mongo/db/repl/tenant_migration_donor_service.cpp
@@ -119,7 +119,7 @@ boost::optional<BSONObj> TenantMigrationDonorService::Instance::reportForCurrent
bob.append("migrationCompleted", _completionPromise.getFuture().isReady());
bob.append("instanceID", _stateDoc.getId().toBSON());
bob.append("recipientConnectionString", _stateDoc.getRecipientConnectionString());
- bob.append("lastDurableState", _stateDoc.getState());
+ bob.append("lastDurableState", _durableState.state);
if (_stateDoc.getExpireAt()) {
bob.append("expireAt", _stateDoc.getExpireAt()->toString());
}