summaryrefslogtreecommitdiff
path: root/jstests/replsets
diff options
context:
space:
mode:
author Andrew Shuvalov <andrew.shuvalov@mongodb.com> 2021-02-19 17:28:24 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-03-11 15:57:49 +0000
commit 8a139e9482632def38281aacddb6d12c02a6f26e (patch)
tree a44e65ca4865bd232717b6af5c86847ff69e4d34 /jstests/replsets
parent ab0c03e4ab5208339fd671305dce44a47c64fe12 (diff)
download mongo-8a139e9482632def38281aacddb6d12c02a6f26e.tar.gz
SERVER-54263: Add tenantMigrations serverStatus section
Diffstat (limited to 'jstests/replsets')
-rw-r--r-- jstests/replsets/libs/tenant_migration_test.js 47
-rw-r--r-- jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js 26
-rw-r--r-- jstests/replsets/tenant_migration_donor_state_machine.js 83
-rw-r--r-- jstests/replsets/tenant_migration_recipient_resume_on_stepup_and_restart.js 23
4 files changed, 167 insertions, 12 deletions
diff --git a/jstests/replsets/libs/tenant_migration_test.js b/jstests/replsets/libs/tenant_migration_test.js
index 07819e60bfc..2ceb9d892b2 100644
--- a/jstests/replsets/libs/tenant_migration_test.js
+++ b/jstests/replsets/libs/tenant_migration_test.js
@@ -564,7 +564,7 @@ function TenantMigrationTest({
};
/**
- * Returns the TenantMigrationAccessBlocker associated with given the tenantId on the
+ * Returns the TenantMigrationAccessBlocker associated with the given tenantId on the
* node.
*/
this.getTenantMigrationAccessBlocker = function(node, tenantId) {
@@ -573,6 +573,51 @@ function TenantMigrationTest({
};
/**
+     * Runs serverStatus on the given node, asserts that the command worked, and returns the
+     * 'tenantMigrations' section of the response.
+     */
+    this.getTenantMigrationStats = function(node) {
+        const serverStatusRes = assert.commandWorked(node.adminCommand({serverStatus: 1}));
+        const {tenantMigrations} = serverStatusRes;
+        return tenantMigrations;
+    };
+
+    /**
+     * Waits until every tenant migration stats counter reported by the node equals its
+     * expected count. Counters that are not specified are expected to be 0, and the whole
+     * options object may be omitted.
+     *
+     * Fails right away if a counter is observed above its expected count, and fails through
+     * assert.soon() if the counters do not reach their targets.
+     *
+     * @param node the node whose stats are read with getTenantMigrationStats().
+     * @param expected counts, keyed by the counter name in the 'tenantMigrations' stats.
+     */
+    this.awaitTenantMigrationStatsCounts = function(node, {
+        currentMigrationsDonating = 0,
+        currentMigrationsReceiving = 0,
+        totalSuccessfulMigrationsDonated = 0,
+        totalSuccessfulMigrationsReceived = 0,
+        totalFailedMigrationsDonated = 0,
+        totalFailedMigrationsReceived = 0
+    } = {}) {
+        // Keep the counters in the order in which they are checked.
+        const expectedCounts = {
+            currentMigrationsDonating,
+            currentMigrationsReceiving,
+            totalSuccessfulMigrationsDonated,
+            totalSuccessfulMigrationsReceived,
+            totalFailedMigrationsDonated,
+            totalFailedMigrationsReceived
+        };
+
+        // Returns true when the counter reached its target. Asserts that the counter did not
+        // go past the target, since waiting longer could not fix that.
+        const check = function(expectedVal, stats, fieldName) {
+            // NOTE(review): loose '==' is kept from the original; the counters may not be
+            // plain JS numbers (e.g. NumberLong) - confirm before switching to '==='.
+            if (expectedVal == stats[fieldName]) {
+                return true;
+            }
+            assert.gt(expectedVal,
+                      stats[fieldName],
+                      `Stat ${fieldName} value ${stats[fieldName]} exceeded the target`);
+            return false;
+        };
+
+        let stats;
+        assert.soon(
+            () => {
+                stats = this.getTenantMigrationStats(node);
+                // every() stops at the first counter that has not reached its target, exactly
+                // like a chain of '&&'.
+                return Object.keys(expectedCounts)
+                    .every(fieldName => check(expectedCounts[fieldName], stats, fieldName));
+            },
+            // The message must be a function: a plain template string would be evaluated
+            // before the first poll, while 'stats' is still undefined, and would always
+            // report "got undefined".
+            () => `Awaiting for tenant migration stats to reach target, got ${tojson(stats)}`);
+    };
+
+ /**
* Returns the donor ReplSetTest.
*/
this.getDonorRst = function() {
diff --git a/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js b/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js
index 0e85ab72e0b..bad84337a06 100644
--- a/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js
+++ b/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js
@@ -36,7 +36,7 @@ const migrationX509Options = TenantMigrationUtil.makeX509OptionsForTest();
* Runs the donorStartMigration command to start a migration, and interrupts the migration on the
* donor using the 'interruptFunc', and asserts that migration eventually commits.
*/
-function testDonorStartMigrationInterrupt(interruptFunc) {
+function testDonorStartMigrationInterrupt(interruptFunc, donorRestarted) {
const donorRst =
new ReplSetTest({nodes: 3, name: "donorRst", nodeOptions: migrationX509Options.donor});
@@ -51,7 +51,8 @@ function testDonorStartMigrationInterrupt(interruptFunc) {
donorRst.stopSet();
return;
}
- const donorPrimary = tenantMigrationTest.getDonorPrimary();
+ let donorPrimary = tenantMigrationTest.getDonorPrimary();
+ const recipientPrimary = tenantMigrationTest.getRecipientPrimary();
const migrationId = UUID();
const migrationOpts = {
@@ -82,6 +83,23 @@ function testDonorStartMigrationInterrupt(interruptFunc) {
TenantMigrationTest.DonorState.kCommitted);
assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString));
+ donorPrimary = tenantMigrationTest.getDonorPrimary(); // Could change after interrupt.
+ const donorStats = tenantMigrationTest.getTenantMigrationStats(donorPrimary);
+ jsTestLog(`Stats at the donor primary: ${tojson(donorStats)}`);
+ if (donorRestarted) {
+ // If a full restart happened, the count could be lost completely.
+ assert.gte(1, donorStats.totalSuccessfulMigrationsDonated);
+ } else {
+ // Double counting happens when the failover occurs after the migration completes
+ // but before the state doc GC mark is persisted. Although this test targets this
+ // scenario, it has a low probability in production.
+ assert(1 == donorStats.totalSuccessfulMigrationsDonated ||
+ 2 == donorStats.totalSuccessfulMigrationsDonated);
+ }
+ // Skip checking the stats on the recipient since enableRecipientTesting is false
+ // so the recipient is forced to respond to recipientSyncData without starting the
+ // migration.
+
tenantMigrationTest.stop();
donorRst.stopSet();
}
@@ -377,7 +395,7 @@ function testStateDocPersistenceOnFailover(interruptFunc, fpName, isShutdown = f
assert.commandWorked(
donorPrimary.adminCommand({replSetStepDown: ReplSetTest.kForeverSecs, force: true}));
assert.commandWorked(donorPrimary.adminCommand({replSetFreeze: 0}));
- });
+ }, false /* donor restarted */);
})();
(() => {
@@ -385,7 +403,7 @@ function testStateDocPersistenceOnFailover(interruptFunc, fpName, isShutdown = f
testDonorStartMigrationInterrupt((donorRst) => {
donorRst.stopSet(null /* signal */, true /*forRestart */);
donorRst.startSet({restart: true});
- });
+ }, true /* donor restarted */);
})();
(() => {
diff --git a/jstests/replsets/tenant_migration_donor_state_machine.js b/jstests/replsets/tenant_migration_donor_state_machine.js
index 0eb11a3b70b..433eb51bd25 100644
--- a/jstests/replsets/tenant_migration_donor_state_machine.js
+++ b/jstests/replsets/tenant_migration_donor_state_machine.js
@@ -20,6 +20,7 @@ load("jstests/replsets/libs/tenant_migration_test.js");
let expectedNumRecipientSyncDataCmdSent = 0;
let expectedNumRecipientForgetMigrationCmdSent = 0;
+let expectedRecipientSyncDataMetricsFailed = 0;
/**
* Runs the donorForgetMigration command and asserts that the TenantMigrationAccessBlocker and donor
@@ -118,6 +119,24 @@ const kTenantId = "testDb";
let configDonorsColl = donorPrimary.getCollection(TenantMigrationTest.kConfigDonorsNS);
+/**
+ * Reads the tenant migration stats of the node once, logs them, and asserts that every
+ * counter equals the expected value. Counters that are not specified are expected to be 0.
+ */
+function testStats(node, {
+    currentMigrationsDonating = 0,
+    currentMigrationsReceiving = 0,
+    totalSuccessfulMigrationsDonated = 0,
+    totalSuccessfulMigrationsReceived = 0,
+    totalFailedMigrationsDonated = 0,
+    totalFailedMigrationsReceived = 0
+}) {
+    // Listed in the order in which the counters are asserted.
+    const expectedCounts = {
+        currentMigrationsDonating,
+        currentMigrationsReceiving,
+        totalSuccessfulMigrationsDonated,
+        totalSuccessfulMigrationsReceived,
+        totalFailedMigrationsDonated,
+        totalFailedMigrationsReceived
+    };
+    const actualCounts = tenantMigrationTest.getTenantMigrationStats(node);
+    jsTestLog(actualCounts);
+    for (const counterName of Object.keys(expectedCounts)) {
+        assert.eq(expectedCounts[counterName], actualCounts[counterName]);
+    }
+}
+
(() => {
jsTest.log("Test the case where the migration commits");
const migrationId = UUID();
@@ -153,6 +172,9 @@ let configDonorsColl = donorPrimary.getCollection(TenantMigrationTest.kConfigDon
donorPrimary.adminCommand({donorForgetMigration: 1, migrationId: migrationId}),
ErrorCodes.TenantMigrationInProgress);
+ testStats(donorPrimary, {currentMigrationsDonating: 1});
+ testStats(recipientPrimary, {currentMigrationsReceiving: 1});
+
// Allow the migration to complete.
blockingFp.off();
const stateRes =
@@ -178,6 +200,56 @@ let configDonorsColl = donorPrimary.getCollection(TenantMigrationTest.kConfigDon
assert.eq(recipientSyncDataMetrics.total, expectedNumRecipientSyncDataCmdSent);
testDonorForgetMigrationAfterMigrationCompletes(donorRst, recipientRst, migrationId, kTenantId);
+
+ testStats(donorPrimary, {totalSuccessfulMigrationsDonated: 1});
+ testStats(recipientPrimary, {totalSuccessfulMigrationsReceived: 1});
+})();
+
+(() => {
+ jsTest.log(
+ "Test the case where the migration aborts after data becomes consistent on the recipient " +
+ "but before setting the consistent promise.");
+ const migrationId = UUID();
+ const migrationOpts = {
+ migrationIdString: extractUUIDFromObject(migrationId),
+ tenantId: kTenantId,
+ };
+ // Make the recipient stop with InternalError at this failpoint; the migration must then abort.
+ let abortRecipientFp =
+ configureFailPoint(recipientPrimary,
+ "fpBeforeFulfillingDataConsistentPromise",
+ {action: "stop", stopErrorCode: ErrorCodes.InternalError});
+ const stateRes = assert.commandWorked(tenantMigrationTest.runMigration(
+ migrationOpts, false /* retryOnRetryableErrors */, false /* automaticForgetMigration */));
+ assert.eq(stateRes.state, TenantMigrationTest.DonorState.kAborted);
+ abortRecipientFp.off();
+ // Check the donor state doc: aborted state, opTime matching its oplog entry, abort reason code.
+ const donorDoc = configDonorsColl.findOne({tenantId: kTenantId});
+ const abortOplogEntry = donorPrimary.getDB("local").oplog.rs.findOne(
+ {ns: TenantMigrationTest.kConfigDonorsNS, op: "u", o: donorDoc});
+ assert.eq(donorDoc.state, TenantMigrationTest.DonorState.kAborted);
+ assert.eq(donorDoc.commitOrAbortOpTime.ts, abortOplogEntry.ts);
+ assert.eq(donorDoc.abortReason.code, ErrorCodes.InternalError);
+ // Wait until the donor's serverStatus reports this tenant's access blocker as aborted.
+ let mtabs;
+ assert.soon(() => {
+ mtabs = donorPrimary.adminCommand({serverStatus: 1}).tenantMigrationAccessBlocker;
+ return mtabs[kTenantId].state === TenantMigrationTest.DonorAccessState.kAborted;
+ });
+ assert(mtabs[kTenantId].abortOpTime);
+ // This case is expected to add one recipientSyncData command, which also counts as failed.
+ expectedRecipientSyncDataMetricsFailed++;
+ expectedNumRecipientSyncDataCmdSent++;
+ const recipientSyncDataMetrics =
+ recipientPrimary.adminCommand({serverStatus: 1}).metrics.commands.recipientSyncData;
+ assert.eq(recipientSyncDataMetrics.failed, expectedRecipientSyncDataMetricsFailed);
+ assert.eq(recipientSyncDataMetrics.total, expectedNumRecipientSyncDataCmdSent);
+
+ testDonorForgetMigrationAfterMigrationCompletes(donorRst, recipientRst, migrationId, kTenantId);
+ // NOTE(review): the successful counts of 1 presumably carry over from the earlier commit case.
+ testStats(donorPrimary, {totalSuccessfulMigrationsDonated: 1, totalFailedMigrationsDonated: 1});
+ testStats(recipientPrimary,
+ {totalSuccessfulMigrationsReceived: 1, totalFailedMigrationsReceived: 1});
})();
(() => {
@@ -188,12 +260,12 @@ let configDonorsColl = donorPrimary.getCollection(TenantMigrationTest.kConfigDon
tenantId: kTenantId,
};
- let abortFp =
+ let abortDonorFp =
configureFailPoint(donorPrimary, "abortTenantMigrationBeforeLeavingBlockingState");
const stateRes = assert.commandWorked(tenantMigrationTest.runMigration(
migrationOpts, false /* retryOnRetryableErrors */, false /* automaticForgetMigration */));
assert.eq(stateRes.state, TenantMigrationTest.DonorState.kAborted);
- abortFp.off();
+ abortDonorFp.off();
const donorDoc = configDonorsColl.findOne({tenantId: kTenantId});
const abortOplogEntry = donorPrimary.getDB("local").oplog.rs.findOne(
@@ -212,10 +284,15 @@ let configDonorsColl = donorPrimary.getCollection(TenantMigrationTest.kConfigDon
expectedNumRecipientSyncDataCmdSent += 2;
const recipientSyncDataMetrics =
recipientPrimary.adminCommand({serverStatus: 1}).metrics.commands.recipientSyncData;
- assert.eq(recipientSyncDataMetrics.failed, 0);
+ assert.eq(recipientSyncDataMetrics.failed, expectedRecipientSyncDataMetricsFailed);
assert.eq(recipientSyncDataMetrics.total, expectedNumRecipientSyncDataCmdSent);
testDonorForgetMigrationAfterMigrationCompletes(donorRst, recipientRst, migrationId, kTenantId);
+
+ testStats(donorPrimary, {totalSuccessfulMigrationsDonated: 1, totalFailedMigrationsDonated: 2});
+ // The recipient had a chance to synchronize data and from its side the migration succeeded.
+ testStats(recipientPrimary,
+ {totalSuccessfulMigrationsReceived: 2, totalFailedMigrationsReceived: 1});
})();
// Drop the TTL index to make sure that the migration state is still available when the
diff --git a/jstests/replsets/tenant_migration_recipient_resume_on_stepup_and_restart.js b/jstests/replsets/tenant_migration_recipient_resume_on_stepup_and_restart.js
index 3aafeaab1e4..3bbdf02f43f 100644
--- a/jstests/replsets/tenant_migration_recipient_resume_on_stepup_and_restart.js
+++ b/jstests/replsets/tenant_migration_recipient_resume_on_stepup_and_restart.js
@@ -30,8 +30,9 @@ const migrationX509Options = TenantMigrationUtil.makeX509OptionsForTest();
* Runs the donorStartMigration command to start a migration, and interrupts the migration on the
* recipient using the 'interruptFunc' after the migration starts on the recipient side, and
* asserts that migration eventually commits.
+ * @param {boolean} recipientRestarted - needed to properly assert the tenant migration stats counts.
*/
-function testRecipientSyncDataInterrupt(interruptFunc) {
+function testRecipientSyncDataInterrupt(interruptFunc, recipientRestarted) {
const recipientRst = new ReplSetTest(
{nodes: 3, name: "recipientRst", nodeOptions: migrationX509Options.recipient});
recipientRst.startSet();
@@ -44,7 +45,8 @@ function testRecipientSyncDataInterrupt(interruptFunc) {
return;
}
const donorRst = tenantMigrationTest.getDonorRst();
- const recipientPrimary = tenantMigrationTest.getRecipientPrimary();
+ const donorPrimary = tenantMigrationTest.getDonorPrimary();
+ let recipientPrimary = tenantMigrationTest.getRecipientPrimary();
const migrationId = UUID();
const migrationOpts = {
@@ -76,6 +78,19 @@ function testRecipientSyncDataInterrupt(interruptFunc) {
TenantMigrationTest.DonorState.kCommitted);
assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString));
+ tenantMigrationTest.awaitTenantMigrationStatsCounts(donorPrimary,
+ {totalSuccessfulMigrationsDonated: 1});
+ recipientPrimary = tenantMigrationTest.getRecipientPrimary(); // Could change after interrupt.
+ if (!recipientRestarted) {
+ tenantMigrationTest.awaitTenantMigrationStatsCounts(recipientPrimary,
+ {totalSuccessfulMigrationsReceived: 1});
+ } else {
+ // After a full restart the count could be lost completely.
+ const stats = tenantMigrationTest.getTenantMigrationStats(recipientPrimary);
+ assert(1 == stats.totalSuccessfulMigrationsReceived ||
+ 0 == stats.totalSuccessfulMigrationsReceived);
+ }
+
tenantMigrationTest.stop();
recipientRst.stopSet();
}
@@ -169,7 +184,7 @@ function testRecipientForgetMigrationInterrupt(interruptFunc) {
assert.commandWorked(recipientPrimary.adminCommand(
{replSetStepDown: ReplSetTest.kForeverSecs, force: true}));
assert.commandWorked(recipientPrimary.adminCommand({replSetFreeze: 0}));
- });
+ }, false);
})();
(() => {
@@ -177,7 +192,7 @@ function testRecipientForgetMigrationInterrupt(interruptFunc) {
testRecipientSyncDataInterrupt((recipientRst) => {
recipientRst.stopSet(null /* signal */, true /*forRestart */);
recipientRst.startSet({restart: true});
- });
+ }, true);
})();
(() => {