diff options
author | Andrew Shuvalov <andrew.shuvalov@mongodb.com> | 2021-02-19 17:28:24 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-03-11 15:57:49 +0000 |
commit | 8a139e9482632def38281aacddb6d12c02a6f26e (patch) | |
tree | a44e65ca4865bd232717b6af5c86847ff69e4d34 /jstests/replsets | |
parent | ab0c03e4ab5208339fd671305dce44a47c64fe12 (diff) | |
download | mongo-8a139e9482632def38281aacddb6d12c02a6f26e.tar.gz |
SERVER-54263: Add tenantMigrations serverStatus section
Diffstat (limited to 'jstests/replsets')
4 files changed, 167 insertions, 12 deletions
diff --git a/jstests/replsets/libs/tenant_migration_test.js b/jstests/replsets/libs/tenant_migration_test.js index 07819e60bfc..2ceb9d892b2 100644 --- a/jstests/replsets/libs/tenant_migration_test.js +++ b/jstests/replsets/libs/tenant_migration_test.js @@ -564,7 +564,7 @@ function TenantMigrationTest({ }; /** - * Returns the TenantMigrationAccessBlocker associated with given the tenantId on the + * Returns the TenantMigrationAccessBlocker associated with the given tenantId on the * node. */ this.getTenantMigrationAccessBlocker = function(node, tenantId) { @@ -573,6 +573,51 @@ function TenantMigrationTest({ }; /** + * Returns the TenantMigrationStats on the node. + */ + this.getTenantMigrationStats = function(node) { + return assert.commandWorked(node.adminCommand({serverStatus: 1})).tenantMigrations; + }; + + /** + * Awaits the condition when every stats counter reaches the specified count. + */ + this.awaitTenantMigrationStatsCounts = function(node, { + currentMigrationsDonating = 0, + currentMigrationsReceiving = 0, + totalSuccessfulMigrationsDonated = 0, + totalSuccessfulMigrationsReceived = 0, + totalFailedMigrationsDonated = 0, + totalFailedMigrationsReceived = 0 + }) { + const check = function(expectedVal, stats, fieldName) { + if (expectedVal == stats[fieldName]) { + return true; // Condition reached, true means the counter reached the target. + } + assert.gt(expectedVal, + stats[fieldName], + `Stat ${fieldName} value ${stats[fieldName]} exceeded the target`); + return false; + }; + let stats; + assert.soon(() => { + stats = this.getTenantMigrationStats(node); + if (check(currentMigrationsDonating, stats, "currentMigrationsDonating") && + check(currentMigrationsReceiving, stats, "currentMigrationsReceiving") && + check( + totalSuccessfulMigrationsDonated, stats, "totalSuccessfulMigrationsDonated") && + check(totalSuccessfulMigrationsReceived, + stats, + "totalSuccessfulMigrationsReceived") && + check(totalFailedMigrationsDonated, stats, "totalFailedMigrationsDonated") && + check(totalFailedMigrationsReceived, stats, "totalFailedMigrationsReceived")) { + return true; // Done. + } + return false; + }, `Awaiting for tenant migration stats to reach target, got ${tojson(stats)}`); + }; + + /** * Returns the donor ReplSetTest. */ this.getDonorRst = function() { diff --git a/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js b/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js index 0e85ab72e0b..bad84337a06 100644 --- a/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js +++ b/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js @@ -36,7 +36,7 @@ const migrationX509Options = TenantMigrationUtil.makeX509OptionsForTest(); * Runs the donorStartMigration command to start a migration, and interrupts the migration on the * donor using the 'interruptFunc', and asserts that migration eventually commits. */ -function testDonorStartMigrationInterrupt(interruptFunc) { +function testDonorStartMigrationInterrupt(interruptFunc, donorRestarted) { const donorRst = new ReplSetTest({nodes: 3, name: "donorRst", nodeOptions: migrationX509Options.donor}); @@ -51,7 +51,8 @@ function testDonorStartMigrationInterrupt(interruptFunc) { donorRst.stopSet(); return; } - const donorPrimary = tenantMigrationTest.getDonorPrimary(); + let donorPrimary = tenantMigrationTest.getDonorPrimary(); + const recipientPrimary = tenantMigrationTest.getRecipientPrimary(); const migrationId = UUID(); const migrationOpts = { @@ -82,6 +83,23 @@ function testDonorStartMigrationInterrupt(interruptFunc) { TenantMigrationTest.DonorState.kCommitted); assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString)); + donorPrimary = tenantMigrationTest.getDonorPrimary(); // Could change after interrupt. + const donorStats = tenantMigrationTest.getTenantMigrationStats(donorPrimary); + jsTestLog(`Stats at the donor primary: ${tojson(donorStats)}`); + if (donorRestarted) { + // If full restart happened the count could be lost completely. + assert.gte(1, donorStats.totalSuccessfulMigrationsDonated); + } else { + // The double counting happens when the failover happens after migration completes + // but before the state doc GC mark is persisted. While this test is targeting this + // scenario it is low probability in production. + assert(1 == donorStats.totalSuccessfulMigrationsDonated || + 2 == donorStats.totalSuccessfulMigrationsDonated); + } + // Skip checking the stats on the recipient since enableRecipientTesting is false + // so the recipient is forced to respond to recipientSyncData without starting the + // migration. + tenantMigrationTest.stop(); donorRst.stopSet(); } @@ -377,7 +395,7 @@ function testStateDocPersistenceOnFailover(interruptFunc, fpName, isShutdown = f assert.commandWorked( donorPrimary.adminCommand({replSetStepDown: ReplSetTest.kForeverSecs, force: true})); assert.commandWorked(donorPrimary.adminCommand({replSetFreeze: 0})); - }); + }, false /* donor restarted */); })(); (() => { @@ -385,7 +403,7 @@ function testStateDocPersistenceOnFailover(interruptFunc, fpName, isShutdown = f testDonorStartMigrationInterrupt((donorRst) => { donorRst.stopSet(null /* signal */, true /*forRestart */); donorRst.startSet({restart: true}); - }); + }, true /* donor restarted */); })(); (() => { diff --git a/jstests/replsets/tenant_migration_donor_state_machine.js b/jstests/replsets/tenant_migration_donor_state_machine.js index 0eb11a3b70b..433eb51bd25 100644 --- a/jstests/replsets/tenant_migration_donor_state_machine.js +++ b/jstests/replsets/tenant_migration_donor_state_machine.js @@ -20,6 +20,7 @@ load("jstests/replsets/libs/tenant_migration_test.js"); let expectedNumRecipientSyncDataCmdSent = 0; let expectedNumRecipientForgetMigrationCmdSent = 0; +let expectedRecipientSyncDataMetricsFailed = 0; /** * Runs the donorForgetMigration command and asserts that the TenantMigrationAccessBlocker and donor @@ -118,6 +119,24 @@ const kTenantId = "testDb"; let configDonorsColl = donorPrimary.getCollection(TenantMigrationTest.kConfigDonorsNS); +function testStats(node, { + currentMigrationsDonating = 0, + currentMigrationsReceiving = 0, + totalSuccessfulMigrationsDonated = 0, + totalSuccessfulMigrationsReceived = 0, + totalFailedMigrationsDonated = 0, + totalFailedMigrationsReceived = 0 +}) { + const stats = tenantMigrationTest.getTenantMigrationStats(node); + jsTestLog(stats); + assert.eq(currentMigrationsDonating, stats.currentMigrationsDonating); + assert.eq(currentMigrationsReceiving, stats.currentMigrationsReceiving); + assert.eq(totalSuccessfulMigrationsDonated, stats.totalSuccessfulMigrationsDonated); + assert.eq(totalSuccessfulMigrationsReceived, stats.totalSuccessfulMigrationsReceived); + assert.eq(totalFailedMigrationsDonated, stats.totalFailedMigrationsDonated); + assert.eq(totalFailedMigrationsReceived, stats.totalFailedMigrationsReceived); +} + (() => { jsTest.log("Test the case where the migration commits"); const migrationId = UUID(); @@ -153,6 +172,9 @@ let configDonorsColl = donorPrimary.getCollection(TenantMigrationTest.kConfigDon donorPrimary.adminCommand({donorForgetMigration: 1, migrationId: migrationId}), ErrorCodes.TenantMigrationInProgress); + testStats(donorPrimary, {currentMigrationsDonating: 1}); + testStats(recipientPrimary, {currentMigrationsReceiving: 1}); + // Allow the migration to complete. blockingFp.off(); const stateRes = @@ -178,6 +200,56 @@ let configDonorsColl = donorPrimary.getCollection(TenantMigrationTest.kConfigDon assert.eq(recipientSyncDataMetrics.total, expectedNumRecipientSyncDataCmdSent); testDonorForgetMigrationAfterMigrationCompletes(donorRst, recipientRst, migrationId, kTenantId); + + testStats(donorPrimary, {totalSuccessfulMigrationsDonated: 1}); + testStats(recipientPrimary, {totalSuccessfulMigrationsReceived: 1}); +})(); + +(() => { + jsTest.log( + "Test the case where the migration aborts after data becomes consistent on the recipient " + + "but before setting the consistent promise."); + const migrationId = UUID(); + const migrationOpts = { + migrationIdString: extractUUIDFromObject(migrationId), + tenantId: kTenantId, + }; + + let abortRecipientFp = + configureFailPoint(recipientPrimary, + "fpBeforeFulfillingDataConsistentPromise", + {action: "stop", stopErrorCode: ErrorCodes.InternalError}); + const stateRes = assert.commandWorked(tenantMigrationTest.runMigration( + migrationOpts, false /* retryOnRetryableErrors */, false /* automaticForgetMigration */)); + assert.eq(stateRes.state, TenantMigrationTest.DonorState.kAborted); + abortRecipientFp.off(); + + const donorDoc = configDonorsColl.findOne({tenantId: kTenantId}); + const abortOplogEntry = donorPrimary.getDB("local").oplog.rs.findOne( + {ns: TenantMigrationTest.kConfigDonorsNS, op: "u", o: donorDoc}); + assert.eq(donorDoc.state, TenantMigrationTest.DonorState.kAborted); + assert.eq(donorDoc.commitOrAbortOpTime.ts, abortOplogEntry.ts); + assert.eq(donorDoc.abortReason.code, ErrorCodes.InternalError); + + let mtabs; + assert.soon(() => { + mtabs = donorPrimary.adminCommand({serverStatus: 1}).tenantMigrationAccessBlocker; + return mtabs[kTenantId].state === TenantMigrationTest.DonorAccessState.kAborted; + }); + assert(mtabs[kTenantId].abortOpTime); + + expectedRecipientSyncDataMetricsFailed++; + expectedNumRecipientSyncDataCmdSent++; + const recipientSyncDataMetrics = + recipientPrimary.adminCommand({serverStatus: 1}).metrics.commands.recipientSyncData; + assert.eq(recipientSyncDataMetrics.failed, expectedRecipientSyncDataMetricsFailed); + assert.eq(recipientSyncDataMetrics.total, expectedNumRecipientSyncDataCmdSent); + + testDonorForgetMigrationAfterMigrationCompletes(donorRst, recipientRst, migrationId, kTenantId); + + testStats(donorPrimary, {totalSuccessfulMigrationsDonated: 1, totalFailedMigrationsDonated: 1}); + testStats(recipientPrimary, + {totalSuccessfulMigrationsReceived: 1, totalFailedMigrationsReceived: 1}); })(); (() => { @@ -188,12 +260,12 @@ let configDonorsColl = donorPrimary.getCollection(TenantMigrationTest.kConfigDon tenantId: kTenantId, }; - let abortFp = + let abortDonorFp = configureFailPoint(donorPrimary, "abortTenantMigrationBeforeLeavingBlockingState"); const stateRes = assert.commandWorked(tenantMigrationTest.runMigration( migrationOpts, false /* retryOnRetryableErrors */, false /* automaticForgetMigration */)); assert.eq(stateRes.state, TenantMigrationTest.DonorState.kAborted); - abortFp.off(); + abortDonorFp.off(); const donorDoc = configDonorsColl.findOne({tenantId: kTenantId}); const abortOplogEntry = donorPrimary.getDB("local").oplog.rs.findOne( @@ -212,10 +284,15 @@ let configDonorsColl = donorPrimary.getCollection(TenantMigrationTest.kConfigDon expectedNumRecipientSyncDataCmdSent += 2; const recipientSyncDataMetrics = recipientPrimary.adminCommand({serverStatus: 1}).metrics.commands.recipientSyncData; - assert.eq(recipientSyncDataMetrics.failed, 0); + assert.eq(recipientSyncDataMetrics.failed, expectedRecipientSyncDataMetricsFailed); assert.eq(recipientSyncDataMetrics.total, expectedNumRecipientSyncDataCmdSent); testDonorForgetMigrationAfterMigrationCompletes(donorRst, recipientRst, migrationId, kTenantId); + + testStats(donorPrimary, {totalSuccessfulMigrationsDonated: 1, totalFailedMigrationsDonated: 2}); + // The recipient had a chance to synchronize data and from its side the migration succeeded. + testStats(recipientPrimary, + {totalSuccessfulMigrationsReceived: 2, totalFailedMigrationsReceived: 1}); })(); // Drop the TTL index to make sure that the migration state is still available when the diff --git a/jstests/replsets/tenant_migration_recipient_resume_on_stepup_and_restart.js b/jstests/replsets/tenant_migration_recipient_resume_on_stepup_and_restart.js index 3aafeaab1e4..3bbdf02f43f 100644 --- a/jstests/replsets/tenant_migration_recipient_resume_on_stepup_and_restart.js +++ b/jstests/replsets/tenant_migration_recipient_resume_on_stepup_and_restart.js @@ -30,8 +30,9 @@ const migrationX509Options = TenantMigrationUtil.makeX509OptionsForTest(); * Runs the donorStartMigration command to start a migration, and interrupts the migration on the * recipient using the 'interruptFunc' after the migration starts on the recipient side, and * asserts that migration eventually commits. + * @param {recipientRestarted} bool is needed to properly assert the tenant migrations stat count. */ -function testRecipientSyncDataInterrupt(interruptFunc) { +function testRecipientSyncDataInterrupt(interruptFunc, recipientRestarted) { const recipientRst = new ReplSetTest( {nodes: 3, name: "recipientRst", nodeOptions: migrationX509Options.recipient}); recipientRst.startSet(); @@ -44,7 +45,8 @@ function testRecipientSyncDataInterrupt(interruptFunc) { return; } const donorRst = tenantMigrationTest.getDonorRst(); - const recipientPrimary = tenantMigrationTest.getRecipientPrimary(); + const donorPrimary = tenantMigrationTest.getDonorPrimary(); + let recipientPrimary = tenantMigrationTest.getRecipientPrimary(); const migrationId = UUID(); const migrationOpts = { @@ -76,6 +78,19 @@ function testRecipientSyncDataInterrupt(interruptFunc) { TenantMigrationTest.DonorState.kCommitted); assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString)); + tenantMigrationTest.awaitTenantMigrationStatsCounts(donorPrimary, + {totalSuccessfulMigrationsDonated: 1}); + recipientPrimary = tenantMigrationTest.getRecipientPrimary(); // Could change after interrupt. + if (!recipientRestarted) { + tenantMigrationTest.awaitTenantMigrationStatsCounts(recipientPrimary, + {totalSuccessfulMigrationsReceived: 1}); + } else { + // In full restart the count could be lost completely. + const stats = tenantMigrationTest.getTenantMigrationStats(recipientPrimary); + assert(1 == stats.totalSuccessfulMigrationsReceived || + 0 == stats.totalSuccessfulMigrationsReceived); + } + tenantMigrationTest.stop(); recipientRst.stopSet(); } @@ -169,7 +184,7 @@ function testRecipientForgetMigrationInterrupt(interruptFunc) { assert.commandWorked(recipientPrimary.adminCommand( {replSetStepDown: ReplSetTest.kForeverSecs, force: true})); assert.commandWorked(recipientPrimary.adminCommand({replSetFreeze: 0})); - }); + }, false); })(); (() => { @@ -177,7 +192,7 @@ function testRecipientForgetMigrationInterrupt(interruptFunc) { testRecipientSyncDataInterrupt((recipientRst) => { recipientRst.stopSet(null /* signal */, true /*forRestart */); recipientRst.startSet({restart: true}); - }); + }, true); })(); (() => { |