diff options
author | Esha Maharishi <esha.maharishi@mongodb.com> | 2022-08-10 15:07:31 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-08-10 16:13:09 +0000 |
commit | 4a13b3ca73c91126f78e87b39f31765e28ed8dfb (patch) | |
tree | abce22259606a77549e9f47c248dd3a5f6913a25 | |
parent | 66b9f6ac4fdb55d40f39c4318dc982aa5de1d8f2 (diff) | |
download | mongo-4a13b3ca73c91126f78e87b39f31765e28ed8dfb.tar.gz |
SERVER-65236 Make tenant migration donor delete its state doc in its run method
11 files changed, 238 insertions, 141 deletions
diff --git a/jstests/replsets/libs/tenant_migration_test.js b/jstests/replsets/libs/tenant_migration_test.js index f2f1436fa99..e1d4b472dff 100644 --- a/jstests/replsets/libs/tenant_migration_test.js +++ b/jstests/replsets/libs/tenant_migration_test.js @@ -563,44 +563,6 @@ function TenantMigrationTest({ }; /** - * Awaits the condition when every stats counter reaches the specified count. - */ - this.awaitTenantMigrationStatsCounts = function(node, { - currentMigrationsDonating = 0, - currentMigrationsReceiving = 0, - totalSuccessfulMigrationsDonated = 0, - totalSuccessfulMigrationsReceived = 0, - totalFailedMigrationsDonated = 0, - totalFailedMigrationsReceived = 0 - }) { - const check = function(expectedVal, stats, fieldName) { - if (expectedVal == stats[fieldName]) { - return true; // Condition reached, true means the counter reached the target. - } - assert.gt(expectedVal, - stats[fieldName], - `Stat ${fieldName} value ${stats[fieldName]} exceeded the target`); - return false; - }; - let stats; - assert.soon(() => { - stats = this.getTenantMigrationStats(node); - if (check(currentMigrationsDonating, stats, "currentMigrationsDonating") && - check(currentMigrationsReceiving, stats, "currentMigrationsReceiving") && - check( - totalSuccessfulMigrationsDonated, stats, "totalSuccessfulMigrationsDonated") && - check(totalSuccessfulMigrationsReceived, - stats, - "totalSuccessfulMigrationsReceived") && - check(totalFailedMigrationsDonated, stats, "totalFailedMigrationsDonated") && - check(totalFailedMigrationsReceived, stats, "totalFailedMigrationsReceived")) { - return true; // Done. - } - return false; - }, `Awaiting for tenant migration stats to reach target, got ${tojson(stats)}`); - }; - - /** * Returns the donor ReplSetTest. */ this.getDonorRst = function() { diff --git a/jstests/replsets/tenant_migration_donor_directly_deletes_its_state_doc.js b/jstests/replsets/tenant_migration_donor_directly_deletes_its_state_doc.js new file mode 100644 index 00000000000..2eb0f795dba --- /dev/null +++ b/jstests/replsets/tenant_migration_donor_directly_deletes_its_state_doc.js @@ -0,0 +1,139 @@ +/** + * Tests that a tenant migration donor instance shows up as active in serverStatus metrics until it + * has directly deleted its state doc (even if the state doc has actually already been deleted by + * the TTL monitor). + * + * @tags: [ + * incompatible_with_macos, + * incompatible_with_windows_tls, + * # Uses the pauseTenantMigrationDonorBeforeDeletingStateDoc failpoint, which was added in 6.1. + * requires_fcv_61, + * requires_majority_read_concern, + * requires_persistence, + * serverless, + * ] + */ + +(function() { +"use strict"; + +load("jstests/libs/fail_point_util.js"); +load("jstests/libs/uuid_util.js"); +load("jstests/replsets/libs/tenant_migration_test.js"); + +(() => { + jsTest.log("Test case where the TTL monitor deletes the state doc first"); + + const tmt = new TenantMigrationTest({name: jsTestName(), quickGarbageCollection: true}); + + const donorPrimary = tmt.getDonorPrimary(); + const fp = configureFailPoint(donorPrimary, "pauseTenantMigrationDonorBeforeDeletingStateDoc"); + + jsTest.log("Confirm serverStatus does not show any active or completed tenant migrations."); + let donorStats = tmt.getTenantMigrationStats(donorPrimary); + jsTest.log("donorStats: " + tojson(donorStats)); + assert.eq(0, donorStats.currentMigrationsDonating); + assert.eq(0, donorStats.totalMigrationDonationsCommitted); + assert.eq(0, donorStats.totalMigrationDonationsAborted); + + jsTest.log("Start a tenant migration."); + const tenantId = "testTenantId"; + const migrationId = extractUUIDFromObject(UUID()); + const migrationOpts = { + migrationIdString: migrationId, + tenantId: tenantId, + recipientConnString: tmt.getRecipientConnString(), + }; + TenantMigrationTest.assertCommitted(tmt.runMigration(migrationOpts)); + + jsTest.log("Wait for the migration to reach the failpoint before deleting the state doc."); + fp.wait(); + + jsTest.log("Confirm the state doc has been deleted by the TTL monitor"); + assert.soon(() => { + return 0 == donorPrimary.getDB("config").getCollection("tenantMigrationDonors").count(); + }); + + jsTest.log("Confirm the instance still shows up as active in serverStatus."); + donorStats = tmt.getTenantMigrationStats(donorPrimary); + jsTest.log("donorStats: " + tojson(donorStats)); + assert.eq(1, donorStats.currentMigrationsDonating); + assert.eq(1, donorStats.totalMigrationDonationsCommitted); + assert.eq(0, donorStats.totalMigrationDonationsAborted); + + // TODO (SERVER-61717): Confirm the instance still shows up in the POS map. Currently, the + // instance is removed from the map as soon as its' state doc is deleted by the TTL monitor. + + jsTest.log("Turn off the failpoint."); + fp.off(); + + jsTest.log("Confirm the instance eventually stops showing up as active in serverStatus"); + assert.soon(() => { + donorStats = tmt.getTenantMigrationStats(donorPrimary); + return 0 == donorStats.currentMigrationsDonating && + 1 == donorStats.totalMigrationDonationsCommitted && + 0 == donorStats.totalMigrationDonationsAborted; + }); + + // TODO (SERVER-61717): Confirm the instance eventually stops showing up in the POS map. + + tmt.stop(); +})(); + +(() => { + jsTest.log("Test case where the instance deletes the state doc first"); + + const tmt = new TenantMigrationTest({name: jsTestName(), quickGarbageCollection: true}); + + const donorPrimary = tmt.getDonorPrimary(); + + jsTest.log("Confirm the TTL index exists"); + const listIndexesRes1 = assert.commandWorked( + donorPrimary.getDB("config").runCommand({listIndexes: "tenantMigrationDonors"})); + assert(listIndexesRes1.cursor.firstBatch.some( + elem => elem.name === "TenantMigrationDonorTTLIndex" && elem.key.expireAt === 1)); + + jsTest.log("Drop the TTL index"); + assert.commandWorked(donorPrimary.getDB("config").runCommand( + {dropIndexes: "tenantMigrationDonors", index: "TenantMigrationDonorTTLIndex"})); + + jsTest.log("Confirm the TTL index no longer exists"); + const listIndexesRes2 = assert.commandWorked( + donorPrimary.getDB("config").runCommand({listIndexes: "tenantMigrationDonors"})); + assert(listIndexesRes2.cursor.firstBatch.every(elem => elem.key.expireAt == null)); + + jsTest.log("Confirm serverStatus does not show any active or completed tenant migrations."); + let donorStats = tmt.getTenantMigrationStats(donorPrimary); + jsTest.log("donorStats: " + tojson(donorStats)); + assert.eq(0, donorStats.currentMigrationsDonating); + assert.eq(0, donorStats.totalMigrationDonationsCommitted); + assert.eq(0, donorStats.totalMigrationDonationsAborted); + + jsTest.log("Start a tenant migration."); + const tenantId = "testTenantId"; + const migrationId = extractUUIDFromObject(UUID()); + const migrationOpts = { + migrationIdString: migrationId, + tenantId: tenantId, + recipientConnString: tmt.getRecipientConnString(), + }; + TenantMigrationTest.assertCommitted(tmt.runMigration(migrationOpts)); + + jsTest.log("Wait for the instance to delete the state doc"); + assert.soon(() => { + return 0 == donorPrimary.getDB("config").getCollection("tenantMigrationDonors").count(); + }); + + jsTest.log("Confirm the instance eventually stops showing up as active in serverStatus"); + assert.soon(() => { + donorStats = tmt.getTenantMigrationStats(donorPrimary); + return 0 == donorStats.currentMigrationsDonating && + 1 == donorStats.totalMigrationDonationsCommitted && + 0 == donorStats.totalMigrationDonationsAborted; + }); + + // TODO (SERVER-61717): Confirm the instance eventually stops showing up in the POS map. + + tmt.stop(); +})(); +})(); diff --git a/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js b/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js index 8ced7f53232..d6cbe11b703 100644 --- a/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js +++ b/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js @@ -7,6 +7,8 @@ * incompatible_with_macos, * incompatible_with_shard_merge, * incompatible_with_windows_tls, + * # Some tenant migration statistics field names were changed in 6.1. + * requires_fcv_61, * requires_majority_read_concern, * requires_persistence, * # Tenant migrations are only used in serverless. @@ -99,13 +101,13 @@ function testDonorStartMigrationInterrupt(interruptFunc, jsTestLog(`Stats at the donor primary: ${tojson(donorStats)}`); if (donorRestarted) { // If full restart happened the count could be lost completely. - assert.gte(1, donorStats.totalSuccessfulMigrationsDonated); + assert.gte(1, donorStats.totalMigrationDonationsCommitted); } else { // The double counting happens when the failover happens after migration completes // but before the state doc GC mark is persisted. While this test is targeting this // scenario it is low probability in production. - assert(1 == donorStats.totalSuccessfulMigrationsDonated || - 2 == donorStats.totalSuccessfulMigrationsDonated); + assert(1 == donorStats.totalMigrationDonationsCommitted || + 2 == donorStats.totalMigrationDonationsCommitted); } // Skip checking the stats on the recipient since enableRecipientTesting is false // so the recipient is forced to respond to recipientSyncData without starting the diff --git a/jstests/replsets/tenant_migration_donor_state_machine.js b/jstests/replsets/tenant_migration_donor_state_machine.js index 2afaf565de4..cd251bb6657 100644 --- a/jstests/replsets/tenant_migration_donor_state_machine.js +++ b/jstests/replsets/tenant_migration_donor_state_machine.js @@ -5,6 +5,8 @@ * @tags: [ * incompatible_with_macos, * incompatible_with_windows_tls, + * # Some tenant migration statistics field names were changed in 6.1. + * requires_fcv_61, * requires_majority_read_concern, * requires_persistence, * serverless, @@ -116,19 +118,15 @@ let configDonorsColl = donorPrimary.getCollection(TenantMigrationTest.kConfigDon function testStats(node, { currentMigrationsDonating = 0, currentMigrationsReceiving = 0, - totalSuccessfulMigrationsDonated = 0, - totalSuccessfulMigrationsReceived = 0, - totalFailedMigrationsDonated = 0, - totalFailedMigrationsReceived = 0 + totalMigrationDonationsCommitted = 0, + totalMigrationDonationsAborted = 0, }) { const stats = tenantMigrationTest.getTenantMigrationStats(node); jsTestLog(stats); assert.eq(currentMigrationsDonating, stats.currentMigrationsDonating); assert.eq(currentMigrationsReceiving, stats.currentMigrationsReceiving); - assert.eq(totalSuccessfulMigrationsDonated, stats.totalSuccessfulMigrationsDonated); - assert.eq(totalSuccessfulMigrationsReceived, stats.totalSuccessfulMigrationsReceived); - assert.eq(totalFailedMigrationsDonated, stats.totalFailedMigrationsDonated); - assert.eq(totalFailedMigrationsReceived, stats.totalFailedMigrationsReceived); + assert.eq(totalMigrationDonationsCommitted, stats.totalMigrationDonationsCommitted); + assert.eq(totalMigrationDonationsAborted, stats.totalMigrationDonationsAborted); } (() => { @@ -196,8 +194,7 @@ function testStats(node, { testDonorForgetMigrationAfterMigrationCompletes(donorRst, recipientRst, migrationId, kTenantId); - testStats(donorPrimary, {totalSuccessfulMigrationsDonated: 1}); - testStats(recipientPrimary, {totalSuccessfulMigrationsReceived: 1}); + testStats(donorPrimary, {totalMigrationDonationsCommitted: 1}); })(); (() => { @@ -242,9 +239,8 @@ function testStats(node, { testDonorForgetMigrationAfterMigrationCompletes(donorRst, recipientRst, migrationId, kTenantId); - testStats(donorPrimary, {totalSuccessfulMigrationsDonated: 1, totalFailedMigrationsDonated: 1}); - testStats(recipientPrimary, - {totalSuccessfulMigrationsReceived: 1, totalFailedMigrationsReceived: 1}); + testStats(donorPrimary, + {totalMigrationDonationsCommitted: 1, totalMigrationDonationsAborted: 1}); })(); (() => { @@ -284,10 +280,8 @@ function testStats(node, { testDonorForgetMigrationAfterMigrationCompletes(donorRst, recipientRst, migrationId, kTenantId); - testStats(donorPrimary, {totalSuccessfulMigrationsDonated: 1, totalFailedMigrationsDonated: 2}); - // The recipient had a chance to synchronize data and from its side the migration succeeded. - testStats(recipientPrimary, - {totalSuccessfulMigrationsReceived: 2, totalFailedMigrationsReceived: 1}); + testStats(donorPrimary, + {totalMigrationDonationsCommitted: 1, totalMigrationDonationsAborted: 2}); })(); // Drop the TTL index to make sure that the migration state is still available when the diff --git a/jstests/replsets/tenant_migration_recipient_resume_on_stepup_and_restart.js b/jstests/replsets/tenant_migration_recipient_resume_on_stepup_and_restart.js index c51f3e2a3ee..01d98b34097 100644 --- a/jstests/replsets/tenant_migration_recipient_resume_on_stepup_and_restart.js +++ b/jstests/replsets/tenant_migration_recipient_resume_on_stepup_and_restart.js @@ -5,6 +5,8 @@ * incompatible_with_macos, * incompatible_with_windows_tls, * incompatible_with_shard_merge, + * # Some tenant migration statistics field names were changed in 6.1. + * requires_fcv_61, * requires_majority_read_concern, * requires_persistence, * serverless, @@ -77,18 +79,8 @@ function testRecipientSyncDataInterrupt(interruptFunc, recipientRestarted) { TenantMigrationTest.DonorState.kCommitted); assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString)); - tenantMigrationTest.awaitTenantMigrationStatsCounts(donorPrimary, - {totalSuccessfulMigrationsDonated: 1}); - recipientPrimary = tenantMigrationTest.getRecipientPrimary(); // Could change after interrupt. - if (!recipientRestarted) { - tenantMigrationTest.awaitTenantMigrationStatsCounts(recipientPrimary, - {totalSuccessfulMigrationsReceived: 1}); - } else { - // In full restart the count could be lost completely. - const stats = tenantMigrationTest.getTenantMigrationStats(recipientPrimary); - assert(1 == stats.totalSuccessfulMigrationsReceived || - 0 == stats.totalSuccessfulMigrationsReceived); - } + const donorStats = tenantMigrationTest.getTenantMigrationStats(donorPrimary); + assert.eq(1, donorStats.totalMigrationDonationsCommitted); tenantMigrationTest.stop(); recipientRst.stopSet(); diff --git a/src/mongo/db/repl/tenant_migration_donor_service.cpp b/src/mongo/db/repl/tenant_migration_donor_service.cpp index 313710c291d..98904d323c1 100644 --- a/src/mongo/db/repl/tenant_migration_donor_service.cpp +++ b/src/mongo/db/repl/tenant_migration_donor_service.cpp @@ -72,6 +72,7 @@ MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationBeforeFetchingKeys); MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationDonorBeforeWaitingForKeysToReplicate); MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationDonorBeforeMarkingStateGarbageCollectable); MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationDonorAfterMarkingStateGarbageCollectable); +MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationDonorBeforeDeletingStateDoc); MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationBeforeEnteringFutureChain); MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationAfterFetchingAndStoringKeys); MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationDonorWhileUpdatingStateDoc); @@ -206,9 +207,6 @@ void TenantMigrationDonorService::abortAllMigrations(OperationContext* opCtx) { } } -// Note this index is required on both the donor and recipient in a tenant migration, since each -// will copy cluster time keys from the other. The donor service is set up on all mongods on stepup -// to primary, so this index will be created on both donors and recipients. ExecutorFuture<void> TenantMigrationDonorService::createStateDocumentTTLIndex( std::shared_ptr<executor::ScopedTaskExecutor> executor, const CancellationToken& token) { return AsyncTry([this] { @@ -267,6 +265,9 @@ ExecutorFuture<void> TenantMigrationDonorService::createExternalKeysTTLIndex( ExecutorFuture<void> TenantMigrationDonorService::_rebuildService( std::shared_ptr<executor::ScopedTaskExecutor> executor, const CancellationToken& token) { return createStateDocumentTTLIndex(executor, token).then([this, executor, token] { + // Since a tenant migration donor and recipient both copy signing keys from each other and + // put them in the same external keys collection, they share this TTL index (the recipient + // service does not also build this TTL index). return createExternalKeysTTLIndex(executor, token); }); } @@ -672,6 +673,25 @@ TenantMigrationDonorService::Instance::_markStateDocAsGarbageCollectable( .on(**executor, token); } +ExecutorFuture<void> TenantMigrationDonorService::Instance::_removeStateDoc( + std::shared_ptr<executor::ScopedTaskExecutor> executor, const CancellationToken& token) { + return AsyncTry([this, self = shared_from_this()] { + auto opCtxHolder = cc().makeOperationContext(); + auto opCtx = opCtxHolder.get(); + + pauseTenantMigrationDonorBeforeDeletingStateDoc.pauseWhileSet(opCtx); + + PersistentTaskStore<TenantMigrationDonorDocument> store(_stateDocumentsNS); + store.remove( + opCtx, + BSON(TenantMigrationDonorDocument::kIdFieldName << _migrationUuid), + WriteConcernOptions(1, WriteConcernOptions::SyncMode::UNSET, Seconds(0))); + }) + .until([](Status status) { return status.isOK(); }) + .withBackoffBetweenIterations(kExponentialBackoff) + .on(**executor, token); +} + ExecutorFuture<void> TenantMigrationDonorService::Instance::_waitForMajorityWriteConcern( std::shared_ptr<executor::ScopedTaskExecutor> executor, repl::OpTime opTime, @@ -922,10 +942,10 @@ SemiFuture<void> TenantMigrationDonorService::Instance::run( // happens after this block and before the state doc GC is persisted. if (_abortReason) { TenantMigrationStatistics::get(_serviceContext) - ->incTotalFailedMigrationsDonated(); + ->incTotalMigrationDonationsAborted(); } else { TenantMigrationStatistics::get(_serviceContext) - ->incTotalSuccessfulMigrationsDonated(); + ->incTotalMigrationDonationsCommitted(); } } }) @@ -933,6 +953,9 @@ SemiFuture<void> TenantMigrationDonorService::Instance::run( return _waitForForgetMigrationThenMarkMigrationGarbageCollectable( executor, recipientTargeterRS, token); }) + .then([this, self = shared_from_this(), executor, token] { + return _waitForGarbageCollectionDelayThenDeleteStateDoc(executor, token); + }) .onCompletion([this, self = shared_from_this(), token, @@ -944,11 +967,6 @@ SemiFuture<void> TenantMigrationDonorService::Instance::run( stdx::lock_guard<Latch> lg(_mutex); - LOGV2(4920400, - "Marked migration state as garbage collectable", - "migrationId"_attr = _migrationUuid, - "expireAt"_attr = _stateDoc.getExpireAt(), - "status"_attr = status); setPromiseFromStatusIfNotReady(lg, _forgetMigrationDurablePromise, status); LOGV2(5006601, @@ -1433,7 +1451,7 @@ TenantMigrationDonorService::Instance::_waitForForgetMigrationThenMarkMigrationG }) .then([this, self = shared_from_this(), executor, token] { LOGV2(6104911, - "Marking migration as garbage-collectable.", + "Marking external keys as garbage collectable.", "migrationId"_attr = _migrationUuid, "tenantId"_attr = _tenantId); // Note marking the keys as garbage collectable is not atomic with marking the @@ -1448,6 +1466,10 @@ TenantMigrationDonorService::Instance::_waitForForgetMigrationThenMarkMigrationG token); }) .then([this, self = shared_from_this(), executor, token] { + LOGV2(6523600, + "Marking state document as garbage collectable.", + "migrationId"_attr = _migrationUuid, + "tenantId"_attr = _tenantId); return _markStateDocAsGarbageCollectable(executor, token); }) .then([this, self = shared_from_this(), executor, token](repl::OpTime opTime) { @@ -1455,6 +1477,30 @@ TenantMigrationDonorService::Instance::_waitForForgetMigrationThenMarkMigrationG }) .then([this, self = shared_from_this()] { pauseTenantMigrationDonorAfterMarkingStateGarbageCollectable.pauseWhileSet(); + stdx::lock_guard<Latch> lg(_mutex); + setPromiseOkIfNotReady(lg, _forgetMigrationDurablePromise); + }); +} + +ExecutorFuture<void> +TenantMigrationDonorService::Instance::_waitForGarbageCollectionDelayThenDeleteStateDoc( + const std::shared_ptr<executor::ScopedTaskExecutor>& executor, const CancellationToken& token) { + + LOGV2(8423362, + "Waiting for garbage collection delay before deleting state document", + "migrationId"_attr = _migrationUuid, + "tenantId"_attr = _tenantId, + "expireAt"_attr = *_stateDoc.getExpireAt()); + + stdx::lock_guard<Latch> lg(_mutex); + return (*executor) + ->sleepUntil(*_stateDoc.getExpireAt(), token) + .then([this, self = shared_from_this(), executor, token]() { + LOGV2(8423363, + "Deleting state document", + "migrationId"_attr = _migrationUuid, + "tenantId"_attr = _tenantId); + return _removeStateDoc(executor, token); }); } diff --git a/src/mongo/db/repl/tenant_migration_donor_service.h b/src/mongo/db/repl/tenant_migration_donor_service.h index d8a73412b55..6eb481b45c1 100644 --- a/src/mongo/db/repl/tenant_migration_donor_service.h +++ b/src/mongo/db/repl/tenant_migration_donor_service.h @@ -201,6 +201,10 @@ public: std::shared_ptr<RemoteCommandTargeter> recipientTargeterRS, const CancellationToken& token); + ExecutorFuture<void> _waitForGarbageCollectionDelayThenDeleteStateDoc( + const std::shared_ptr<executor::ScopedTaskExecutor>& executor, + const CancellationToken& token); + /** * Makes a task executor for executing commands against the recipient. If the server * parameter 'tenantMigrationDisableX509Auth' is false, configures the executor to use the @@ -227,6 +231,14 @@ public: const CancellationToken& token); /** + * Deletes the state document. Does not return the opTime for the delete, since it's not + * necessary to wait for this delete to be majority committed (this is the last step in the + * chain, and if the delete rolls back, the new primary will re-do the delete). + */ + ExecutorFuture<void> _removeStateDoc(std::shared_ptr<executor::ScopedTaskExecutor> executor, + const CancellationToken& token); + + /** * Sets the "expireAt" time for the state document to be garbage collected, and returns the * the opTime for the write. */ diff --git a/src/mongo/db/repl/tenant_migration_recipient_service.cpp b/src/mongo/db/repl/tenant_migration_recipient_service.cpp index f540dbe553a..427f5410e15 100644 --- a/src/mongo/db/repl/tenant_migration_recipient_service.cpp +++ b/src/mongo/db/repl/tenant_migration_recipient_service.cpp @@ -2928,7 +2928,6 @@ SemiFuture<void> TenantMigrationRecipientService::Instance::run( } _cleanupOnDataSyncCompletion(status); - _setMigrationStatsOnCompletion(status); // Handle recipientForgetMigration. stdx::lock_guard lk(_mutex); @@ -3000,35 +2999,6 @@ SemiFuture<void> TenantMigrationRecipientService::Instance::run( .semi(); } -void TenantMigrationRecipientService::Instance::_setMigrationStatsOnCompletion( - Status completionStatus) const { - bool success = false; - - if (completionStatus.code() == ErrorCodes::TenantMigrationForgotten) { - stdx::lock_guard lk(_mutex); - if (_stateDoc.getExpireAt()) { - // Avoid double counting tenant migration statistics after failover. - return; - } - // The migration committed if and only if it received recipientForgetMigration after it has - // applied data past the returnAfterReachingDonorTimestamp, saved in state doc as - // rejectReadsBeforeTimestamp. - if (_stateDoc.getRejectReadsBeforeTimestamp().has_value()) { - success = true; - } - } else if (ErrorCodes::isRetriableError(completionStatus)) { - // The migration was interrupted due to shutdown or stepdown, avoid incrementing the count - // for failed migrations since the migration will be resumed on stepup. - return; - } - - if (success) { - TenantMigrationStatistics::get(_serviceContext)->incTotalSuccessfulMigrationsReceived(); - } else { - TenantMigrationStatistics::get(_serviceContext)->incTotalFailedMigrationsReceived(); - } -} - const UUID& TenantMigrationRecipientService::Instance::getMigrationUUID() const { return _migrationUuid; } diff --git a/src/mongo/db/repl/tenant_migration_recipient_service.h b/src/mongo/db/repl/tenant_migration_recipient_service.h index 820ffdae0fd..9e292e0041f 100644 --- a/src/mongo/db/repl/tenant_migration_recipient_service.h +++ b/src/mongo/db/repl/tenant_migration_recipient_service.h @@ -582,12 +582,6 @@ public: void _compareRecipientAndDonorFCV() const; /* - * Increments either 'totalSuccessfulMigrationsReceived' or 'totalFailedMigrationsReceived' - * in TenantMigrationStatistics by examining status and promises. - */ - void _setMigrationStatsOnCompletion(Status completionStatus) const; - - /* * Sets up internal state to begin migration. */ void _setup(); diff --git a/src/mongo/db/repl/tenant_migration_statistics.cpp b/src/mongo/db/repl/tenant_migration_statistics.cpp index b18bfe028af..40f4c69725b 100644 --- a/src/mongo/db/repl/tenant_migration_statistics.cpp +++ b/src/mongo/db/repl/tenant_migration_statistics.cpp @@ -54,29 +54,19 @@ TenantMigrationStatistics::getScopedOutstandingReceivingCount() { [this] { _currentMigrationsReceiving.fetchAndAddRelaxed(-1); }); } -void TenantMigrationStatistics::incTotalSuccessfulMigrationsDonated() { - _totalSuccessfulMigrationsDonated.fetchAndAddRelaxed(1); +void TenantMigrationStatistics::incTotalMigrationDonationsCommitted() { + _totalMigrationDonationsCommitted.fetchAndAddRelaxed(1); } -void TenantMigrationStatistics::incTotalSuccessfulMigrationsReceived() { - _totalSuccessfulMigrationsReceived.fetchAndAddRelaxed(1); -} - -void TenantMigrationStatistics::incTotalFailedMigrationsDonated() { - _totalFailedMigrationsDonated.fetchAndAddRelaxed(1); -} - -void TenantMigrationStatistics::incTotalFailedMigrationsReceived() { - _totalFailedMigrationsReceived.fetchAndAddRelaxed(1); +void TenantMigrationStatistics::incTotalMigrationDonationsAborted() { + _totalMigrationDonationsAborted.fetchAndAddRelaxed(1); } void TenantMigrationStatistics::appendInfoForServerStatus(BSONObjBuilder* builder) const { builder->append("currentMigrationsDonating", _currentMigrationsDonating.load()); builder->append("currentMigrationsReceiving", _currentMigrationsReceiving.load()); - builder->append("totalSuccessfulMigrationsDonated", _totalSuccessfulMigrationsDonated.load()); - builder->append("totalSuccessfulMigrationsReceived", _totalSuccessfulMigrationsReceived.load()); - builder->append("totalFailedMigrationsDonated", _totalFailedMigrationsDonated.load()); - builder->append("totalFailedMigrationsReceived", _totalFailedMigrationsReceived.load()); + builder->append("totalMigrationDonationsCommitted", _totalMigrationDonationsCommitted.load()); + builder->append("totalMigrationDonationsAborted", _totalMigrationDonationsAborted.load()); } } // namespace mongo diff --git a/src/mongo/db/repl/tenant_migration_statistics.h b/src/mongo/db/repl/tenant_migration_statistics.h index 37c640712b7..1043858ef9b 100644 --- a/src/mongo/db/repl/tenant_migration_statistics.h +++ b/src/mongo/db/repl/tenant_migration_statistics.h @@ -49,20 +49,16 @@ public: std::unique_ptr<ScopeGuard<std::function<void()>>> getScopedOutstandingDonatingCount(); std::unique_ptr<ScopeGuard<std::function<void()>>> getScopedOutstandingReceivingCount(); - void incTotalSuccessfulMigrationsDonated(); - void incTotalSuccessfulMigrationsReceived(); - void incTotalFailedMigrationsDonated(); - void incTotalFailedMigrationsReceived(); + void incTotalMigrationDonationsCommitted(); + void incTotalMigrationDonationsAborted(); void appendInfoForServerStatus(BSONObjBuilder* builder) const; private: AtomicWord<long long> _currentMigrationsDonating; AtomicWord<long long> _currentMigrationsReceiving; - AtomicWord<long long> _totalSuccessfulMigrationsDonated; - AtomicWord<long long> _totalSuccessfulMigrationsReceived; - AtomicWord<long long> _totalFailedMigrationsDonated; - AtomicWord<long long> _totalFailedMigrationsReceived; + AtomicWord<long long> _totalMigrationDonationsCommitted; + AtomicWord<long long> _totalMigrationDonationsAborted; }; } // namespace mongo |