summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEsha Maharishi <esha.maharishi@mongodb.com>2022-08-10 15:07:31 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-08-10 16:13:09 +0000
commit4a13b3ca73c91126f78e87b39f31765e28ed8dfb (patch)
treeabce22259606a77549e9f47c248dd3a5f6913a25
parent66b9f6ac4fdb55d40f39c4318dc982aa5de1d8f2 (diff)
downloadmongo-4a13b3ca73c91126f78e87b39f31765e28ed8dfb.tar.gz
SERVER-65236 Make tenant migration donor delete its state doc in its run method
-rw-r--r--jstests/replsets/libs/tenant_migration_test.js38
-rw-r--r--jstests/replsets/tenant_migration_donor_directly_deletes_its_state_doc.js139
-rw-r--r--jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js8
-rw-r--r--jstests/replsets/tenant_migration_donor_state_machine.js28
-rw-r--r--jstests/replsets/tenant_migration_recipient_resume_on_stepup_and_restart.js16
-rw-r--r--src/mongo/db/repl/tenant_migration_donor_service.cpp68
-rw-r--r--src/mongo/db/repl/tenant_migration_donor_service.h12
-rw-r--r--src/mongo/db/repl/tenant_migration_recipient_service.cpp30
-rw-r--r--src/mongo/db/repl/tenant_migration_recipient_service.h6
-rw-r--r--src/mongo/db/repl/tenant_migration_statistics.cpp22
-rw-r--r--src/mongo/db/repl/tenant_migration_statistics.h12
11 files changed, 238 insertions, 141 deletions
diff --git a/jstests/replsets/libs/tenant_migration_test.js b/jstests/replsets/libs/tenant_migration_test.js
index f2f1436fa99..e1d4b472dff 100644
--- a/jstests/replsets/libs/tenant_migration_test.js
+++ b/jstests/replsets/libs/tenant_migration_test.js
@@ -563,44 +563,6 @@ function TenantMigrationTest({
};
/**
- * Awaits the condition when every stats counter reaches the specified count.
- */
- this.awaitTenantMigrationStatsCounts = function(node, {
- currentMigrationsDonating = 0,
- currentMigrationsReceiving = 0,
- totalSuccessfulMigrationsDonated = 0,
- totalSuccessfulMigrationsReceived = 0,
- totalFailedMigrationsDonated = 0,
- totalFailedMigrationsReceived = 0
- }) {
- const check = function(expectedVal, stats, fieldName) {
- if (expectedVal == stats[fieldName]) {
- return true; // Condition reached, true means the counter reached the target.
- }
- assert.gt(expectedVal,
- stats[fieldName],
- `Stat ${fieldName} value ${stats[fieldName]} exceeded the target`);
- return false;
- };
- let stats;
- assert.soon(() => {
- stats = this.getTenantMigrationStats(node);
- if (check(currentMigrationsDonating, stats, "currentMigrationsDonating") &&
- check(currentMigrationsReceiving, stats, "currentMigrationsReceiving") &&
- check(
- totalSuccessfulMigrationsDonated, stats, "totalSuccessfulMigrationsDonated") &&
- check(totalSuccessfulMigrationsReceived,
- stats,
- "totalSuccessfulMigrationsReceived") &&
- check(totalFailedMigrationsDonated, stats, "totalFailedMigrationsDonated") &&
- check(totalFailedMigrationsReceived, stats, "totalFailedMigrationsReceived")) {
- return true; // Done.
- }
- return false;
- }, `Awaiting for tenant migration stats to reach target, got ${tojson(stats)}`);
- };
-
- /**
* Returns the donor ReplSetTest.
*/
this.getDonorRst = function() {
diff --git a/jstests/replsets/tenant_migration_donor_directly_deletes_its_state_doc.js b/jstests/replsets/tenant_migration_donor_directly_deletes_its_state_doc.js
new file mode 100644
index 00000000000..2eb0f795dba
--- /dev/null
+++ b/jstests/replsets/tenant_migration_donor_directly_deletes_its_state_doc.js
@@ -0,0 +1,139 @@
+/**
+ * Tests that a tenant migration donor instance shows up as active in serverStatus metrics until it
+ * has directly deleted its state doc (even if the state doc has actually already been deleted by
+ * the TTL monitor).
+ *
+ * @tags: [
+ * incompatible_with_macos,
+ * incompatible_with_windows_tls,
+ * # Uses the pauseTenantMigrationDonorBeforeDeletingStateDoc failpoint, which was added in 6.1.
+ * requires_fcv_61,
+ * requires_majority_read_concern,
+ * requires_persistence,
+ * serverless,
+ * ]
+ */
+
+(function() {
+"use strict";
+
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/uuid_util.js");
+load("jstests/replsets/libs/tenant_migration_test.js");
+
+(() => {
+ jsTest.log("Test case where the TTL monitor deletes the state doc first");
+
+ const tmt = new TenantMigrationTest({name: jsTestName(), quickGarbageCollection: true});
+
+ const donorPrimary = tmt.getDonorPrimary();
+ const fp = configureFailPoint(donorPrimary, "pauseTenantMigrationDonorBeforeDeletingStateDoc");
+
+ jsTest.log("Confirm serverStatus does not show any active or completed tenant migrations.");
+ let donorStats = tmt.getTenantMigrationStats(donorPrimary);
+ jsTest.log("donorStats: " + tojson(donorStats));
+ assert.eq(0, donorStats.currentMigrationsDonating);
+ assert.eq(0, donorStats.totalMigrationDonationsCommitted);
+ assert.eq(0, donorStats.totalMigrationDonationsAborted);
+
+ jsTest.log("Start a tenant migration.");
+ const tenantId = "testTenantId";
+ const migrationId = extractUUIDFromObject(UUID());
+ const migrationOpts = {
+ migrationIdString: migrationId,
+ tenantId: tenantId,
+ recipientConnString: tmt.getRecipientConnString(),
+ };
+ TenantMigrationTest.assertCommitted(tmt.runMigration(migrationOpts));
+
+ jsTest.log("Wait for the migration to reach the failpoint before deleting the state doc.");
+ fp.wait();
+
+ jsTest.log("Confirm the state doc has been deleted by the TTL monitor");
+ assert.soon(() => {
+ return 0 == donorPrimary.getDB("config").getCollection("tenantMigrationDonors").count();
+ });
+
+ jsTest.log("Confirm the instance still shows up as active in serverStatus.");
+ donorStats = tmt.getTenantMigrationStats(donorPrimary);
+ jsTest.log("donorStats: " + tojson(donorStats));
+ assert.eq(1, donorStats.currentMigrationsDonating);
+ assert.eq(1, donorStats.totalMigrationDonationsCommitted);
+ assert.eq(0, donorStats.totalMigrationDonationsAborted);
+
+ // TODO (SERVER-61717): Confirm the instance still shows up in the POS map. Currently, the
+ // instance is removed from the map as soon as its' state doc is deleted by the TTL monitor.
+
+ jsTest.log("Turn off the failpoint.");
+ fp.off();
+
+ jsTest.log("Confirm the instance eventually stops showing up as active in serverStatus");
+ assert.soon(() => {
+ donorStats = tmt.getTenantMigrationStats(donorPrimary);
+ return 0 == donorStats.currentMigrationsDonating &&
+ 1 == donorStats.totalMigrationDonationsCommitted &&
+ 0 == donorStats.totalMigrationDonationsAborted;
+ });
+
+ // TODO (SERVER-61717): Confirm the instance eventually stops showing up in the POS map.
+
+ tmt.stop();
+})();
+
+(() => {
+ jsTest.log("Test case where the instance deletes the state doc first");
+
+ const tmt = new TenantMigrationTest({name: jsTestName(), quickGarbageCollection: true});
+
+ const donorPrimary = tmt.getDonorPrimary();
+
+ jsTest.log("Confirm the TTL index exists");
+ const listIndexesRes1 = assert.commandWorked(
+ donorPrimary.getDB("config").runCommand({listIndexes: "tenantMigrationDonors"}));
+ assert(listIndexesRes1.cursor.firstBatch.some(
+ elem => elem.name === "TenantMigrationDonorTTLIndex" && elem.key.expireAt === 1));
+
+ jsTest.log("Drop the TTL index");
+ assert.commandWorked(donorPrimary.getDB("config").runCommand(
+ {dropIndexes: "tenantMigrationDonors", index: "TenantMigrationDonorTTLIndex"}));
+
+ jsTest.log("Confirm the TTL index no longer exists");
+ const listIndexesRes2 = assert.commandWorked(
+ donorPrimary.getDB("config").runCommand({listIndexes: "tenantMigrationDonors"}));
+ assert(listIndexesRes2.cursor.firstBatch.every(elem => elem.key.expireAt == null));
+
+ jsTest.log("Confirm serverStatus does not show any active or completed tenant migrations.");
+ let donorStats = tmt.getTenantMigrationStats(donorPrimary);
+ jsTest.log("donorStats: " + tojson(donorStats));
+ assert.eq(0, donorStats.currentMigrationsDonating);
+ assert.eq(0, donorStats.totalMigrationDonationsCommitted);
+ assert.eq(0, donorStats.totalMigrationDonationsAborted);
+
+ jsTest.log("Start a tenant migration.");
+ const tenantId = "testTenantId";
+ const migrationId = extractUUIDFromObject(UUID());
+ const migrationOpts = {
+ migrationIdString: migrationId,
+ tenantId: tenantId,
+ recipientConnString: tmt.getRecipientConnString(),
+ };
+ TenantMigrationTest.assertCommitted(tmt.runMigration(migrationOpts));
+
+ jsTest.log("Wait for the instance to delete the state doc");
+ assert.soon(() => {
+ return 0 == donorPrimary.getDB("config").getCollection("tenantMigrationDonors").count();
+ });
+
+ jsTest.log("Confirm the instance eventually stops showing up as active in serverStatus");
+ assert.soon(() => {
+ donorStats = tmt.getTenantMigrationStats(donorPrimary);
+ return 0 == donorStats.currentMigrationsDonating &&
+ 1 == donorStats.totalMigrationDonationsCommitted &&
+ 0 == donorStats.totalMigrationDonationsAborted;
+ });
+
+ // TODO (SERVER-61717): Confirm the instance eventually stops showing up in the POS map.
+
+ tmt.stop();
+})();
+})();
diff --git a/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js b/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js
index 8ced7f53232..d6cbe11b703 100644
--- a/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js
+++ b/jstests/replsets/tenant_migration_donor_resume_on_stepup_and_restart.js
@@ -7,6 +7,8 @@
* incompatible_with_macos,
* incompatible_with_shard_merge,
* incompatible_with_windows_tls,
+ * # Some tenant migration statistics field names were changed in 6.1.
+ * requires_fcv_61,
* requires_majority_read_concern,
* requires_persistence,
* # Tenant migrations are only used in serverless.
@@ -99,13 +101,13 @@ function testDonorStartMigrationInterrupt(interruptFunc,
jsTestLog(`Stats at the donor primary: ${tojson(donorStats)}`);
if (donorRestarted) {
// If full restart happened the count could be lost completely.
- assert.gte(1, donorStats.totalSuccessfulMigrationsDonated);
+ assert.gte(1, donorStats.totalMigrationDonationsCommitted);
} else {
// The double counting happens when the failover happens after migration completes
// but before the state doc GC mark is persisted. While this test is targeting this
// scenario it is low probability in production.
- assert(1 == donorStats.totalSuccessfulMigrationsDonated ||
- 2 == donorStats.totalSuccessfulMigrationsDonated);
+ assert(1 == donorStats.totalMigrationDonationsCommitted ||
+ 2 == donorStats.totalMigrationDonationsCommitted);
}
// Skip checking the stats on the recipient since enableRecipientTesting is false
// so the recipient is forced to respond to recipientSyncData without starting the
diff --git a/jstests/replsets/tenant_migration_donor_state_machine.js b/jstests/replsets/tenant_migration_donor_state_machine.js
index 2afaf565de4..cd251bb6657 100644
--- a/jstests/replsets/tenant_migration_donor_state_machine.js
+++ b/jstests/replsets/tenant_migration_donor_state_machine.js
@@ -5,6 +5,8 @@
* @tags: [
* incompatible_with_macos,
* incompatible_with_windows_tls,
+ * # Some tenant migration statistics field names were changed in 6.1.
+ * requires_fcv_61,
* requires_majority_read_concern,
* requires_persistence,
* serverless,
@@ -116,19 +118,15 @@ let configDonorsColl = donorPrimary.getCollection(TenantMigrationTest.kConfigDon
function testStats(node, {
currentMigrationsDonating = 0,
currentMigrationsReceiving = 0,
- totalSuccessfulMigrationsDonated = 0,
- totalSuccessfulMigrationsReceived = 0,
- totalFailedMigrationsDonated = 0,
- totalFailedMigrationsReceived = 0
+ totalMigrationDonationsCommitted = 0,
+ totalMigrationDonationsAborted = 0,
}) {
const stats = tenantMigrationTest.getTenantMigrationStats(node);
jsTestLog(stats);
assert.eq(currentMigrationsDonating, stats.currentMigrationsDonating);
assert.eq(currentMigrationsReceiving, stats.currentMigrationsReceiving);
- assert.eq(totalSuccessfulMigrationsDonated, stats.totalSuccessfulMigrationsDonated);
- assert.eq(totalSuccessfulMigrationsReceived, stats.totalSuccessfulMigrationsReceived);
- assert.eq(totalFailedMigrationsDonated, stats.totalFailedMigrationsDonated);
- assert.eq(totalFailedMigrationsReceived, stats.totalFailedMigrationsReceived);
+ assert.eq(totalMigrationDonationsCommitted, stats.totalMigrationDonationsCommitted);
+ assert.eq(totalMigrationDonationsAborted, stats.totalMigrationDonationsAborted);
}
(() => {
@@ -196,8 +194,7 @@ function testStats(node, {
testDonorForgetMigrationAfterMigrationCompletes(donorRst, recipientRst, migrationId, kTenantId);
- testStats(donorPrimary, {totalSuccessfulMigrationsDonated: 1});
- testStats(recipientPrimary, {totalSuccessfulMigrationsReceived: 1});
+ testStats(donorPrimary, {totalMigrationDonationsCommitted: 1});
})();
(() => {
@@ -242,9 +239,8 @@ function testStats(node, {
testDonorForgetMigrationAfterMigrationCompletes(donorRst, recipientRst, migrationId, kTenantId);
- testStats(donorPrimary, {totalSuccessfulMigrationsDonated: 1, totalFailedMigrationsDonated: 1});
- testStats(recipientPrimary,
- {totalSuccessfulMigrationsReceived: 1, totalFailedMigrationsReceived: 1});
+ testStats(donorPrimary,
+ {totalMigrationDonationsCommitted: 1, totalMigrationDonationsAborted: 1});
})();
(() => {
@@ -284,10 +280,8 @@ function testStats(node, {
testDonorForgetMigrationAfterMigrationCompletes(donorRst, recipientRst, migrationId, kTenantId);
- testStats(donorPrimary, {totalSuccessfulMigrationsDonated: 1, totalFailedMigrationsDonated: 2});
- // The recipient had a chance to synchronize data and from its side the migration succeeded.
- testStats(recipientPrimary,
- {totalSuccessfulMigrationsReceived: 2, totalFailedMigrationsReceived: 1});
+ testStats(donorPrimary,
+ {totalMigrationDonationsCommitted: 1, totalMigrationDonationsAborted: 2});
})();
// Drop the TTL index to make sure that the migration state is still available when the
diff --git a/jstests/replsets/tenant_migration_recipient_resume_on_stepup_and_restart.js b/jstests/replsets/tenant_migration_recipient_resume_on_stepup_and_restart.js
index c51f3e2a3ee..01d98b34097 100644
--- a/jstests/replsets/tenant_migration_recipient_resume_on_stepup_and_restart.js
+++ b/jstests/replsets/tenant_migration_recipient_resume_on_stepup_and_restart.js
@@ -5,6 +5,8 @@
* incompatible_with_macos,
* incompatible_with_windows_tls,
* incompatible_with_shard_merge,
+ * # Some tenant migration statistics field names were changed in 6.1.
+ * requires_fcv_61,
* requires_majority_read_concern,
* requires_persistence,
* serverless,
@@ -77,18 +79,8 @@ function testRecipientSyncDataInterrupt(interruptFunc, recipientRestarted) {
TenantMigrationTest.DonorState.kCommitted);
assert.commandWorked(tenantMigrationTest.forgetMigration(migrationOpts.migrationIdString));
- tenantMigrationTest.awaitTenantMigrationStatsCounts(donorPrimary,
- {totalSuccessfulMigrationsDonated: 1});
- recipientPrimary = tenantMigrationTest.getRecipientPrimary(); // Could change after interrupt.
- if (!recipientRestarted) {
- tenantMigrationTest.awaitTenantMigrationStatsCounts(recipientPrimary,
- {totalSuccessfulMigrationsReceived: 1});
- } else {
- // In full restart the count could be lost completely.
- const stats = tenantMigrationTest.getTenantMigrationStats(recipientPrimary);
- assert(1 == stats.totalSuccessfulMigrationsReceived ||
- 0 == stats.totalSuccessfulMigrationsReceived);
- }
+ const donorStats = tenantMigrationTest.getTenantMigrationStats(donorPrimary);
+ assert.eq(1, donorStats.totalMigrationDonationsCommitted);
tenantMigrationTest.stop();
recipientRst.stopSet();
diff --git a/src/mongo/db/repl/tenant_migration_donor_service.cpp b/src/mongo/db/repl/tenant_migration_donor_service.cpp
index 313710c291d..98904d323c1 100644
--- a/src/mongo/db/repl/tenant_migration_donor_service.cpp
+++ b/src/mongo/db/repl/tenant_migration_donor_service.cpp
@@ -72,6 +72,7 @@ MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationBeforeFetchingKeys);
MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationDonorBeforeWaitingForKeysToReplicate);
MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationDonorBeforeMarkingStateGarbageCollectable);
MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationDonorAfterMarkingStateGarbageCollectable);
+MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationDonorBeforeDeletingStateDoc);
MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationBeforeEnteringFutureChain);
MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationAfterFetchingAndStoringKeys);
MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationDonorWhileUpdatingStateDoc);
@@ -206,9 +207,6 @@ void TenantMigrationDonorService::abortAllMigrations(OperationContext* opCtx) {
}
}
-// Note this index is required on both the donor and recipient in a tenant migration, since each
-// will copy cluster time keys from the other. The donor service is set up on all mongods on stepup
-// to primary, so this index will be created on both donors and recipients.
ExecutorFuture<void> TenantMigrationDonorService::createStateDocumentTTLIndex(
std::shared_ptr<executor::ScopedTaskExecutor> executor, const CancellationToken& token) {
return AsyncTry([this] {
@@ -267,6 +265,9 @@ ExecutorFuture<void> TenantMigrationDonorService::createExternalKeysTTLIndex(
ExecutorFuture<void> TenantMigrationDonorService::_rebuildService(
std::shared_ptr<executor::ScopedTaskExecutor> executor, const CancellationToken& token) {
return createStateDocumentTTLIndex(executor, token).then([this, executor, token] {
+ // Since a tenant migration donor and recipient both copy signing keys from each other and
+ // put them in the same external keys collection, they share this TTL index (the recipient
+ // service does not also build this TTL index).
return createExternalKeysTTLIndex(executor, token);
});
}
@@ -672,6 +673,25 @@ TenantMigrationDonorService::Instance::_markStateDocAsGarbageCollectable(
.on(**executor, token);
}
+ExecutorFuture<void> TenantMigrationDonorService::Instance::_removeStateDoc(
+ std::shared_ptr<executor::ScopedTaskExecutor> executor, const CancellationToken& token) {
+ return AsyncTry([this, self = shared_from_this()] {
+ auto opCtxHolder = cc().makeOperationContext();
+ auto opCtx = opCtxHolder.get();
+
+ pauseTenantMigrationDonorBeforeDeletingStateDoc.pauseWhileSet(opCtx);
+
+ PersistentTaskStore<TenantMigrationDonorDocument> store(_stateDocumentsNS);
+ store.remove(
+ opCtx,
+ BSON(TenantMigrationDonorDocument::kIdFieldName << _migrationUuid),
+ WriteConcernOptions(1, WriteConcernOptions::SyncMode::UNSET, Seconds(0)));
+ })
+ .until([](Status status) { return status.isOK(); })
+ .withBackoffBetweenIterations(kExponentialBackoff)
+ .on(**executor, token);
+}
+
ExecutorFuture<void> TenantMigrationDonorService::Instance::_waitForMajorityWriteConcern(
std::shared_ptr<executor::ScopedTaskExecutor> executor,
repl::OpTime opTime,
@@ -922,10 +942,10 @@ SemiFuture<void> TenantMigrationDonorService::Instance::run(
// happens after this block and before the state doc GC is persisted.
if (_abortReason) {
TenantMigrationStatistics::get(_serviceContext)
- ->incTotalFailedMigrationsDonated();
+ ->incTotalMigrationDonationsAborted();
} else {
TenantMigrationStatistics::get(_serviceContext)
- ->incTotalSuccessfulMigrationsDonated();
+ ->incTotalMigrationDonationsCommitted();
}
}
})
@@ -933,6 +953,9 @@ SemiFuture<void> TenantMigrationDonorService::Instance::run(
return _waitForForgetMigrationThenMarkMigrationGarbageCollectable(
executor, recipientTargeterRS, token);
})
+ .then([this, self = shared_from_this(), executor, token] {
+ return _waitForGarbageCollectionDelayThenDeleteStateDoc(executor, token);
+ })
.onCompletion([this,
self = shared_from_this(),
token,
@@ -944,11 +967,6 @@ SemiFuture<void> TenantMigrationDonorService::Instance::run(
stdx::lock_guard<Latch> lg(_mutex);
- LOGV2(4920400,
- "Marked migration state as garbage collectable",
- "migrationId"_attr = _migrationUuid,
- "expireAt"_attr = _stateDoc.getExpireAt(),
- "status"_attr = status);
setPromiseFromStatusIfNotReady(lg, _forgetMigrationDurablePromise, status);
LOGV2(5006601,
@@ -1433,7 +1451,7 @@ TenantMigrationDonorService::Instance::_waitForForgetMigrationThenMarkMigrationG
})
.then([this, self = shared_from_this(), executor, token] {
LOGV2(6104911,
- "Marking migration as garbage-collectable.",
+ "Marking external keys as garbage collectable.",
"migrationId"_attr = _migrationUuid,
"tenantId"_attr = _tenantId);
// Note marking the keys as garbage collectable is not atomic with marking the
@@ -1448,6 +1466,10 @@ TenantMigrationDonorService::Instance::_waitForForgetMigrationThenMarkMigrationG
token);
})
.then([this, self = shared_from_this(), executor, token] {
+ LOGV2(6523600,
+ "Marking state document as garbage collectable.",
+ "migrationId"_attr = _migrationUuid,
+ "tenantId"_attr = _tenantId);
return _markStateDocAsGarbageCollectable(executor, token);
})
.then([this, self = shared_from_this(), executor, token](repl::OpTime opTime) {
@@ -1455,6 +1477,30 @@ TenantMigrationDonorService::Instance::_waitForForgetMigrationThenMarkMigrationG
})
.then([this, self = shared_from_this()] {
pauseTenantMigrationDonorAfterMarkingStateGarbageCollectable.pauseWhileSet();
+ stdx::lock_guard<Latch> lg(_mutex);
+ setPromiseOkIfNotReady(lg, _forgetMigrationDurablePromise);
+ });
+}
+
+ExecutorFuture<void>
+TenantMigrationDonorService::Instance::_waitForGarbageCollectionDelayThenDeleteStateDoc(
+ const std::shared_ptr<executor::ScopedTaskExecutor>& executor, const CancellationToken& token) {
+
+ LOGV2(8423362,
+ "Waiting for garbage collection delay before deleting state document",
+ "migrationId"_attr = _migrationUuid,
+ "tenantId"_attr = _tenantId,
+ "expireAt"_attr = *_stateDoc.getExpireAt());
+
+ stdx::lock_guard<Latch> lg(_mutex);
+ return (*executor)
+ ->sleepUntil(*_stateDoc.getExpireAt(), token)
+ .then([this, self = shared_from_this(), executor, token]() {
+ LOGV2(8423363,
+ "Deleting state document",
+ "migrationId"_attr = _migrationUuid,
+ "tenantId"_attr = _tenantId);
+ return _removeStateDoc(executor, token);
});
}
diff --git a/src/mongo/db/repl/tenant_migration_donor_service.h b/src/mongo/db/repl/tenant_migration_donor_service.h
index d8a73412b55..6eb481b45c1 100644
--- a/src/mongo/db/repl/tenant_migration_donor_service.h
+++ b/src/mongo/db/repl/tenant_migration_donor_service.h
@@ -201,6 +201,10 @@ public:
std::shared_ptr<RemoteCommandTargeter> recipientTargeterRS,
const CancellationToken& token);
+ ExecutorFuture<void> _waitForGarbageCollectionDelayThenDeleteStateDoc(
+ const std::shared_ptr<executor::ScopedTaskExecutor>& executor,
+ const CancellationToken& token);
+
/**
* Makes a task executor for executing commands against the recipient. If the server
* parameter 'tenantMigrationDisableX509Auth' is false, configures the executor to use the
@@ -227,6 +231,14 @@ public:
const CancellationToken& token);
/**
+ * Deletes the state document. Does not return the opTime for the delete, since it's not
+ * necessary to wait for this delete to be majority committed (this is the last step in the
+ * chain, and if the delete rolls back, the new primary will re-do the delete).
+ */
+ ExecutorFuture<void> _removeStateDoc(std::shared_ptr<executor::ScopedTaskExecutor> executor,
+ const CancellationToken& token);
+
+ /**
* Sets the "expireAt" time for the state document to be garbage collected, and returns the
* the opTime for the write.
*/
diff --git a/src/mongo/db/repl/tenant_migration_recipient_service.cpp b/src/mongo/db/repl/tenant_migration_recipient_service.cpp
index f540dbe553a..427f5410e15 100644
--- a/src/mongo/db/repl/tenant_migration_recipient_service.cpp
+++ b/src/mongo/db/repl/tenant_migration_recipient_service.cpp
@@ -2928,7 +2928,6 @@ SemiFuture<void> TenantMigrationRecipientService::Instance::run(
}
_cleanupOnDataSyncCompletion(status);
- _setMigrationStatsOnCompletion(status);
// Handle recipientForgetMigration.
stdx::lock_guard lk(_mutex);
@@ -3000,35 +2999,6 @@ SemiFuture<void> TenantMigrationRecipientService::Instance::run(
.semi();
}
-void TenantMigrationRecipientService::Instance::_setMigrationStatsOnCompletion(
- Status completionStatus) const {
- bool success = false;
-
- if (completionStatus.code() == ErrorCodes::TenantMigrationForgotten) {
- stdx::lock_guard lk(_mutex);
- if (_stateDoc.getExpireAt()) {
- // Avoid double counting tenant migration statistics after failover.
- return;
- }
- // The migration committed if and only if it received recipientForgetMigration after it has
- // applied data past the returnAfterReachingDonorTimestamp, saved in state doc as
- // rejectReadsBeforeTimestamp.
- if (_stateDoc.getRejectReadsBeforeTimestamp().has_value()) {
- success = true;
- }
- } else if (ErrorCodes::isRetriableError(completionStatus)) {
- // The migration was interrupted due to shutdown or stepdown, avoid incrementing the count
- // for failed migrations since the migration will be resumed on stepup.
- return;
- }
-
- if (success) {
- TenantMigrationStatistics::get(_serviceContext)->incTotalSuccessfulMigrationsReceived();
- } else {
- TenantMigrationStatistics::get(_serviceContext)->incTotalFailedMigrationsReceived();
- }
-}
-
const UUID& TenantMigrationRecipientService::Instance::getMigrationUUID() const {
return _migrationUuid;
}
diff --git a/src/mongo/db/repl/tenant_migration_recipient_service.h b/src/mongo/db/repl/tenant_migration_recipient_service.h
index 820ffdae0fd..9e292e0041f 100644
--- a/src/mongo/db/repl/tenant_migration_recipient_service.h
+++ b/src/mongo/db/repl/tenant_migration_recipient_service.h
@@ -582,12 +582,6 @@ public:
void _compareRecipientAndDonorFCV() const;
/*
- * Increments either 'totalSuccessfulMigrationsReceived' or 'totalFailedMigrationsReceived'
- * in TenantMigrationStatistics by examining status and promises.
- */
- void _setMigrationStatsOnCompletion(Status completionStatus) const;
-
- /*
* Sets up internal state to begin migration.
*/
void _setup();
diff --git a/src/mongo/db/repl/tenant_migration_statistics.cpp b/src/mongo/db/repl/tenant_migration_statistics.cpp
index b18bfe028af..40f4c69725b 100644
--- a/src/mongo/db/repl/tenant_migration_statistics.cpp
+++ b/src/mongo/db/repl/tenant_migration_statistics.cpp
@@ -54,29 +54,19 @@ TenantMigrationStatistics::getScopedOutstandingReceivingCount() {
[this] { _currentMigrationsReceiving.fetchAndAddRelaxed(-1); });
}
-void TenantMigrationStatistics::incTotalSuccessfulMigrationsDonated() {
- _totalSuccessfulMigrationsDonated.fetchAndAddRelaxed(1);
+void TenantMigrationStatistics::incTotalMigrationDonationsCommitted() {
+ _totalMigrationDonationsCommitted.fetchAndAddRelaxed(1);
}
-void TenantMigrationStatistics::incTotalSuccessfulMigrationsReceived() {
- _totalSuccessfulMigrationsReceived.fetchAndAddRelaxed(1);
-}
-
-void TenantMigrationStatistics::incTotalFailedMigrationsDonated() {
- _totalFailedMigrationsDonated.fetchAndAddRelaxed(1);
-}
-
-void TenantMigrationStatistics::incTotalFailedMigrationsReceived() {
- _totalFailedMigrationsReceived.fetchAndAddRelaxed(1);
+void TenantMigrationStatistics::incTotalMigrationDonationsAborted() {
+ _totalMigrationDonationsAborted.fetchAndAddRelaxed(1);
}
void TenantMigrationStatistics::appendInfoForServerStatus(BSONObjBuilder* builder) const {
builder->append("currentMigrationsDonating", _currentMigrationsDonating.load());
builder->append("currentMigrationsReceiving", _currentMigrationsReceiving.load());
- builder->append("totalSuccessfulMigrationsDonated", _totalSuccessfulMigrationsDonated.load());
- builder->append("totalSuccessfulMigrationsReceived", _totalSuccessfulMigrationsReceived.load());
- builder->append("totalFailedMigrationsDonated", _totalFailedMigrationsDonated.load());
- builder->append("totalFailedMigrationsReceived", _totalFailedMigrationsReceived.load());
+ builder->append("totalMigrationDonationsCommitted", _totalMigrationDonationsCommitted.load());
+ builder->append("totalMigrationDonationsAborted", _totalMigrationDonationsAborted.load());
}
} // namespace mongo
diff --git a/src/mongo/db/repl/tenant_migration_statistics.h b/src/mongo/db/repl/tenant_migration_statistics.h
index 37c640712b7..1043858ef9b 100644
--- a/src/mongo/db/repl/tenant_migration_statistics.h
+++ b/src/mongo/db/repl/tenant_migration_statistics.h
@@ -49,20 +49,16 @@ public:
std::unique_ptr<ScopeGuard<std::function<void()>>> getScopedOutstandingDonatingCount();
std::unique_ptr<ScopeGuard<std::function<void()>>> getScopedOutstandingReceivingCount();
- void incTotalSuccessfulMigrationsDonated();
- void incTotalSuccessfulMigrationsReceived();
- void incTotalFailedMigrationsDonated();
- void incTotalFailedMigrationsReceived();
+ void incTotalMigrationDonationsCommitted();
+ void incTotalMigrationDonationsAborted();
void appendInfoForServerStatus(BSONObjBuilder* builder) const;
private:
AtomicWord<long long> _currentMigrationsDonating;
AtomicWord<long long> _currentMigrationsReceiving;
- AtomicWord<long long> _totalSuccessfulMigrationsDonated;
- AtomicWord<long long> _totalSuccessfulMigrationsReceived;
- AtomicWord<long long> _totalFailedMigrationsDonated;
- AtomicWord<long long> _totalFailedMigrationsReceived;
+ AtomicWord<long long> _totalMigrationDonationsCommitted;
+ AtomicWord<long long> _totalMigrationDonationsAborted;
};
} // namespace mongo