diff options
author | Sophia Tan <sophia_tll@hotmail.com> | 2022-08-22 18:08:21 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-08-22 18:52:06 +0000 |
commit | b719d98d47f2866d670d44a9a7537dd825a37824 (patch) | |
tree | 77eec61be858d8bf54c408b1dbe48f54000cedcc | |
parent | b00a1cb333043adfd4d0867ec01d9b699a17cf6a (diff) | |
download | mongo-b719d98d47f2866d670d44a9a7537dd825a37824.tar.gz |
Revert "SERVER-67372 Make tenant migration recipient delete its state document in its run method"
This reverts commit a96f1436e02feea955565ac0ea39f8b56f087639.
4 files changed, 10 insertions, 204 deletions
diff --git a/jstests/replsets/tenant_migration_recipient_directly_deletes_its_state_doc.js b/jstests/replsets/tenant_migration_recipient_directly_deletes_its_state_doc.js deleted file mode 100644 index fd49cb3da4b..00000000000 --- a/jstests/replsets/tenant_migration_recipient_directly_deletes_its_state_doc.js +++ /dev/null @@ -1,133 +0,0 @@ -/** - * Tests that a tenant migration recipient instance shows up as active in serverStatus metrics until - * it has directly deleted its state doc (even if the state doc has actually already been deleted by - * the TTL monitor). - * - * @tags: [ - * incompatible_with_macos, - * incompatible_with_windows_tls, - * # Uses pauseTenantMigrationRecipientBeforeDeletingStateDoc failpoint, which was added in 6.1 - * requires_fcv_61, - * requires_majority_read_concern, - * requires_persistence, - * serverless, - * ] - */ - -(function() { -"use strict"; - -load("jstests/libs/fail_point_util.js"); -load("jstests/libs/uuid_util.js"); -load("jstests/replsets/libs/tenant_migration_test.js"); - -(() => { - jsTest.log("Test case where the TTL monitor deletes the state doc first"); - - const tmt = new TenantMigrationTest({name: jsTestName(), quickGarbageCollection: true}); - - const recipientPrimary = tmt.getRecipientPrimary(); - const fp = - configureFailPoint(recipientPrimary, "pauseTenantMigrationRecipientBeforeDeletingStateDoc"); - - jsTest.log("Confirm serverStatus does not show any active or completed tenant migrations."); - let recipientStats = tmt.getTenantMigrationStats(recipientPrimary); - jsTest.log("recipientStats: " + tojson(recipientStats)); - assert.eq(0, recipientStats.currentMigrationsReceiving); - - jsTest.log("Start a tenant migration."); - const tenantId = "testTenantId"; - const migrationId = extractUUIDFromObject(UUID()); - const migrationOpts = { - migrationIdString: migrationId, - tenantId: tenantId, - recipientConnString: tmt.getRecipientConnString(), - }; - - TenantMigrationTest.assertCommitted(tmt.runMigration(migrationOpts)); - - jsTest.log("Wait for the migration to reach the failpoint before deleting the state doc."); - fp.wait(); - - jsTest.log("Confirm the state doc has been deleted by the TTL monitor"); - assert.soon(() => { - return 0 == - recipientPrimary.getDB("config").getCollection("tenantMigrationRecipients").count(); - }); - - jsTest.log("Confirm the instance still shows up as active in serverStatus."); - recipientStats = tmt.getTenantMigrationStats(recipientPrimary); - jsTest.log("recipientStats: " + tojson(recipientStats)); - assert.eq(1, recipientStats.currentMigrationsReceiving); - - // TODO (SERVER-61717): Confirm the instance still shows up in the POS map. Currently, the - // instance is removed from the map as soon as its' state doc is deleted by the TTL monitor. - - jsTest.log("Turn off the failpoint."); - fp.off(); - - jsTest.log("Confirm the instance eventually stops showing up as active in serverStatus"); - assert.soon(() => { - recipientStats = tmt.getTenantMigrationStats(recipientPrimary); - return 0 == recipientStats.currentMigrationsReceiving; - }); - - // TODO (SERVER-61717): Confirm the instance eventually stops showing up in the POS map. - - tmt.stop(); -})(); - -(() => { - jsTest.log("Test case where the instance deletes the state doc first"); - - const tmt = new TenantMigrationTest({name: jsTestName(), quickGarbageCollection: true}); - - const recipientPrimary = tmt.getRecipientPrimary(); - - jsTest.log("Confirm the TTL index exists"); - const listIndexesRes1 = assert.commandWorked( - recipientPrimary.getDB("config").runCommand({listIndexes: "tenantMigrationRecipients"})); - assert(listIndexesRes1.cursor.firstBatch.some( - elem => elem.name === "TenantMigrationRecipientTTLIndex" && elem.key.expireAt === 1)); - - jsTest.log("Drop the TTL index"); - assert.commandWorked(recipientPrimary.getDB("config").runCommand( - {dropIndexes: "tenantMigrationRecipients", index: "TenantMigrationRecipientTTLIndex"})); - - jsTest.log("Confirm the TTL index no longer exists"); - const listIndexesRes2 = assert.commandWorked( - recipientPrimary.getDB("config").runCommand({listIndexes: "tenantMigrationRecipients"})); - assert(listIndexesRes2.cursor.firstBatch.every(elem => elem.key.expireAt == null)); - - jsTest.log("Confirm serverStatus does not show any active or completed tenant migrations."); - let recipientStats = tmt.getTenantMigrationStats(recipientPrimary); - jsTest.log("recipientStats: " + tojson(recipientStats)); - assert.eq(0, recipientStats.currentMigrationsReceiving); - - jsTest.log("Start a tenant migration."); - const tenantId = "testTenantId"; - const migrationId = extractUUIDFromObject(UUID()); - const migrationOpts = { - migrationIdString: migrationId, - tenantId: tenantId, - recipientConnString: tmt.getRecipientConnString(), - }; - TenantMigrationTest.assertCommitted(tmt.runMigration(migrationOpts)); - - jsTest.log("Wait for the instance to delete the state doc"); - assert.soon(() => { - return 0 == - recipientPrimary.getDB("config").getCollection("tenantMigrationRecipients").count(); - }); - - jsTest.log("Confirm the instance eventually stops showing up as active in serverStatus"); - assert.soon(() => { - recipientStats = tmt.getTenantMigrationStats(recipientPrimary); - return 0 == recipientStats.currentMigrationsReceiving; - }); - - // TODO (SERVER-61717): Confirm the instance eventually stops showing up in the POS map. - - tmt.stop(); -})(); -})(); diff --git a/src/mongo/db/repl/tenant_migration_recipient_service.cpp b/src/mongo/db/repl/tenant_migration_recipient_service.cpp index 9be2540d5ce..7f3b1e2474f 100644 --- a/src/mongo/db/repl/tenant_migration_recipient_service.cpp +++ b/src/mongo/db/repl/tenant_migration_recipient_service.cpp @@ -49,7 +49,6 @@ #include "mongo/db/namespace_string.h" #include "mongo/db/op_observer/op_observer.h" #include "mongo/db/ops/write_ops_exec.h" -#include "mongo/db/persistent_task_store.h" #include "mongo/db/pipeline/process_interface/mongo_process_interface.h" #include "mongo/db/repl/cloner_utils.h" #include "mongo/db/repl/data_replicator_external_state.h" @@ -147,7 +146,6 @@ MONGO_FAIL_POINT_DEFINE(skipComparingRecipientAndDonorFCV); MONGO_FAIL_POINT_DEFINE(autoRecipientForgetMigration); MONGO_FAIL_POINT_DEFINE(skipFetchingCommittedTransactions); MONGO_FAIL_POINT_DEFINE(skipFetchingRetryableWritesEntriesBeforeStartOpTime); -MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationRecipientBeforeDeletingStateDoc); // Fails before waiting for the state doc to be majority replicated. MONGO_FAIL_POINT_DEFINE(failWhilePersistingTenantMigrationRecipientInstanceStateDoc); @@ -2986,13 +2984,6 @@ SemiFuture<void> TenantMigrationRecipientService::Instance::run( return storageInterface->dropCollection(opCtx.get(), getOplogBufferNs(getMigrationUUID())); }) - .then([this, self = shared_from_this(), token] { - { - stdx::lock_guard lk(_mutex); - setPromiseOkifNotReady(lk, _forgetMigrationDurablePromise); - } - return _waitForGarbageCollectionDelayThenDeleteStateDoc(token); - }) .thenRunOn(_recipientService->getInstanceCleanupExecutor()) .onCompletion([this, self = shared_from_this(), @@ -3001,7 +2992,16 @@ SemiFuture<void> TenantMigrationRecipientService::Instance::run( // is safe even on shutDown/stepDown. stdx::lock_guard lk(_mutex); invariant(_dataSyncCompletionPromise.getFuture().isReady()); - if (!status.isOK()) { + if (status.isOK()) { + LOGV2(4881401, + "Migration marked to be garbage collectable due to " + "recipientForgetMigration " + "command", + "migrationId"_attr = getMigrationUUID(), + "tenantId"_attr = getTenantId(), + "expireAt"_attr = *_stateDoc.getExpireAt()); + setPromiseOkifNotReady(lk, _forgetMigrationDurablePromise); + } else { // We should only hit here on a stepDown/shutDown, or a 'conflicting migration' // error. LOGV2(4881402, @@ -3016,48 +3016,6 @@ SemiFuture<void> TenantMigrationRecipientService::Instance::run( .semi(); } -SemiFuture<void> TenantMigrationRecipientService::Instance::_removeStateDoc( - const CancellationToken& token) { - return AsyncTry([this, self = shared_from_this()] { - auto opCtxHolder = cc().makeOperationContext(); - auto opCtx = opCtxHolder.get(); - - pauseTenantMigrationRecipientBeforeDeletingStateDoc.pauseWhileSet(opCtx); - - PersistentTaskStore<TenantMigrationRecipientDocument> store(_stateDocumentsNS); - store.remove( - opCtx, - BSON(TenantMigrationRecipientDocument::kIdFieldName << _migrationUuid), - WriteConcernOptions(1, WriteConcernOptions::SyncMode::UNSET, Seconds(0))); - }) - .until([](Status status) { return status.isOK(); }) - .withBackoffBetweenIterations(kExponentialBackoff) - .on(**_scopedExecutor, token) - .semi(); -} - -SemiFuture<void> -TenantMigrationRecipientService::Instance::_waitForGarbageCollectionDelayThenDeleteStateDoc( - const CancellationToken& token) { - stdx::lock_guard<Latch> lg(_mutex); - LOGV2(8423364, - "Waiting for garbage collection delay before deleting state document", - "migrationId"_attr = _migrationUuid, - "tenantId"_attr = _tenantId, - "expireAt"_attr = *_stateDoc.getExpireAt()); - - return (**_scopedExecutor) - ->sleepUntil(*_stateDoc.getExpireAt(), token) - .then([this, self = shared_from_this(), token]() { - LOGV2(8423365, - "Deleting state document", - "migrationId"_attr = _migrationUuid, - "tenantId"_attr = _tenantId); - return _removeStateDoc(token); - }) - .semi(); -} - const UUID& TenantMigrationRecipientService::Instance::getMigrationUUID() const { return _migrationUuid; } diff --git a/src/mongo/db/repl/tenant_migration_recipient_service.h b/src/mongo/db/repl/tenant_migration_recipient_service.h index 22de9a00fd1..bce11fb695d 100644 --- a/src/mongo/db/repl/tenant_migration_recipient_service.h +++ b/src/mongo/db/repl/tenant_migration_recipient_service.h @@ -214,9 +214,6 @@ public: private: friend class TenantMigrationRecipientServiceTest; - const NamespaceString _stateDocumentsNS = - NamespaceString::kTenantMigrationRecipientsNamespace; - using ConnectionPair = std::pair<std::unique_ptr<DBClientConnection>, std::unique_ptr<DBClientConnection>>; @@ -337,16 +334,6 @@ public: SemiFuture<void> _markStateDocAsGarbageCollectable(); /** - * Deletes the state document. Does not return the opTime for the delete, since it's not - * necessary to wait for this delete to be majority committed (this is one of the last steps - * in the chain, and if the delete rolls back, the new primary will re-do the delete). - */ - SemiFuture<void> _removeStateDoc(const CancellationToken& token); - - SemiFuture<void> _waitForGarbageCollectionDelayThenDeleteStateDoc( - const CancellationToken& token); - - /** * Creates a client, connects it to the donor. If '_transientSSLParams' is not none, uses * the migration certificate to do SSL authentication. Otherwise, uses the default * authentication mode. Throws a user assertion on failure. diff --git a/src/mongo/db/repl/tenant_migration_recipient_service_test.cpp b/src/mongo/db/repl/tenant_migration_recipient_service_test.cpp index 3e78be1a5f7..acc80047451 100644 --- a/src/mongo/db/repl/tenant_migration_recipient_service_test.cpp +++ b/src/mongo/db/repl/tenant_migration_recipient_service_test.cpp @@ -2722,12 +2722,6 @@ TEST_F(TenantMigrationRecipientServiceTest, RecipientForgetMigration_WaitUntilSt AssertionException, opCtx->getTimeoutError()); - // Hang the chain before deleting the state doc until it can be verified that the state doc was - // persisted. - FailPointEnableBlock fpDeletingStateDoc("pauseTenantMigrationRecipientBeforeDeletingStateDoc", - BSON("action" - << "hang")); - { // Hang the chain after persisting the state doc. FailPointEnableBlock fpPersistingStateDoc( |