summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSophia Tan <sophia_tll@hotmail.com>2022-08-22 18:08:21 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-08-22 18:52:06 +0000
commitb719d98d47f2866d670d44a9a7537dd825a37824 (patch)
tree77eec61be858d8bf54c408b1dbe48f54000cedcc
parentb00a1cb333043adfd4d0867ec01d9b699a17cf6a (diff)
downloadmongo-b719d98d47f2866d670d44a9a7537dd825a37824.tar.gz
Revert "SERVER-67372 Make tenant migration recipient delete its state document in its run method"
This reverts commit a96f1436e02feea955565ac0ea39f8b56f087639.
-rw-r--r--jstests/replsets/tenant_migration_recipient_directly_deletes_its_state_doc.js133
-rw-r--r--src/mongo/db/repl/tenant_migration_recipient_service.cpp62
-rw-r--r--src/mongo/db/repl/tenant_migration_recipient_service.h13
-rw-r--r--src/mongo/db/repl/tenant_migration_recipient_service_test.cpp6
4 files changed, 10 insertions, 204 deletions
diff --git a/jstests/replsets/tenant_migration_recipient_directly_deletes_its_state_doc.js b/jstests/replsets/tenant_migration_recipient_directly_deletes_its_state_doc.js
deleted file mode 100644
index fd49cb3da4b..00000000000
--- a/jstests/replsets/tenant_migration_recipient_directly_deletes_its_state_doc.js
+++ /dev/null
@@ -1,133 +0,0 @@
-/**
- * Tests that a tenant migration recipient instance shows up as active in serverStatus metrics until
- * it has directly deleted its state doc (even if the state doc has actually already been deleted by
- * the TTL monitor).
- *
- * @tags: [
- * incompatible_with_macos,
- * incompatible_with_windows_tls,
- * # Uses pauseTenantMigrationRecipientBeforeDeletingStateDoc failpoint, which was added in 6.1
- * requires_fcv_61,
- * requires_majority_read_concern,
- * requires_persistence,
- * serverless,
- * ]
- */
-
-(function() {
-"use strict";
-
-load("jstests/libs/fail_point_util.js");
-load("jstests/libs/uuid_util.js");
-load("jstests/replsets/libs/tenant_migration_test.js");
-
-(() => {
- jsTest.log("Test case where the TTL monitor deletes the state doc first");
-
- const tmt = new TenantMigrationTest({name: jsTestName(), quickGarbageCollection: true});
-
- const recipientPrimary = tmt.getRecipientPrimary();
- const fp =
- configureFailPoint(recipientPrimary, "pauseTenantMigrationRecipientBeforeDeletingStateDoc");
-
- jsTest.log("Confirm serverStatus does not show any active or completed tenant migrations.");
- let recipientStats = tmt.getTenantMigrationStats(recipientPrimary);
- jsTest.log("recipientStats: " + tojson(recipientStats));
- assert.eq(0, recipientStats.currentMigrationsReceiving);
-
- jsTest.log("Start a tenant migration.");
- const tenantId = "testTenantId";
- const migrationId = extractUUIDFromObject(UUID());
- const migrationOpts = {
- migrationIdString: migrationId,
- tenantId: tenantId,
- recipientConnString: tmt.getRecipientConnString(),
- };
-
- TenantMigrationTest.assertCommitted(tmt.runMigration(migrationOpts));
-
- jsTest.log("Wait for the migration to reach the failpoint before deleting the state doc.");
- fp.wait();
-
- jsTest.log("Confirm the state doc has been deleted by the TTL monitor");
- assert.soon(() => {
- return 0 ==
- recipientPrimary.getDB("config").getCollection("tenantMigrationRecipients").count();
- });
-
- jsTest.log("Confirm the instance still shows up as active in serverStatus.");
- recipientStats = tmt.getTenantMigrationStats(recipientPrimary);
- jsTest.log("recipientStats: " + tojson(recipientStats));
- assert.eq(1, recipientStats.currentMigrationsReceiving);
-
- // TODO (SERVER-61717): Confirm the instance still shows up in the POS map. Currently, the
- // instance is removed from the map as soon as its' state doc is deleted by the TTL monitor.
-
- jsTest.log("Turn off the failpoint.");
- fp.off();
-
- jsTest.log("Confirm the instance eventually stops showing up as active in serverStatus");
- assert.soon(() => {
- recipientStats = tmt.getTenantMigrationStats(recipientPrimary);
- return 0 == recipientStats.currentMigrationsReceiving;
- });
-
- // TODO (SERVER-61717): Confirm the instance eventually stops showing up in the POS map.
-
- tmt.stop();
-})();
-
-(() => {
- jsTest.log("Test case where the instance deletes the state doc first");
-
- const tmt = new TenantMigrationTest({name: jsTestName(), quickGarbageCollection: true});
-
- const recipientPrimary = tmt.getRecipientPrimary();
-
- jsTest.log("Confirm the TTL index exists");
- const listIndexesRes1 = assert.commandWorked(
- recipientPrimary.getDB("config").runCommand({listIndexes: "tenantMigrationRecipients"}));
- assert(listIndexesRes1.cursor.firstBatch.some(
- elem => elem.name === "TenantMigrationRecipientTTLIndex" && elem.key.expireAt === 1));
-
- jsTest.log("Drop the TTL index");
- assert.commandWorked(recipientPrimary.getDB("config").runCommand(
- {dropIndexes: "tenantMigrationRecipients", index: "TenantMigrationRecipientTTLIndex"}));
-
- jsTest.log("Confirm the TTL index no longer exists");
- const listIndexesRes2 = assert.commandWorked(
- recipientPrimary.getDB("config").runCommand({listIndexes: "tenantMigrationRecipients"}));
- assert(listIndexesRes2.cursor.firstBatch.every(elem => elem.key.expireAt == null));
-
- jsTest.log("Confirm serverStatus does not show any active or completed tenant migrations.");
- let recipientStats = tmt.getTenantMigrationStats(recipientPrimary);
- jsTest.log("recipientStats: " + tojson(recipientStats));
- assert.eq(0, recipientStats.currentMigrationsReceiving);
-
- jsTest.log("Start a tenant migration.");
- const tenantId = "testTenantId";
- const migrationId = extractUUIDFromObject(UUID());
- const migrationOpts = {
- migrationIdString: migrationId,
- tenantId: tenantId,
- recipientConnString: tmt.getRecipientConnString(),
- };
- TenantMigrationTest.assertCommitted(tmt.runMigration(migrationOpts));
-
- jsTest.log("Wait for the instance to delete the state doc");
- assert.soon(() => {
- return 0 ==
- recipientPrimary.getDB("config").getCollection("tenantMigrationRecipients").count();
- });
-
- jsTest.log("Confirm the instance eventually stops showing up as active in serverStatus");
- assert.soon(() => {
- recipientStats = tmt.getTenantMigrationStats(recipientPrimary);
- return 0 == recipientStats.currentMigrationsReceiving;
- });
-
- // TODO (SERVER-61717): Confirm the instance eventually stops showing up in the POS map.
-
- tmt.stop();
-})();
-})();
diff --git a/src/mongo/db/repl/tenant_migration_recipient_service.cpp b/src/mongo/db/repl/tenant_migration_recipient_service.cpp
index 9be2540d5ce..7f3b1e2474f 100644
--- a/src/mongo/db/repl/tenant_migration_recipient_service.cpp
+++ b/src/mongo/db/repl/tenant_migration_recipient_service.cpp
@@ -49,7 +49,6 @@
#include "mongo/db/namespace_string.h"
#include "mongo/db/op_observer/op_observer.h"
#include "mongo/db/ops/write_ops_exec.h"
-#include "mongo/db/persistent_task_store.h"
#include "mongo/db/pipeline/process_interface/mongo_process_interface.h"
#include "mongo/db/repl/cloner_utils.h"
#include "mongo/db/repl/data_replicator_external_state.h"
@@ -147,7 +146,6 @@ MONGO_FAIL_POINT_DEFINE(skipComparingRecipientAndDonorFCV);
MONGO_FAIL_POINT_DEFINE(autoRecipientForgetMigration);
MONGO_FAIL_POINT_DEFINE(skipFetchingCommittedTransactions);
MONGO_FAIL_POINT_DEFINE(skipFetchingRetryableWritesEntriesBeforeStartOpTime);
-MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationRecipientBeforeDeletingStateDoc);
// Fails before waiting for the state doc to be majority replicated.
MONGO_FAIL_POINT_DEFINE(failWhilePersistingTenantMigrationRecipientInstanceStateDoc);
@@ -2986,13 +2984,6 @@ SemiFuture<void> TenantMigrationRecipientService::Instance::run(
return storageInterface->dropCollection(opCtx.get(),
getOplogBufferNs(getMigrationUUID()));
})
- .then([this, self = shared_from_this(), token] {
- {
- stdx::lock_guard lk(_mutex);
- setPromiseOkifNotReady(lk, _forgetMigrationDurablePromise);
- }
- return _waitForGarbageCollectionDelayThenDeleteStateDoc(token);
- })
.thenRunOn(_recipientService->getInstanceCleanupExecutor())
.onCompletion([this,
self = shared_from_this(),
@@ -3001,7 +2992,16 @@ SemiFuture<void> TenantMigrationRecipientService::Instance::run(
// is safe even on shutDown/stepDown.
stdx::lock_guard lk(_mutex);
invariant(_dataSyncCompletionPromise.getFuture().isReady());
- if (!status.isOK()) {
+ if (status.isOK()) {
+ LOGV2(4881401,
+ "Migration marked to be garbage collectable due to "
+ "recipientForgetMigration "
+ "command",
+ "migrationId"_attr = getMigrationUUID(),
+ "tenantId"_attr = getTenantId(),
+ "expireAt"_attr = *_stateDoc.getExpireAt());
+ setPromiseOkifNotReady(lk, _forgetMigrationDurablePromise);
+ } else {
// We should only hit here on a stepDown/shutDown, or a 'conflicting migration'
// error.
LOGV2(4881402,
@@ -3016,48 +3016,6 @@ SemiFuture<void> TenantMigrationRecipientService::Instance::run(
.semi();
}
-SemiFuture<void> TenantMigrationRecipientService::Instance::_removeStateDoc(
- const CancellationToken& token) {
- return AsyncTry([this, self = shared_from_this()] {
- auto opCtxHolder = cc().makeOperationContext();
- auto opCtx = opCtxHolder.get();
-
- pauseTenantMigrationRecipientBeforeDeletingStateDoc.pauseWhileSet(opCtx);
-
- PersistentTaskStore<TenantMigrationRecipientDocument> store(_stateDocumentsNS);
- store.remove(
- opCtx,
- BSON(TenantMigrationRecipientDocument::kIdFieldName << _migrationUuid),
- WriteConcernOptions(1, WriteConcernOptions::SyncMode::UNSET, Seconds(0)));
- })
- .until([](Status status) { return status.isOK(); })
- .withBackoffBetweenIterations(kExponentialBackoff)
- .on(**_scopedExecutor, token)
- .semi();
-}
-
-SemiFuture<void>
-TenantMigrationRecipientService::Instance::_waitForGarbageCollectionDelayThenDeleteStateDoc(
- const CancellationToken& token) {
- stdx::lock_guard<Latch> lg(_mutex);
- LOGV2(8423364,
- "Waiting for garbage collection delay before deleting state document",
- "migrationId"_attr = _migrationUuid,
- "tenantId"_attr = _tenantId,
- "expireAt"_attr = *_stateDoc.getExpireAt());
-
- return (**_scopedExecutor)
- ->sleepUntil(*_stateDoc.getExpireAt(), token)
- .then([this, self = shared_from_this(), token]() {
- LOGV2(8423365,
- "Deleting state document",
- "migrationId"_attr = _migrationUuid,
- "tenantId"_attr = _tenantId);
- return _removeStateDoc(token);
- })
- .semi();
-}
-
const UUID& TenantMigrationRecipientService::Instance::getMigrationUUID() const {
return _migrationUuid;
}
diff --git a/src/mongo/db/repl/tenant_migration_recipient_service.h b/src/mongo/db/repl/tenant_migration_recipient_service.h
index 22de9a00fd1..bce11fb695d 100644
--- a/src/mongo/db/repl/tenant_migration_recipient_service.h
+++ b/src/mongo/db/repl/tenant_migration_recipient_service.h
@@ -214,9 +214,6 @@ public:
private:
friend class TenantMigrationRecipientServiceTest;
- const NamespaceString _stateDocumentsNS =
- NamespaceString::kTenantMigrationRecipientsNamespace;
-
using ConnectionPair =
std::pair<std::unique_ptr<DBClientConnection>, std::unique_ptr<DBClientConnection>>;
@@ -337,16 +334,6 @@ public:
SemiFuture<void> _markStateDocAsGarbageCollectable();
/**
- * Deletes the state document. Does not return the opTime for the delete, since it's not
- * necessary to wait for this delete to be majority committed (this is one of the last steps
- * in the chain, and if the delete rolls back, the new primary will re-do the delete).
- */
- SemiFuture<void> _removeStateDoc(const CancellationToken& token);
-
- SemiFuture<void> _waitForGarbageCollectionDelayThenDeleteStateDoc(
- const CancellationToken& token);
-
- /**
* Creates a client, connects it to the donor. If '_transientSSLParams' is not none, uses
* the migration certificate to do SSL authentication. Otherwise, uses the default
* authentication mode. Throws a user assertion on failure.
diff --git a/src/mongo/db/repl/tenant_migration_recipient_service_test.cpp b/src/mongo/db/repl/tenant_migration_recipient_service_test.cpp
index 3e78be1a5f7..acc80047451 100644
--- a/src/mongo/db/repl/tenant_migration_recipient_service_test.cpp
+++ b/src/mongo/db/repl/tenant_migration_recipient_service_test.cpp
@@ -2722,12 +2722,6 @@ TEST_F(TenantMigrationRecipientServiceTest, RecipientForgetMigration_WaitUntilSt
AssertionException,
opCtx->getTimeoutError());
- // Hang the chain before deleting the state doc until it can be verified that the state doc was
- // persisted.
- FailPointEnableBlock fpDeletingStateDoc("pauseTenantMigrationRecipientBeforeDeletingStateDoc",
- BSON("action"
- << "hang"));
-
{
// Hang the chain after persisting the state doc.
FailPointEnableBlock fpPersistingStateDoc(