summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Vishnu Kaushik <vishnu.kaushik@mongodb.com> 2021-02-11 22:05:17 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-02-16 16:24:03 +0000
commit 09c474af1cca7c54d3f93ac46d8fbe9549cd4689 (patch)
tree ae0401389dadefed36a28317c9bd6e23c79ddf06
parent 932032e1f3749d25cf17fd477bf9feddf1a2bdeb (diff)
download mongo-09c474af1cca7c54d3f93ac46d8fbe9549cd4689.tar.gz
SERVER-54478 Fix issue where recipient primary failover after forget migration can lead to the oplog buffer collection not being dropped on new primary
-rw-r--r-- jstests/replsets/tenant_migration_recipient_stepdown_after_forget.js 71
-rw-r--r-- src/mongo/db/repl/tenant_migration_recipient_service.cpp 11
2 files changed, 77 insertions, 5 deletions
diff --git a/jstests/replsets/tenant_migration_recipient_stepdown_after_forget.js b/jstests/replsets/tenant_migration_recipient_stepdown_after_forget.js
new file mode 100644
index 00000000000..53b93f73d57
--- /dev/null
+++ b/jstests/replsets/tenant_migration_recipient_stepdown_after_forget.js
@@ -0,0 +1,71 @@
+/**
+ * Tests that the new recipient primary properly processes a forgetMigration when the original
+ * primary is made to step down after marking the migration as garbage collectable. The oplog
+ * buffer collection must still be dropped on the new primary.
+ *
+ * @tags: [requires_fcv_49, requires_replication, incompatible_with_windows_tls]
+ */
+
+(function() {
+
+"use strict";
+load("jstests/libs/uuid_util.js"); // For extractUUIDFromObject().
+load("jstests/libs/fail_point_util.js"); // For configureFailPoint().
+load("jstests/libs/parallelTester.js"); // For Thread(), used for async forgetMigration.
+load("jstests/replsets/libs/tenant_migration_test.js");
+load("jstests/replsets/libs/tenant_migration_util.js");
+
+const tenantMigrationTest =
+ new TenantMigrationTest({name: jsTestName(), sharedOptions: {nodes: 2}});
+
+if (!tenantMigrationTest.isFeatureFlagEnabled()) {
+ jsTestLog("Skipping test because the tenant migrations feature flag is disabled");
+ tenantMigrationTest.stop();
+ return;
+}
+
+const kMigrationId = UUID();
+const kTenantId = 'testTenantId';
+const kReadPreference = {
+ mode: "primary"
+};
+const migrationOpts = {
+ migrationIdString: extractUUIDFromObject(kMigrationId),
+ tenantId: kTenantId,
+ readPreference: kReadPreference
+};
+
+tenantMigrationTest.runMigration(
+ migrationOpts, true /* retryOnRetryableErrors */, false /* automaticForgetMigration */);
+
+const fpBeforeDroppingOplogBufferCollection =
+ configureFailPoint(tenantMigrationTest.getRecipientPrimary(),
+ "fpBeforeDroppingOplogBufferCollection",
+ {action: "hang"});
+
+jsTestLog("Issuing a forget migration command.");
+const forgetMigrationThread =
+ new Thread(TenantMigrationUtil.forgetMigrationAsync,
+ migrationOpts.migrationIdString,
+ TenantMigrationUtil.createRstArgs(tenantMigrationTest.getDonorRst()),
+ true /* retryOnRetryableErrors */);
+forgetMigrationThread.start();
+
+fpBeforeDroppingOplogBufferCollection.wait();
+
+jsTestLog("Step up a new recipient primary.");
+assert.commandWorked(tenantMigrationTest.getRecipientRst().getSecondaries()[0].adminCommand(
+ {replSetStepUp: ReplSetTest.kForeverSecs, force: true}));
+
+fpBeforeDroppingOplogBufferCollection.off();
+
+jsTestLog("Waiting for forget migration to complete.");
+assert.commandWorked(forgetMigrationThread.returnData());
+
+const configDBCollections =
+ tenantMigrationTest.getRecipientPrimary().getDB('config').getCollectionNames();
+assert(!configDBCollections.includes('repl.migration.oplog_' + migrationOpts.migrationIdString),
+ configDBCollections);
+
+tenantMigrationTest.stop();
+})();
\ No newline at end of file
diff --git a/src/mongo/db/repl/tenant_migration_recipient_service.cpp b/src/mongo/db/repl/tenant_migration_recipient_service.cpp
index d0780d91cd2..bf964c2e834 100644
--- a/src/mongo/db/repl/tenant_migration_recipient_service.cpp
+++ b/src/mongo/db/repl/tenant_migration_recipient_service.cpp
@@ -100,6 +100,7 @@ MONGO_FAIL_POINT_DEFINE(hangBeforeTaskCompletion);
MONGO_FAIL_POINT_DEFINE(fpAfterReceivingRecipientForgetMigration);
MONGO_FAIL_POINT_DEFINE(hangAfterCreatingRSM);
MONGO_FAIL_POINT_DEFINE(skipRetriesWhenConnectingToDonorHost);
+MONGO_FAIL_POINT_DEFINE(fpBeforeDroppingOplogBufferCollection);
namespace {
// We never restart just the oplog fetcher. If a failure occurs, we restart the whole state machine
@@ -1377,15 +1378,14 @@ SemiFuture<void> TenantMigrationRecipientService::Instance::run(
pauseAfterRunTenantMigrationRecipientInstance.pauseWhileSet();
- uassert(ErrorCodes::TenantMigrationForgotten,
- str::stream() << "Migration " << getMigrationUUID()
- << " already marked for garbage collect",
- !_stateDoc.getExpireAt());
-
return _initializeStateDoc(lk);
})
.then([this, self = shared_from_this()] {
_stateDocPersistedPromise.emplaceValue();
+ uassert(ErrorCodes::TenantMigrationForgotten,
+ str::stream() << "Migration " << getMigrationUUID()
+ << " already marked for garbage collect",
+ !_stateDoc.getExpireAt());
_stopOrHangOnFailPoint(&fpAfterPersistingTenantMigrationRecipientInstanceStateDoc);
return _createAndConnectClients();
})
@@ -1604,6 +1604,7 @@ SemiFuture<void> TenantMigrationRecipientService::Instance::run(
})
.then([this, self = shared_from_this()] { return _markStateDocAsGarbageCollectable(); })
.then([this, self = shared_from_this()] {
+ _stopOrHangOnFailPoint(&fpBeforeDroppingOplogBufferCollection);
auto opCtx = cc().makeOperationContext();
auto storageInterface = StorageInterface::get(opCtx.get());