diff options
author | Cheahuychou Mao <mao.cheahuychou@gmail.com> | 2021-04-25 02:12:37 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-05-05 18:39:13 +0000 |
commit | 5f58ab0e48fbe1293a7abd8b770a3ec50512651b (patch) | |
tree | 83b55f57ed90d2ef989820fa33b738425dd6ff61 | |
parent | 202937fa78084e21b17af47d1dc2af1b9be5a38f (diff) | |
download | mongo-5f58ab0e48fbe1293a7abd8b770a3ec50512651b.tar.gz |
SERVER-54302 Write tenant migration test when the state doc collection is dropped
(cherry picked from commit b37de758aa8a5fcc74d8af8b7556e3a18d76e90c)
7 files changed, 262 insertions, 9 deletions
diff --git a/jstests/replsets/tenant_migration_drop_state_doc_collection.js b/jstests/replsets/tenant_migration_drop_state_doc_collection.js
new file mode 100644
index 00000000000..e8b1d36649c
--- /dev/null
+++ b/jstests/replsets/tenant_migration_drop_state_doc_collection.js
@@ -0,0 +1,211 @@
+/**
+ * Tests dropping the donor and recipient state doc collections in the middle of a tenant migration.
+ *
+ * @tags: [requires_fcv_47, requires_majority_read_concern, requires_persistence,
+ *   incompatible_with_eft, incompatible_with_windows_tls]
+ */
+
+(function() {
+"use strict";
+
+load("jstests/libs/fail_point_util.js");
+load("jstests/libs/uuid_util.js");
+load("jstests/replsets/libs/tenant_migration_test.js");
+load("jstests/replsets/libs/tenant_migration_util.js");
+
+const kMigrationFpNames = [
+    "pauseTenantMigrationAfterPersistingInitialDonorStateDoc",
+    "pauseTenantMigrationBeforeLeavingDataSyncState",
+    "pauseTenantMigrationBeforeLeavingBlockingState",
+    "abortTenantMigrationBeforeLeavingBlockingState",
+    null,
+];
+const kTenantId = "testTenantId";
+let testNum = 0;
+
+function makeTenantId() {
+    return kTenantId + testNum++;
+}
+
+function makeMigrationOpts(tenantMigrationTest, tenantId) {
+    return {
+        migrationIdString: extractUUIDFromObject(UUID()),
+        tenantId: tenantId,
+        recipientConnString: tenantMigrationTest.getRecipientConnString()
+    };
+}
+
+/**
+ * Starts a migration and then either waits for the failpoint or lets the migration run to
+ * completion. Next, drops the donor and/or recipient state doc collections and asserts that the
+ * migration is no longer running on the donor and/or recipient. Then, retries the migration (with a
+ * different migration id if 'retryWithDifferentMigrationId' is true) and verifies that the retry
+ * succeeds or fails as expected.
+ */
+function testDroppingStateDocCollections(tenantMigrationTest, fpName, {
+    dropDonorsCollection = false,
+    dropRecipientsCollection = false,
+    retryWithDifferentMigrationId = false,
+    expectedRunMigrationError,
+    expectedAbortReason
+}) {
+    assert(dropDonorsCollection || dropRecipientsCollection);
+
+    jsTest.log(`Testing with failpoint: ${fpName} dropDonorsCollection: ${
+        dropDonorsCollection}, dropRecipientsCollection: ${
+        dropRecipientsCollection}, retryWithDifferentMigrationId: ${
+        retryWithDifferentMigrationId}`);
+
+    const tenantId = makeTenantId();
+    const migrationOptsBeforeDrop = makeMigrationOpts(tenantMigrationTest, tenantId);
+    let donorPrimary = tenantMigrationTest.getDonorPrimary();
+    let recipientPrimary = tenantMigrationTest.getRecipientPrimary();
+
+    let fp;
+    if (fpName) {
+        fp = configureFailPoint(donorPrimary, fpName, {tenantId: tenantId});
+        assert.commandWorked(
+            tenantMigrationTest.startMigration(migrationOptsBeforeDrop,
+                                               false /* retryOnRetryableErrors */,
+                                               false /* automaticForgetMigration */));
+        fp.wait();
+    } else {
+        assert.commandWorked(
+            tenantMigrationTest.runMigration(migrationOptsBeforeDrop,
+                                             false /* retryOnRetryableErrors */,
+                                             false /* automaticForgetMigration */));
+    }
+
+    if (dropDonorsCollection) {
+        assert(donorPrimary.getCollection(TenantMigrationTest.kConfigDonorsNS).drop());
+        let donorDoc = donorPrimary.getCollection(TenantMigrationTest.kConfigDonorsNS).findOne({
+            tenantId: tenantId
+        });
+        assert.eq(donorDoc, null);
+
+        const currOpDonor = assert.commandWorked(
+            donorPrimary.adminCommand({currentOp: true, desc: "tenant donor migration"}));
+        assert.eq(currOpDonor.inprog.length, 0);
+
+        // Trigger stepup to allow the donor service to rebuild.
+        assert.commandWorked(donorPrimary.adminCommand({replSetStepDown: 30, force: true}));
+        donorPrimary = tenantMigrationTest.getDonorRst().getPrimary();
+    }
+
+    if (dropRecipientsCollection) {
+        assert(recipientPrimary.getCollection(TenantMigrationTest.kConfigRecipientsNS).drop({
+            writeConcern: {w: "majority"}
+        }));
+        let recipientDoc =
+            recipientPrimary.getCollection(TenantMigrationTest.kConfigRecipientsNS).findOne({
+                tenantId: tenantId
+            });
+        assert.eq(recipientDoc, null);
+        const currOpRecipient = assert.commandWorked(
+            recipientPrimary.adminCommand({currentOp: true, desc: "tenant recipient migration"}));
+        assert.eq(currOpRecipient.inprog.length, 0);
+
+        // Trigger stepup to allow the recipient service to rebuild.
+        assert.commandWorked(recipientPrimary.adminCommand({replSetStepDown: 30, force: true}));
+        recipientPrimary = tenantMigrationTest.getRecipientRst().getPrimary();
+    }
+
+    if (fp) {
+        fp.off();
+    }
+    const migrationOptsAfterDrop = retryWithDifferentMigrationId
+        ? makeMigrationOpts(tenantMigrationTest, tenantId)
+        : migrationOptsBeforeDrop;
+    const runMigrationRes = tenantMigrationTest.runMigration(migrationOptsAfterDrop,
+                                                             false /* retryOnRetryableErrors */,
+                                                             false /* automaticForgetMigration */);
+    if (expectedRunMigrationError) {
+        assert.commandFailedWithCode(runMigrationRes, expectedRunMigrationError);
+    } else {
+        assert.commandWorked(runMigrationRes);
+        if (expectedAbortReason) {
+            assert.eq(runMigrationRes.state, TenantMigrationTest.DonorState.kAborted);
+            assert.eq(runMigrationRes.abortReason.code, expectedAbortReason);
+        } else {
+            assert.eq(runMigrationRes.state, TenantMigrationTest.DonorState.kCommitted);
+        }
+
+        assert.commandWorked(
+            tenantMigrationTest.forgetMigration(migrationOptsAfterDrop.migrationIdString));
+        tenantMigrationTest.waitForMigrationGarbageCollection(
+            UUID(migrationOptsAfterDrop.migrationIdString));
+    }
+
+    if (retryWithDifferentMigrationId && !dropDonorsCollection) {
+        assert(dropRecipientsCollection);
+        // The original migration will still run to completion after the recipient service rebuilds
+        // since the donor will retry the recipientSyncData command on Interrupted error. Wait for
+        // the migration to complete and clean up to avoid concurrent migrations when the next test
+        // case starts.
+        assert.commandWorked(
+            tenantMigrationTest.waitForMigrationToComplete(migrationOptsBeforeDrop));
+        assert.commandWorked(
+            tenantMigrationTest.forgetMigration(migrationOptsBeforeDrop.migrationIdString));
+        tenantMigrationTest.waitForMigrationGarbageCollection(
+            UUID(migrationOptsBeforeDrop.migrationIdString));
+    }
+}
+
+const tenantMigrationTest = new TenantMigrationTest({
+    name: jsTestName(),
+    sharedOptions: {
+        setParameter: {
+            tenantMigrationGarbageCollectionDelayMS: 1,
+            ttlMonitorSleepSecs: 1,
+        }
+    },
+    initiateRstWithHighElectionTimeout: false
+});
+
+if (!tenantMigrationTest.isFeatureFlagEnabled()) {
+    jsTestLog("Skipping test because the tenant migrations feature flag is disabled");
+    return;
+}
+
+jsTest.log("Test dropping donor and recipient state doc collections during a migration.");
+kMigrationFpNames.forEach(fpName => {
+    testDroppingStateDocCollections(
+        tenantMigrationTest, fpName, {dropDonorsCollection: true, dropRecipientsCollection: true});
+
+    testDroppingStateDocCollections(tenantMigrationTest, fpName, {
+        dropDonorsCollection: true,
+        dropRecipientsCollection: true,
+        retryWithDifferentMigrationId: true
+    });
+
+    testDroppingStateDocCollections(tenantMigrationTest, fpName, {
+        dropDonorsCollection: false,
+        dropRecipientsCollection: true,
+        expectedAbortReason: (fpName == "abortTenantMigrationBeforeLeavingBlockingState")
+            ? ErrorCodes.InternalError
+            : null
+    });
+
+    testDroppingStateDocCollections(tenantMigrationTest, fpName, {
+        dropDonorsCollection: false,
+        dropRecipientsCollection: true,
+        retryWithDifferentMigrationId: true,
+        // The original migration is still running on the donor so the retry is expected to fail
+        // with ConflictingOperationInProgress.
+        expectedRunMigrationError: ErrorCodes.ConflictingOperationInProgress
+    });
+
+    const sentBlockTimestampToRecipient =
+        (!fpName || fpName == "pauseTenantMigrationBeforeLeavingBlockingState" ||
+         fpName == "abortTenantMigrationBeforeLeavingBlockingState");
+    testDroppingStateDocCollections(tenantMigrationTest, fpName, {
+        dropDonorsCollection: true,
+        dropRecipientsCollection: false,
+        // The retry causes the donor to restart the migration and send a different
+        // returnAfterReachingTimestamp/blockTimestamp to the recipient, which is illegal.
+        expectedAbortReason: sentBlockTimestampToRecipient ? ErrorCodes.IllegalOperation : null
+    });
+});
+
+tenantMigrationTest.stop();
+})();
diff --git a/src/mongo/db/repl/tenant_migration_access_blocker_registry.cpp b/src/mongo/db/repl/tenant_migration_access_blocker_registry.cpp
index 96738408ebc..b7ad1166325 100644
--- a/src/mongo/db/repl/tenant_migration_access_blocker_registry.cpp
+++ b/src/mongo/db/repl/tenant_migration_access_blocker_registry.cpp
@@ -62,9 +62,7 @@ void TenantMigrationAccessBlockerRegistry::add(StringData tenantId,
     _tenantMigrationAccessBlockers.emplace(tenantId, mtabPair);
 }
 
-void TenantMigrationAccessBlockerRegistry::remove(StringData tenantId, MtabType type) {
-    stdx::lock_guard<Latch> lg(_mutex);
-
+void TenantMigrationAccessBlockerRegistry::_remove(WithLock, StringData tenantId, MtabType type) {
     auto it = _tenantMigrationAccessBlockers.find(tenantId);
     invariant(it != _tenantMigrationAccessBlockers.end());
     auto mtabPair = it->second;
@@ -75,6 +73,19 @@ void TenantMigrationAccessBlockerRegistry::remove(StringData tenantId, MtabType
     }
 }
 
+void TenantMigrationAccessBlockerRegistry::remove(StringData tenantId, MtabType type) {
+    stdx::lock_guard<Latch> lg(_mutex);
+    _remove(lg, tenantId, type);
+}
+
+void TenantMigrationAccessBlockerRegistry::removeAll(MtabType type) {
+    stdx::lock_guard<Latch> lg(_mutex);
+    auto& mtabs = _tenantMigrationAccessBlockers;
+    for (auto it = mtabs.begin(); it != mtabs.end();) {
+        _remove(lg, (it++)->first, type);  // Advance first: _remove() may erase this entry.
+    }
+}
+
 boost::optional<MtabPair>
 TenantMigrationAccessBlockerRegistry::getTenantMigrationAccessBlockerForDbName(StringData dbName) {
     stdx::lock_guard<Latch> lg(_mutex);
diff --git a/src/mongo/db/repl/tenant_migration_access_blocker_registry.h b/src/mongo/db/repl/tenant_migration_access_blocker_registry.h
index 51f06185868..2d6c588a883 100644
--- a/src/mongo/db/repl/tenant_migration_access_blocker_registry.h
+++ b/src/mongo/db/repl/tenant_migration_access_blocker_registry.h
@@ -95,6 +95,12 @@ public:
      * Invariants that an entry for tenantId exists, and then removes the entry for (tenantId, mtab)
      */
     void remove(StringData tenantId, TenantMigrationAccessBlocker::BlockerType type);
+    void _remove(WithLock, StringData tenantId, TenantMigrationAccessBlocker::BlockerType type);
+
+    /**
+     * Removes all mtabs of the given type.
+     */
+    void removeAll(TenantMigrationAccessBlocker::BlockerType type);
 
     /**
      * Iterates through each of the TenantMigrationAccessBlockers and
diff --git a/src/mongo/db/repl/tenant_migration_donor_op_observer.cpp b/src/mongo/db/repl/tenant_migration_donor_op_observer.cpp
index f7c032492da..a02d6137859 100644
--- a/src/mongo/db/repl/tenant_migration_donor_op_observer.cpp
+++ b/src/mongo/db/repl/tenant_migration_donor_op_observer.cpp
@@ -270,6 +270,20 @@ void TenantMigrationDonorOpObserver::onDelete(OperationContext* opCtx,
     }
 }
 
+repl::OpTime TenantMigrationDonorOpObserver::onDropCollection(OperationContext* opCtx,
+                                                              const NamespaceString& collectionName,
+                                                              OptionalCollectionUUID uuid,
+                                                              std::uint64_t numRecords,
+                                                              const CollectionDropType dropType) {
+    if (collectionName == NamespaceString::kTenantMigrationDonorsNamespace) {
+        opCtx->recoveryUnit()->onCommit([opCtx](boost::optional<Timestamp>) {
+            TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
+                .removeAll(TenantMigrationAccessBlocker::BlockerType::kDonor);
+        });
+    }
+    return {};
+}
+
 void TenantMigrationDonorOpObserver::onMajorityCommitPointUpdate(
     ServiceContext* service, const repl::OpTime& newCommitPoint) {
     TenantMigrationAccessBlockerRegistry::get(service).onMajorityCommitPointUpdate(newCommitPoint);
diff --git a/src/mongo/db/repl/tenant_migration_donor_op_observer.h b/src/mongo/db/repl/tenant_migration_donor_op_observer.h
index d6874272e0a..68741e4ab6f 100644
--- a/src/mongo/db/repl/tenant_migration_donor_op_observer.h
+++ b/src/mongo/db/repl/tenant_migration_donor_op_observer.h
@@ -125,9 +125,7 @@ public:
                                   const NamespaceString& collectionName,
                                   OptionalCollectionUUID uuid,
                                   std::uint64_t numRecords,
-                                  CollectionDropType dropType) final {
-        return repl::OpTime();
-    }
+                                  CollectionDropType dropType) final;
 
     void onDropIndex(OperationContext* opCtx,
                      const NamespaceString& nss,
diff --git a/src/mongo/db/repl/tenant_migration_recipient_op_observer.cpp b/src/mongo/db/repl/tenant_migration_recipient_op_observer.cpp
index e16004be680..01413ef3023 100644
--- a/src/mongo/db/repl/tenant_migration_recipient_op_observer.cpp
+++ b/src/mongo/db/repl/tenant_migration_recipient_op_observer.cpp
@@ -166,5 +166,20 @@ void TenantMigrationRecipientOpObserver::onDelete(OperationContext* opCtx,
     }
 }
 
+repl::OpTime TenantMigrationRecipientOpObserver::onDropCollection(
+    OperationContext* opCtx,
+    const NamespaceString& collectionName,
+    OptionalCollectionUUID uuid,
+    std::uint64_t numRecords,
+    const CollectionDropType dropType) {
+    if (collectionName == NamespaceString::kTenantMigrationRecipientsNamespace) {
+        opCtx->recoveryUnit()->onCommit([opCtx](boost::optional<Timestamp>) {
+            TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
+                .removeAll(TenantMigrationAccessBlocker::BlockerType::kRecipient);
+        });
+    }
+    return {};
+}
+
 }  // namespace repl
 }  // namespace mongo
diff --git a/src/mongo/db/repl/tenant_migration_recipient_op_observer.h b/src/mongo/db/repl/tenant_migration_recipient_op_observer.h
index af2899286a5..1ecf7747251 100644
--- a/src/mongo/db/repl/tenant_migration_recipient_op_observer.h
+++ b/src/mongo/db/repl/tenant_migration_recipient_op_observer.h
@@ -126,9 +126,7 @@ public:
                                   const NamespaceString& collectionName,
                                   OptionalCollectionUUID uuid,
                                   std::uint64_t numRecords,
-                                  CollectionDropType dropType) final {
-        return repl::OpTime();
-    }
+                                  CollectionDropType dropType) final;
 
     void onDropIndex(OperationContext* opCtx,
                      const NamespaceString& nss,