diff options
author | Vesselina Ratcheva <vesselina.ratcheva@10gen.com> | 2021-10-26 22:50:11 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-10-27 05:24:44 +0000 |
commit | 2a0cafd88c343e3df4c20f8d94c5a12fb21bd9c1 (patch) | |
tree | 588eb557b90c2bf64bb7002bd7c92ab4f6c8c619 | |
parent | 802a6724a5ffaab84ac631a0091a806ad97fde6d (diff) | |
download | mongo-2a0cafd88c343e3df4c20f8d94c5a12fb21bd9c1.tar.gz |
Revert "SERVER-59907 Add an explicit stage field to tenant migration currentOp output (donor side)"
This reverts commit 8fa2f2672585346c535a5c65e9acbb32eeddc8d8.
-rw-r--r-- | jstests/replsets/tenant_migration_donor_currentop_stage.js | 143 | ||||
-rw-r--r-- | src/mongo/db/repl/tenant_migration_donor_service.cpp | 44 | ||||
-rw-r--r-- | src/mongo/db/repl/tenant_migration_donor_service.h | 60 |
3 files changed, 7 insertions, 240 deletions
diff --git a/jstests/replsets/tenant_migration_donor_currentop_stage.js b/jstests/replsets/tenant_migration_donor_currentop_stage.js deleted file mode 100644 index 39bb6033cf8..00000000000 --- a/jstests/replsets/tenant_migration_donor_currentop_stage.js +++ /dev/null @@ -1,143 +0,0 @@ -/** - * Tests the human readable "donorStage" currentOp field at various times - * during a migration. - * - * @tags: [ - * incompatible_with_eft, - * incompatible_with_macos, - * incompatible_with_windows_tls, - * requires_majority_read_concern, - * requires_persistence, - * multiversion_incompatible, - * ] - */ - -(function() { -"use strict"; - -load("jstests/libs/fail_point_util.js"); -load("jstests/libs/parallelTester.js"); -load("jstests/libs/uuid_util.js"); -load("jstests/replsets/libs/tenant_migration_test.js"); -load("jstests/replsets/libs/tenant_migration_util.js"); - -// Main test runner. -function runTest(logs, failPoints, descriptions, forgetMigrationCutoffIndex) { - jsTestLog("Setting up test."); - const tenantMigrationTest = new TenantMigrationTest({name: jsTestName()}); - - const donorPrimary = tenantMigrationTest.getDonorPrimary(); - - // Turn on each failpoint ahead of time. We will reach them one-at-a-time. - let fps = []; - for (let i = 0; i < failPoints.length; i++) { - fps.push(configureFailPoint(donorPrimary, failPoints[i])); - } - - const tenantId = "testTenantId"; - const migrationId = extractUUIDFromObject(UUID()); - const migrationOpts = { - migrationIdString: migrationId, - tenantId: tenantId, - recipientConnString: tenantMigrationTest.getRecipientConnString(), - }; - - jsTestLog("Starting migration"); - const donorRstArgs = TenantMigrationUtil.createRstArgs(tenantMigrationTest.getDonorRst()); - const startMigrationThread = - new Thread(TenantMigrationUtil.runMigrationAsync, migrationOpts, donorRstArgs); - startMigrationThread.start(); - - for (let i = 0; i < forgetMigrationCutoffIndex; i++) { - checkStage(logs[i], fps[i], descriptions[i], donorPrimary); - } - - jsTestLog("Waiting for migration to complete"); - startMigrationThread.join(); - TenantMigrationTest.assertCommitted( - tenantMigrationTest.waitForMigrationToComplete(migrationOpts)); - - jsTestLog("Forgetting the migration"); - const forgetMigrationThread = new Thread(TenantMigrationUtil.forgetMigrationAsync, - migrationOpts.migrationIdString, - donorRstArgs, - true /* retryOnRetryableErrors */); - forgetMigrationThread.start(); - - for (let i = forgetMigrationCutoffIndex; i < descriptions.length; i++) { - checkStage(logs[i], fps[i], descriptions[i], donorPrimary); - } - - forgetMigrationThread.join(); - - jsTestLog("Shutting down test"); - tenantMigrationTest.stop(); -} - -// Checks requested stage against expectations. -function checkStage(log, fp, desc, node) { - jsTestLog(log); - - fp.wait(); - - const res = - assert.commandWorked(node.adminCommand({currentOp: true, desc: "tenant donor migration"})); - assert.eq(res.inprog.length, 1, () => tojson(res)); - const instance = res.inprog[0]; - assert.eq(instance.donorStage, desc, () => tojson(res)); - - fp.off(); -} - -runTest( - [ - "[1] Testing state: kUnstarted", - "[2] Testing state: kEnteringAbortingIndexBuildsState", - "[3] Testing state: kAbortingIndexBuilds", - "[4] Testing state: kFetchingClusterTimeKeys", - "[5] Testing state: kEnteringDataSyncState", - "[6] Testing state: kWaitingForRecipientConsistency", - "[7] Testing state: kEnteringBlockingState", - "[8] Testing state: kWaitingForRecipientBlockTs", - "[9] Testing state: kEnteringCommittedState", - "[10] Testing state: kWaitingForDonorForgetMigration", - /* forgetMigration cutoff */ - "[11] Testing state: kWaitingForRecipientForgetMigration", - "[12] Testing state: kMarkingMigrationGarbageCollectable", - "[13] Testing state: kForgotten", - ], - [ - "pauseTenantMigrationBeforeEnteringFutureChain", - "pauseTenantMigrationAfterPersistingInitialDonorStateDoc", - "pauseTenantMigrationBeforeAbortingIndexBuilds", - "pauseTenantMigrationDonorBeforeWaitingForKeysToReplicate", - "pauseTenantMigrationAfterFetchingAndStoringKeys", - "pauseTenantMigrationBeforeLeavingDataSyncState", - "pauseTenantMigrationDonorWhileEnteringBlockingState", - "pauseTenantMigrationBeforeLeavingBlockingState", - "pauseTenantMigrationBeforeEnteringCommittedState", - "pauseWhileWaitingForDonorForgetMigration", - /* forgetMigration cutoff */ - "pauseTenantMigrationBeforeSendingRecipientForgetMigration", - "pauseTenantMigrationDonorBeforeMarkingStateGarbageCollectable", - "pauseTenantMigrationBeforeLeavingFutureChain", - ], - [ - "Migration not yet started.", - "Updating its state document to enter 'aborting index builds' state.", - "Aborting index builds.", - "Fetching cluster time key documents from recipient.", - "Updating its state document to enter 'data sync' state.", - "Waiting for recipient to finish data sync and become consistent.", - "Updating its state doc to enter 'blocking' state.", - "Waiting for receipient to reach the block timestamp.", - "Updating its state document to enter 'committed' state.", - "Waiting to receive 'donorForgetMigration' command.", - /* forgetMigration cutoff */ - "Waiting for recipient to forget migration.", - "Marking migration as garbage-collectable.", - "Migration has been forgotten.", - ], - 10 /* forgetMigrationCutoffIndex */ -); -}()); diff --git a/src/mongo/db/repl/tenant_migration_donor_service.cpp b/src/mongo/db/repl/tenant_migration_donor_service.cpp index 0c4bab73050..7cf45761185 100644 --- a/src/mongo/db/repl/tenant_migration_donor_service.cpp +++ b/src/mongo/db/repl/tenant_migration_donor_service.cpp @@ -75,12 +75,6 @@ MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationDonorWhileUpdatingStateDoc); MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationBeforeInsertingDonorStateDoc); MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationBeforeCreatingStateDocumentTTLIndex); MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationBeforeCreatingExternalKeysTTLIndex); -MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationBeforeAbortingIndexBuilds); -MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationDonorWhileEnteringBlockingState); -MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationBeforeEnteringCommittedState); -MONGO_FAIL_POINT_DEFINE(pauseWhileWaitingForDonorForgetMigration); -MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationBeforeSendingRecipientForgetMigration); -MONGO_FAIL_POINT_DEFINE(pauseTenantMigrationBeforeLeavingFutureChain); const std::string kTTLIndexName = "TenantMigrationDonorTTLIndex"; const std::string kExternalKeysTTLIndexName = "ExternalKeysTTLIndex"; @@ -396,7 +390,6 @@ boost::optional<BSONObj> TenantMigrationDonorService::Instance::reportForCurrent if (_stateDoc.getAbortReason()) { bob.append("abortReason", *_stateDoc.getAbortReason()); } - bob.append("donorStage", describeStage(_donorStage)); return bob.obj(); } @@ -814,32 +807,26 @@ SemiFuture<void> TenantMigrationDonorService::Instance::run( return ExecutorFuture(**executor) .then([this, self = shared_from_this(), executor, token] { - _updateDonorStage(DonorStage::kEnteringAbortingIndexBuildsState); // Note we do not use the abort migration token here because the donorAbortMigration // command waits for a decision to be persisted which will not happen if inserting the // initial state document fails. return _enterAbortingIndexBuildsState(executor, token); }) .then([this, self = shared_from_this(), executor, abortToken] { - _updateDonorStage(DonorStage::kAbortingIndexBuilds); _abortIndexBuilds(abortToken); }) .then([this, self = shared_from_this(), executor, recipientTargeterRS, abortToken] { - _updateDonorStage(DonorStage::kFetchingClusterTimeKeys); return _fetchAndStoreRecipientClusterTimeKeyDocs( executor, recipientTargeterRS, abortToken); }) .then([this, self = shared_from_this(), executor, abortToken] { - _updateDonorStage(DonorStage::kEnteringDataSyncState); return _enterDataSyncState(executor, abortToken); }) .then([this, self = shared_from_this(), executor, recipientTargeterRS, abortToken] { - _updateDonorStage(DonorStage::kWaitingForRecipientConsistency); return _waitForRecipientToBecomeConsistentAndEnterBlockingState( executor, recipientTargeterRS, abortToken); }) .then([this, self = shared_from_this(), executor, recipientTargeterRS, abortToken] { - _updateDonorStage(DonorStage::kWaitingForRecipientBlockTs); return _waitForRecipientToReachBlockTimestampAndEnterCommittedState( executor, recipientTargeterRS, abortToken); }) @@ -880,20 +867,15 @@ SemiFuture<void> TenantMigrationDonorService::Instance::run( // whatever canceled the token will also set the promise with an appropriate error. checkForTokenInterrupt(token); - { - stdx::lock_guard<Latch> lg(_mutex); - - LOGV2(4920400, - "Marked migration state as garbage collectable", - "migrationId"_attr = _migrationUuid, - "expireAt"_attr = _stateDoc.getExpireAt(), - "status"_attr = status); + stdx::lock_guard<Latch> lg(_mutex); - setPromiseFromStatusIfNotReady(lg, _completionPromise, status); - _updateDonorStage(DonorStage::kForgotten); - } + LOGV2(4920400, + "Marked migration state as garbage collectable", + "migrationId"_attr = _migrationUuid, + "expireAt"_attr = _stateDoc.getExpireAt(), + "status"_attr = status); - pauseTenantMigrationBeforeLeavingFutureChain.pauseWhileSet(); + setPromiseFromStatusIfNotReady(lg, _completionPromise, status); }) .semi(); } @@ -929,8 +911,6 @@ void TenantMigrationDonorService::Instance::_abortIndexBuilds(const Cancellation } } - pauseTenantMigrationBeforeAbortingIndexBuilds.pauseWhileSet(); - // Before starting data sync, abort any in-progress index builds. No new index // builds can start while we are doing this because the mtab prevents it. { @@ -1113,8 +1093,6 @@ TenantMigrationDonorService::Instance::_waitForRecipientToBecomeConsistentAndEnt }) .then([this, self = shared_from_this(), executor, token] { // Enter "blocking" state. - _updateDonorStage(DonorStage::kEnteringBlockingState); - pauseTenantMigrationDonorWhileEnteringBlockingState.pauseWhileSet(); return _updateStateDoc(executor, TenantMigrationDonorStateEnum::kBlocking, token) .then([this, self = shared_from_this(), executor, token](repl::OpTime opTime) { return _waitForMajorityWriteConcern(executor, std::move(opTime), token); @@ -1193,8 +1171,6 @@ TenantMigrationDonorService::Instance::_waitForRecipientToReachBlockTimestampAnd }) .then([this, self = shared_from_this(), executor, token] { // Enter "commit" state. - _updateDonorStage(DonorStage::kEnteringCommittedState); - pauseTenantMigrationBeforeEnteringCommittedState.pauseWhileSet(); return _updateStateDoc(executor, TenantMigrationDonorStateEnum::kCommitted, token) .then([this, self = shared_from_this(), executor, token](repl::OpTime opTime) { return _waitForMajorityWriteConcern(executor, std::move(opTime), token) @@ -1273,7 +1249,6 @@ TenantMigrationDonorService::Instance::_waitForForgetMigrationThenMarkMigrationG const std::shared_ptr<executor::ScopedTaskExecutor>& executor, std::shared_ptr<RemoteCommandTargeter> recipientTargeterRS, const CancellationToken& token) { - _updateDonorStage(DonorStage::kWaitingForDonorForgetMigration); auto expiredAt = [&]() { stdx::lock_guard<Latch> lg(_mutex); return _stateDoc.getExpireAt(); @@ -1287,8 +1262,6 @@ TenantMigrationDonorService::Instance::_waitForForgetMigrationThenMarkMigrationG return ExecutorFuture(**executor); } - pauseWhileWaitingForDonorForgetMigration.pauseWhileSet(); - // Wait for the donorForgetMigration command. // If donorAbortMigration has already canceled work, the abortMigrationSource would be // canceled and continued usage of the source would lead to incorrect behavior. Thus, we @@ -1297,12 +1270,9 @@ TenantMigrationDonorService::Instance::_waitForForgetMigrationThenMarkMigrationG return std::move(_receiveDonorForgetMigrationPromise.getFuture()) .thenRunOn(**executor) .then([this, self = shared_from_this(), executor, recipientTargeterRS, token] { - _updateDonorStage(DonorStage::kWaitingForRecipientForgetMigration); - pauseTenantMigrationBeforeSendingRecipientForgetMigration.pauseWhileSet(); return _sendRecipientForgetMigrationCommand(executor, recipientTargeterRS, token); }) .then([this, self = shared_from_this(), executor, token] { - _updateDonorStage(DonorStage::kMarkingMigrationGarbageCollectable); // Note marking the keys as garbage collectable is not atomic with marking the // state document garbage collectable, so an interleaved failover can lead the // keys to be deleted before the state document has an expiration date. This is diff --git a/src/mongo/db/repl/tenant_migration_donor_service.h b/src/mongo/db/repl/tenant_migration_donor_service.h index 7df69538ad7..9663b63d006 100644 --- a/src/mongo/db/repl/tenant_migration_donor_service.h +++ b/src/mongo/db/repl/tenant_migration_donor_service.h @@ -258,63 +258,6 @@ public: return recipientCmdThreadPoolLimits; } - /** - * This enum and its accompanying methods serve to provide a human-readable - * description of what the donor is currently doing, in the currentOp output. - * See "describeStage" for a summary of each stage. - */ - enum DonorStage { - kUnstarted, - kEnteringAbortingIndexBuildsState, - kAbortingIndexBuilds, - kFetchingClusterTimeKeys, - kEnteringDataSyncState, - kWaitingForRecipientConsistency, - kEnteringBlockingState, - kWaitingForRecipientBlockTs, - kEnteringCommittedState, - kWaitingForDonorForgetMigration, - kWaitingForRecipientForgetMigration, - kMarkingMigrationGarbageCollectable, - kForgotten - }; - - static std::string describeStage(DonorStage ds) { - switch (ds) { - case DonorStage::kUnstarted: - return "Migration not yet started."; - case DonorStage::kEnteringAbortingIndexBuildsState: - return "Updating its state document to enter 'aborting index builds' state."; - case DonorStage::kAbortingIndexBuilds: - return "Aborting index builds."; - case DonorStage::kFetchingClusterTimeKeys: - return "Fetching cluster time key documents from recipient."; - case DonorStage::kEnteringDataSyncState: - return "Updating its state document to enter 'data sync' state."; - case DonorStage::kWaitingForRecipientConsistency: - return "Waiting for recipient to finish data sync and become consistent."; - case DonorStage::kEnteringBlockingState: - return "Updating its state doc to enter 'blocking' state."; - case DonorStage::kWaitingForRecipientBlockTs: - return "Waiting for receipient to reach the block timestamp."; - case DonorStage::kEnteringCommittedState: - return "Updating its state document to enter 'committed' state."; - case DonorStage::kWaitingForDonorForgetMigration: - return "Waiting to receive 'donorForgetMigration' command."; - case DonorStage::kWaitingForRecipientForgetMigration: - return "Waiting for recipient to forget migration."; - case DonorStage::kMarkingMigrationGarbageCollectable: - return "Marking migration as garbage-collectable."; - case DonorStage::kForgotten: - return "Migration has been forgotten."; - } - MONGO_UNREACHABLE; - } - - void _updateDonorStage(DonorStage ds) { - _donorStage = ds; - } - /* * Initializes _abortMigrationSource and returns a token from it. The source will be * immediately canceled if an abort has already been requested. @@ -378,9 +321,6 @@ public: // interrupting the instance, e.g. receiving donorAbortMigration. Initialized in // _initAbortMigrationSource(). boost::optional<CancellationSource> _abortMigrationSource; - - // A diagnostics-only field used to describe the donor's progress in currentOp. - DonorStage _donorStage = DonorStage::kUnstarted; }; private: |