diff options
author | Cheahuychou Mao <mao.cheahuychou@gmail.com> | 2021-05-21 06:05:04 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-05-24 20:49:24 +0000 |
commit | d7b28dbc26d5690b92415b22145a8b92c2815aed (patch) | |
tree | c7e2d62b7f391ce200ea81478fd620e6aaee412e /src/mongo/db | |
parent | 74616fe794f7fb0a50d607299a7d02a6bd571138 (diff) | |
download | mongo-d7b28dbc26d5690b92415b22145a8b92c2815aed.tar.gz |
SERVER-57071 Ensure that TenantMigrationDonorAccessBlocker's promises are fulfilled before removing it from TenantMigrationDonorAccessBlockerRegistry
Diffstat (limited to 'src/mongo/db')
4 files changed, 47 insertions, 3 deletions
```diff
diff --git a/src/mongo/db/repl/tenant_migration_access_blocker_registry.h b/src/mongo/db/repl/tenant_migration_access_blocker_registry.h
index 2d6c588a883..5d2d8ea5c77 100644
--- a/src/mongo/db/repl/tenant_migration_access_blocker_registry.h
+++ b/src/mongo/db/repl/tenant_migration_access_blocker_registry.h
@@ -71,6 +71,7 @@ public:
         void clearAccessBlocker(TenantMigrationAccessBlocker::BlockerType type) {
             if (type == TenantMigrationAccessBlocker::BlockerType::kDonor) {
                 invariant(_donor);
+                checked_pointer_cast<TenantMigrationDonorAccessBlocker>(_donor)->interrupt();
                 _donor.reset();
             } else {
                 invariant(_recipient);
diff --git a/src/mongo/db/repl/tenant_migration_donor_access_blocker.cpp b/src/mongo/db/repl/tenant_migration_donor_access_blocker.cpp
index 33ad03a196c..41f6be0c327 100644
--- a/src/mongo/db/repl/tenant_migration_donor_access_blocker.cpp
+++ b/src/mongo/db/repl/tenant_migration_donor_access_blocker.cpp
@@ -249,6 +249,19 @@ void TenantMigrationDonorAccessBlocker::rollBackStartBlocking() {
     _transitionOutOfBlockingPromise.setFrom(Status::OK());
 }
 
+void TenantMigrationDonorAccessBlocker::interrupt() {
+    stdx::unique_lock<Latch> lk(_mutex);
+    const Status status(
+        ErrorCodes::Interrupted,
+        "Blocked read or write interrupted while waiting for tenant migration to commit or abort");
+    if (!_transitionOutOfBlockingPromise.getFuture().isReady()) {
+        _transitionOutOfBlockingPromise.setFrom(status);
+    }
+    if (!_completionPromise.getFuture().isReady()) {
+        _completionPromise.setError(status);
+    }
+}
+
 void TenantMigrationDonorAccessBlocker::setCommitOpTime(OperationContext* opCtx,
                                                         repl::OpTime opTime) {
     {
diff --git a/src/mongo/db/repl/tenant_migration_donor_access_blocker.h b/src/mongo/db/repl/tenant_migration_donor_access_blocker.h
index 6c04bc2308a..b11a1a56b9a 100644
--- a/src/mongo/db/repl/tenant_migration_donor_access_blocker.h
+++ b/src/mongo/db/repl/tenant_migration_donor_access_blocker.h
@@ -230,6 +230,13 @@ public:
     void rollBackStartBlocking();
 
     /**
+     * Called when this mtab is about to be removed from the TenantMigrationAccessBlockerRegistry.
+     * Resolves all unfulfilled promises with an Interrupted error to unblock any blocked reads or
+     * writes.
+     */
+    void interrupt();
+
+    /**
      * Stores the commit opTime and calls _onMajorityCommitCommitOpTime if the opTime is already
      * majority-committed.
      */
@@ -241,6 +248,10 @@ public:
      */
     void setAbortOpTime(OperationContext* opCtx, repl::OpTime opTime);
 
+    bool inStateAborted() const {
+        return _state.isAborted();
+    }
+
 private:
     /**
      * The access states of an mtab.
diff --git a/src/mongo/db/repl/tenant_migration_donor_op_observer.cpp b/src/mongo/db/repl/tenant_migration_donor_op_observer.cpp
index 8c8fca5a31a..fa1ea18d569 100644
--- a/src/mongo/db/repl/tenant_migration_donor_op_observer.cpp
+++ b/src/mongo/db/repl/tenant_migration_donor_op_observer.cpp
@@ -138,10 +138,29 @@ public:
 
     void commit(boost::optional<Timestamp>) override {
         if (_donorStateDoc.getExpireAt()) {
-            // The TenantMigrationDonorAccessBlocker entry needs to be removed to re-allow writes,
-            // reads and future migrations with the same tenantId as this migration has already
-            // been aborted and forgotten.
+            auto mtab = tenant_migration_access_blocker::getTenantMigrationDonorAccessBlocker(
+                _opCtx->getServiceContext(), _donorStateDoc.getTenantId());
+            invariant(mtab);
+
+            if (!_opCtx->writesAreReplicated()) {
+                // Setting expireAt implies that the TenantMigrationDonorAccessBlocker for this
+                // migration will be removed shortly after this. However, a lagged secondary
+                // might not manage to advance its majority commit point past the migration commit
+                // or abort opTime and consequently transition out of the blocking state before the
+                // TenantMigrationDonorAccessBlocker is removed. When this occurs, blocked reads or
+                // writes will be left waiting for the migration decision indefinitely. To avoid
+                // that, notify the TenantMigrationDonorAccessBlocker here that the commit or
+                // abort opTime has been majority committed (guaranteed to be true since by design
+                // the donor never marks its state doc as garbage collectable before the migration
+                // decision is majority committed).
+                mtab->onMajorityCommitPointUpdate(_donorStateDoc.getCommitOrAbortOpTime().get());
+            }
+
             if (_donorStateDoc.getState() == TenantMigrationDonorStateEnum::kAborted) {
+                invariant(mtab->inStateAborted());
+                // The migration durably aborted and is now marked as garbage collectable, remove
+                // its TenantMigrationDonorAccessBlocker right away to allow back-to-back migration
+                // retries.
                 TenantMigrationAccessBlockerRegistry::get(_opCtx->getServiceContext())
                     .remove(_donorStateDoc.getTenantId(),
                             TenantMigrationAccessBlocker::BlockerType::kDonor);
```