diff options
author | A. Jesse Jiryu Davis <jesse@mongodb.com> | 2021-11-18 20:04:35 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-11-19 00:04:35 +0000 |
commit | adfc573b37bf74e7e22c9ca465e9483c0c900b0b (patch) | |
tree | bc2265ea83476512b3ca72afa12859289a17a767 | |
parent | 7b6775a0ad49992a3054b8bcfcda29914803fc5b (diff) | |
download | mongo-adfc573b37bf74e7e22c9ca465e9483c0c900b0b.tar.gz |
SERVER-61565 Data race in TenantMigrationRecipientService
(cherry picked from commit b325971796111acf38033c8c989d794b4b80359c)
-rw-r--r-- | src/mongo/db/repl/tenant_migration_recipient_service.cpp | 41 |
1 files changed, 27 insertions, 14 deletions
diff --git a/src/mongo/db/repl/tenant_migration_recipient_service.cpp b/src/mongo/db/repl/tenant_migration_recipient_service.cpp index 6978555c310..22822975522 100644 --- a/src/mongo/db/repl/tenant_migration_recipient_service.cpp +++ b/src/mongo/db/repl/tenant_migration_recipient_service.cpp @@ -508,15 +508,15 @@ TenantMigrationRecipientService::Instance::waitUntilMigrationReachesReturnAfterR // Note: tickClusterTimeTo() will not tick the recipient clock backwards in time. VectorClockMutable::get(opCtx)->tickClusterTimeTo(LogicalTime(returnAfterReachingTimestamp)); - { - stdx::lock_guard lk(_mutex); - _stateDoc.setRejectReadsBeforeTimestamp(selectRejectReadsBeforeTimestamp( - opCtx, returnAfterReachingTimestamp, donorRecipientOpTimePair.recipientOpTime)); - } + stdx::unique_lock lk(_mutex); + _stateDoc.setRejectReadsBeforeTimestamp(selectRejectReadsBeforeTimestamp( + opCtx, returnAfterReachingTimestamp, donorRecipientOpTimePair.recipientOpTime)); + const auto stateDoc = _stateDoc; + lk.unlock(); _stopOrHangOnFailPoint(&fpBeforePersistingRejectReadsBeforeTimestamp, opCtx); auto lastOpBeforeUpdate = repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp(); - uassertStatusOK(tenantMigrationRecipientEntryHelpers::updateStateDoc(opCtx, _stateDoc)); + uassertStatusOK(tenantMigrationRecipientEntryHelpers::updateStateDoc(opCtx, stateDoc)); auto lastOpAfterUpdate = repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp(); auto replCoord = repl::ReplicationCoordinator::get(_serviceContext); if (lastOpBeforeUpdate == lastOpAfterUpdate) { @@ -1127,7 +1127,11 @@ void TenantMigrationRecipientService::Instance::_createOplogBuffer() { _donorOplogBuffer = std::move(bufferCollection); } - invariant(_stateDoc.getStartFetchingDonorOpTime()); + { + stdx::lock_guard lk(_mutex); + invariant(_stateDoc.getStartFetchingDonorOpTime()); + } + { // Ensure we are primary when trying to startup and create the oplog buffer collection. auto coordinator = repl::ReplicationCoordinator::get(opCtx.get()); @@ -1506,7 +1510,10 @@ bool TenantMigrationRecipientService::Instance::_isCloneCompletedMarkerSet(WithL OpTime TenantMigrationRecipientService::Instance::_getOplogResumeApplyingDonorOptime( const OpTime startApplyingDonorOpTime, const OpTime cloneFinishedRecipientOpTime) const { - invariant(_stateDoc.getCloneFinishedRecipientOpTime().has_value()); + { + stdx::lock_guard lk(_mutex); + invariant(_stateDoc.getCloneFinishedRecipientOpTime().has_value()); + } auto opCtx = cc().makeOperationContext(); OplogInterfaceLocal oplog(opCtx.get()); auto oplogIter = oplog.makeIterator(); @@ -2052,16 +2059,21 @@ SemiFuture<void> TenantMigrationRecipientService::Instance::run( stateDoc = _stateDoc; } uassertStatusOK(tenantMigrationRecipientEntryHelpers::updateStateDoc( - opCtx.get(), _stateDoc)); + opCtx.get(), stateDoc)); } else { // Avoid fulfilling the promise twice on restart of the future chain. _stateDocPersistedPromise.emplaceValue(); } - uassert(ErrorCodes::TenantMigrationForgotten, - str::stream() << "Migration " << getMigrationUUID() - << " already marked for garbage collect", - _stateDoc.getState() != TenantMigrationRecipientStateEnum::kDone && - !_stateDoc.getExpireAt()); + + { + stdx::lock_guard<Latch> lg(_mutex); + uassert(ErrorCodes::TenantMigrationForgotten, + str::stream() << "Migration " << getMigrationUUID() + << " already marked for garbage collect", + _stateDoc.getState() != + TenantMigrationRecipientStateEnum::kDone && + !_stateDoc.getExpireAt()); + } // Must abort if flagged for cancellation above. uassert(ErrorCodes::TenantMigrationAborted, @@ -2443,6 +2455,7 @@ void TenantMigrationRecipientService::Instance::_setMigrationStatsOnCompletion( bool success = false; if (completionStatus.code() == ErrorCodes::TenantMigrationForgotten) { + stdx::lock_guard lk(_mutex); if (_stateDoc.getExpireAt()) { // Avoid double counting tenant migration statistics after failover. return; |