diff options
-rw-r--r-- | src/mongo/db/s/migration_destination_manager.cpp | 34 | ||||
-rw-r--r-- | src/mongo/db/s/migration_destination_manager.h | 3 | ||||
-rw-r--r-- | src/mongo/db/s/sharding_statistics.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/s/sharding_statistics.h | 5 |
4 files changed, 39 insertions, 5 deletions
diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp index d6d44e380d2..875e99ce5b9 100644 --- a/src/mongo/db/s/migration_destination_manager.cpp +++ b/src/mongo/db/s/migration_destination_manager.cpp @@ -1084,6 +1084,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx, invariant(!_max.isEmpty()); boost::optional<MoveTimingHelper> timing; + boost::optional<Timer> timeInCriticalSection; if (!skipToCritSecTaken) { timing.emplace( @@ -1568,6 +1569,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx, opCtx, _nss, critSecReason, ShardingCatalogClient::kLocalWriteConcern); LOGV2(5899114, "Entered migration recipient critical section", "nss"_attr = _nss); + timeInCriticalSection.emplace(); }); if (getState() == FAIL) { @@ -1581,6 +1583,17 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx, critSecReason, ShardingCatalogClient::kMajorityWriteConcern); + invariant(timeInCriticalSection); + const auto timeInCriticalSectionMs = timeInCriticalSection->millis(); + ShardingStatistics::get(opCtx) + .totalRecipientCriticalSectionTimeMillis.addAndFetch( + timeInCriticalSectionMs); + + LOGV2(5899115, + "Exited migration recipient critical section", + "nss"_attr = _nss, + "durationMillis"_attr = timeInCriticalSectionMs); + // Delete the recovery document migrationutil::deleteMigrationRecipientRecoveryDocument(opCtx, *_migrationId); }); @@ -1606,9 +1619,15 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx, cc().makeOperationContext(), outerOpCtx->getCancellationToken(), executor); auto opCtx = newOpCtxPtr.get(); + if (skipToCritSecTaken) { + timeInCriticalSection.emplace(); + } + invariant(timeInCriticalSection); + // Wait until signaled to exit the critical section and then release it. - runWithoutSession(outerOpCtx, - [&] { awaitCriticalSectionReleaseSignalAndCompleteMigration(opCtx); }); + runWithoutSession(outerOpCtx, [&] { + awaitCriticalSectionReleaseSignalAndCompleteMigration(opCtx, *timeInCriticalSection); + }); } _setState(DONE); @@ -1754,7 +1773,7 @@ bool MigrationDestinationManager::_flushPendingWrites(OperationContext* opCtx, } void MigrationDestinationManager::awaitCriticalSectionReleaseSignalAndCompleteMigration( - OperationContext* opCtx) { + OperationContext* opCtx, const Timer& timeInCriticalSection) { // Wait until the migrate thread is signaled to release the critical section LOGV2_DEBUG(5899111, 3, "Waiting for release critical section signal"); { @@ -1791,7 +1810,14 @@ void MigrationDestinationManager::awaitCriticalSectionReleaseSignalAndCompleteMi RecoverableCriticalSectionService::get(opCtx)->releaseRecoverableCriticalSection( opCtx, _nss, critSecReason, ShardingCatalogClient::kMajorityWriteConcern); - LOGV2(5899108, "Exited migration recipient critical section", "nss"_attr = _nss); + const auto timeInCriticalSectionMs = timeInCriticalSection.millis(); + ShardingStatistics::get(opCtx).totalRecipientCriticalSectionTimeMillis.addAndFetch( + timeInCriticalSectionMs); + + LOGV2(5899108, + "Exited migration recipient critical section", + "nss"_attr = _nss, + "durationMillis"_attr = timeInCriticalSectionMs); // Wait for the updates to the catalog cache to be written to disk before removing the // recovery document. This ensures that on case of stepdown, the new primary will know of a diff --git a/src/mongo/db/s/migration_destination_manager.h b/src/mongo/db/s/migration_destination_manager.h index 37764e0e879..c9584279429 100644 --- a/src/mongo/db/s/migration_destination_manager.h +++ b/src/mongo/db/s/migration_destination_manager.h @@ -252,7 +252,8 @@ private: * Waits for _state to transition to EXIT_CRIT_SEC. Then, it performs a filtering metadata * refresh, releases the critical section and finally deletes the recovery document. */ - void awaitCriticalSectionReleaseSignalAndCompleteMigration(OperationContext* opCtx); + void awaitCriticalSectionReleaseSignalAndCompleteMigration(OperationContext* opCtx, + const Timer& timeInCriticalSection); // Mutex to guard all fields mutable Mutex _mutex = MONGO_MAKE_LATCH("MigrationDestinationManager::_mutex"); diff --git a/src/mongo/db/s/sharding_statistics.cpp b/src/mongo/db/s/sharding_statistics.cpp index a2fac5ad48f..a32642776dd 100644 --- a/src/mongo/db/s/sharding_statistics.cpp +++ b/src/mongo/db/s/sharding_statistics.cpp @@ -58,6 +58,8 @@ void ShardingStatistics::report(BSONObjBuilder* builder) const { builder->append("totalCriticalSectionCommitTimeMillis", totalCriticalSectionCommitTimeMillis.load()); builder->append("totalCriticalSectionTimeMillis", totalCriticalSectionTimeMillis.load()); + builder->append("totalRecipientCriticalSectionTimeMillis", + totalRecipientCriticalSectionTimeMillis.load()); builder->append("countDocsClonedOnRecipient", countDocsClonedOnRecipient.load()); builder->append("countDocsClonedOnDonor", countDocsClonedOnDonor.load()); builder->append("countRecipientMoveChunkStarted", countRecipientMoveChunkStarted.load()); diff --git a/src/mongo/db/s/sharding_statistics.h b/src/mongo/db/s/sharding_statistics.h index 79547043949..b4332452a8d 100644 --- a/src/mongo/db/s/sharding_statistics.h +++ b/src/mongo/db/s/sharding_statistics.h @@ -91,6 +91,11 @@ struct ShardingStatistics { // after timing out waiting to acquire a lock. AtomicWord<long long> countDonorMoveChunkLockTimeout{0}; + // Cumulative, always-increasing counter of how much time the migration recipient critical + // section took (this is the period of time when write operations on the collection on the + // recipient are blocked). + AtomicWord<long long> totalRecipientCriticalSectionTimeMillis{0}; + // Cumulative, always-increasing counter of the number of migrations aborted on this node // due to concurrent index operations. AtomicWord<long long> countDonorMoveChunkAbortConflictingIndexOperation{0}; |