summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Jordi Serra Torrens <jordi.serra-torrens@mongodb.com> 2021-11-05 07:49:40 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-11-05 08:17:06 +0000
commit: d3c7f66ab934f19a9f6f22febcb4b27c1e7eaa06 (patch)
tree: 1d32c992dce3f01bae1b7472b83219b97ae7c548 /src
parent: 1384eee3fa37888fed9a5ef7df74bf9db9f116b1 (diff)
download: mongo-d3c7f66ab934f19a9f6f22febcb4b27c1e7eaa06.tar.gz
SERVER-60984 Report time in recipient critical section on serverStatus shardingStatistics
Diffstat (limited to 'src')
-rw-r--r-- src/mongo/db/s/migration_destination_manager.cpp 34
-rw-r--r-- src/mongo/db/s/migration_destination_manager.h 3
-rw-r--r-- src/mongo/db/s/sharding_statistics.cpp 2
-rw-r--r-- src/mongo/db/s/sharding_statistics.h 5
4 files changed, 39 insertions, 5 deletions
diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp
index d6d44e380d2..875e99ce5b9 100644
--- a/src/mongo/db/s/migration_destination_manager.cpp
+++ b/src/mongo/db/s/migration_destination_manager.cpp
@@ -1084,6 +1084,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx,
invariant(!_max.isEmpty());
boost::optional<MoveTimingHelper> timing;
+ boost::optional<Timer> timeInCriticalSection;
if (!skipToCritSecTaken) {
timing.emplace(
@@ -1568,6 +1569,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx,
opCtx, _nss, critSecReason, ShardingCatalogClient::kLocalWriteConcern);
LOGV2(5899114, "Entered migration recipient critical section", "nss"_attr = _nss);
+ timeInCriticalSection.emplace();
});
if (getState() == FAIL) {
@@ -1581,6 +1583,17 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx,
critSecReason,
ShardingCatalogClient::kMajorityWriteConcern);
+ invariant(timeInCriticalSection);
+ const auto timeInCriticalSectionMs = timeInCriticalSection->millis();
+ ShardingStatistics::get(opCtx)
+ .totalRecipientCriticalSectionTimeMillis.addAndFetch(
+ timeInCriticalSectionMs);
+
+ LOGV2(5899115,
+ "Exited migration recipient critical section",
+ "nss"_attr = _nss,
+ "durationMillis"_attr = timeInCriticalSectionMs);
+
// Delete the recovery document
migrationutil::deleteMigrationRecipientRecoveryDocument(opCtx, *_migrationId);
});
@@ -1606,9 +1619,15 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx,
cc().makeOperationContext(), outerOpCtx->getCancellationToken(), executor);
auto opCtx = newOpCtxPtr.get();
+ if (skipToCritSecTaken) {
+ timeInCriticalSection.emplace();
+ }
+ invariant(timeInCriticalSection);
+
// Wait until signaled to exit the critical section and then release it.
- runWithoutSession(outerOpCtx,
- [&] { awaitCriticalSectionReleaseSignalAndCompleteMigration(opCtx); });
+ runWithoutSession(outerOpCtx, [&] {
+ awaitCriticalSectionReleaseSignalAndCompleteMigration(opCtx, *timeInCriticalSection);
+ });
}
_setState(DONE);
@@ -1754,7 +1773,7 @@ bool MigrationDestinationManager::_flushPendingWrites(OperationContext* opCtx,
}
void MigrationDestinationManager::awaitCriticalSectionReleaseSignalAndCompleteMigration(
- OperationContext* opCtx) {
+ OperationContext* opCtx, const Timer& timeInCriticalSection) {
// Wait until the migrate thread is signaled to release the critical section
LOGV2_DEBUG(5899111, 3, "Waiting for release critical section signal");
{
@@ -1791,7 +1810,14 @@ void MigrationDestinationManager::awaitCriticalSectionReleaseSignalAndCompleteMi
RecoverableCriticalSectionService::get(opCtx)->releaseRecoverableCriticalSection(
opCtx, _nss, critSecReason, ShardingCatalogClient::kMajorityWriteConcern);
- LOGV2(5899108, "Exited migration recipient critical section", "nss"_attr = _nss);
+ const auto timeInCriticalSectionMs = timeInCriticalSection.millis();
+ ShardingStatistics::get(opCtx).totalRecipientCriticalSectionTimeMillis.addAndFetch(
+ timeInCriticalSectionMs);
+
+ LOGV2(5899108,
+ "Exited migration recipient critical section",
+ "nss"_attr = _nss,
+ "durationMillis"_attr = timeInCriticalSectionMs);
// Wait for the updates to the catalog cache to be written to disk before removing the
// recovery document. This ensures that on case of stepdown, the new primary will know of a
diff --git a/src/mongo/db/s/migration_destination_manager.h b/src/mongo/db/s/migration_destination_manager.h
index 37764e0e879..c9584279429 100644
--- a/src/mongo/db/s/migration_destination_manager.h
+++ b/src/mongo/db/s/migration_destination_manager.h
@@ -252,7 +252,8 @@ private:
* Waits for _state to transition to EXIT_CRIT_SEC. Then, it performs a filtering metadata
* refresh, releases the critical section and finally deletes the recovery document.
*/
- void awaitCriticalSectionReleaseSignalAndCompleteMigration(OperationContext* opCtx);
+ void awaitCriticalSectionReleaseSignalAndCompleteMigration(OperationContext* opCtx,
+ const Timer& timeInCriticalSection);
// Mutex to guard all fields
mutable Mutex _mutex = MONGO_MAKE_LATCH("MigrationDestinationManager::_mutex");
diff --git a/src/mongo/db/s/sharding_statistics.cpp b/src/mongo/db/s/sharding_statistics.cpp
index a2fac5ad48f..a32642776dd 100644
--- a/src/mongo/db/s/sharding_statistics.cpp
+++ b/src/mongo/db/s/sharding_statistics.cpp
@@ -58,6 +58,8 @@ void ShardingStatistics::report(BSONObjBuilder* builder) const {
builder->append("totalCriticalSectionCommitTimeMillis",
totalCriticalSectionCommitTimeMillis.load());
builder->append("totalCriticalSectionTimeMillis", totalCriticalSectionTimeMillis.load());
+ builder->append("totalRecipientCriticalSectionTimeMillis",
+ totalRecipientCriticalSectionTimeMillis.load());
builder->append("countDocsClonedOnRecipient", countDocsClonedOnRecipient.load());
builder->append("countDocsClonedOnDonor", countDocsClonedOnDonor.load());
builder->append("countRecipientMoveChunkStarted", countRecipientMoveChunkStarted.load());
diff --git a/src/mongo/db/s/sharding_statistics.h b/src/mongo/db/s/sharding_statistics.h
index 79547043949..b4332452a8d 100644
--- a/src/mongo/db/s/sharding_statistics.h
+++ b/src/mongo/db/s/sharding_statistics.h
@@ -91,6 +91,11 @@ struct ShardingStatistics {
// after timing out waiting to acquire a lock.
AtomicWord<long long> countDonorMoveChunkLockTimeout{0};
+ // Cumulative, always-increasing counter of how much time the migration recipient critical
+ // section took (this is the period of time when write operations on the collection on the
+ // recipient are blocked).
+ AtomicWord<long long> totalRecipientCriticalSectionTimeMillis{0};
+
// Cumulative, always-increasing counter of the number of migrations aborted on this node
// due to concurrent index operations.
AtomicWord<long long> countDonorMoveChunkAbortConflictingIndexOperation{0};