diff options
author | Andrew Witten <andrew.witten@mongodb.com> | 2022-08-24 15:11:12 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-09-14 21:26:12 +0000 |
commit | 1db7b70c4ff5fd88fd4922350c65b09805e19710 (patch) | |
tree | 6859a9d4aabd134162c6d401136f6380a2767239 /src/mongo/db | |
parent | fadf5a97702162cda1642a5d6a4d47f0ece43994 (diff) | |
download | mongo-1db7b70c4ff5fd88fd4922350c65b09805e19710.tar.gz |
SERVER-67653 don't initiate critical section if remainingMillis is omitted
(cherry picked from commit abd6330d793235c8fbc5de7bf3ec53855ebea9d3)
SERVER-69693 Use lambda instead of repeating code in resharding coordinator
Diffstat (limited to 'src/mongo/db')
-rw-r--r-- | src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.cpp | 34 | ||||
-rw-r--r-- | src/mongo/db/s/resharding/resharding_coordinator_commit_monitor_test.cpp | 41 |
2 files changed, 66 insertions, 9 deletions
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.cpp b/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.cpp index 374dcd6538f..5700f0326ae 100644 --- a/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.cpp +++ b/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.cpp @@ -164,13 +164,21 @@ CoordinatorCommitMonitor::queryRemainingOperationTimeForRecipients() const { uassertStatusOKWithContext(status, errorContext); const auto remainingTime = extractOperationRemainingTime(shardResponse.data); - // A recipient shard does not report the remaining operation time when there is no data - // to copy and no oplog entry to apply. - if (remainingTime && remainingTime.get() < minRemainingTime) { - minRemainingTime = remainingTime.get(); + + // If any recipient omits the "remainingMillis" field of the response then + // we cannot conclude that it is safe to begin the critical section. + // It is possible that the recipient just had a failover and + // was not able to restore its metrics before it replied to the + // _shardsvrReshardingOperationTime command. + if (!remainingTime) { + maxRemainingTime = Milliseconds::max(); + continue; + } + if (remainingTime.value() < minRemainingTime) { + minRemainingTime = remainingTime.value(); } - if (remainingTime && remainingTime.get() > maxRemainingTime) { - maxRemainingTime = remainingTime.get(); + if (remainingTime.value() > maxRemainingTime) { + maxRemainingTime = remainingTime.value(); } } @@ -203,12 +211,20 @@ ExecutorFuture<void> CoordinatorCommitMonitor::_makeFuture() const { "Encountered an error while querying recipients, will retry shortly", "error"_attr = status); - return RemainingOperationTimes{Milliseconds(0), Milliseconds::max()}; + // On error we definitely cannot begin the critical section. Therefore, + // return Milliseconds::max for remainingTimes.max (remainingTimes.max is used + // for determining whether the critical section should begin). + return RemainingOperationTimes{Milliseconds(-1), Milliseconds::max()}; }) .then([this, anchor = shared_from_this()](RemainingOperationTimes remainingTimes) { auto metrics = ReshardingMetrics::get(cc().getServiceContext()); - metrics->setMinRemainingOperationTime(remainingTimes.min); - metrics->setMaxRemainingOperationTime(remainingTimes.max); + // If remainingTimes.max (or remainingTimes.min) is Milliseconds::max, then use -1 so + // that the scale of the y-axis is still useful when looking at FTDC metrics. + auto clampIfMax = [](Milliseconds t) { + return t != Milliseconds::max() ? t : Milliseconds(-1); + }; + metrics->setMinRemainingOperationTime(clampIfMax(remainingTimes.min)); + metrics->setMaxRemainingOperationTime(clampIfMax(remainingTimes.max)); // Check if all recipient shards are within the commit threshold. if (remainingTimes.max <= _threshold) diff --git a/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor_test.cpp b/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor_test.cpp index e7f90cc41fa..2fe3075f1fc 100644 --- a/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor_test.cpp +++ b/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor_test.cpp @@ -92,6 +92,8 @@ protected: void tearDown() override; void mockCommandForRecipients(Milliseconds remainingOperationTime); + void mockOmitRemainingMillisForRecipients(); + void mockOmitRemainingMillisForOneRecipient(); void mockRemaingOperationTimesCommandForRecipients( CoordinatorCommitMonitor::RemainingOperationTimes remainingOperationTimes); @@ -180,6 +182,31 @@ void CoordinatorCommitMonitorTest::mockCommandForRecipients(Milliseconds remaini _recipientShards.begin(), _recipientShards.end(), [&](const ShardId&) { onCommand(func); }); } +void CoordinatorCommitMonitorTest::mockOmitRemainingMillisForRecipients() { + // Omit remainingMillis from all shard responses. + std::for_each(_recipientShards.begin(), _recipientShards.end(), [this](const ShardId&) { + onCommand([](const executor::RemoteCommandRequest& request) -> StatusWith<BSONObj> { + // Return an empty BSON object. + return BSONObj(); + }); + }); +} + +void CoordinatorCommitMonitorTest::mockOmitRemainingMillisForOneRecipient() { + // Omit remainingMillis from a single recipient. + for (const auto& shard : _recipientShards) { + onCommand([&](const executor::RemoteCommandRequest&) -> StatusWith<BSONObj> { + if (shard == _recipientShards.front()) { + // Return an empty BSON object. + return BSONObj(); + } + auto threshold = Milliseconds(gRemainingReshardingOperationTimeThresholdMillis.load()); + return BSON("remainingMillis" + << durationCount<Milliseconds>(threshold - Milliseconds(1))); + }); + } +} + void CoordinatorCommitMonitorTest::mockRemaingOperationTimesCommandForRecipients( CoordinatorCommitMonitor::RemainingOperationTimes remainingOperationTimes) { bool useMin = true; @@ -264,6 +291,20 @@ TEST_F(CoordinatorCommitMonitorTest, RetriesWhenEncountersErrorsWhileQueryingRec future.get(); } +TEST_F(CoordinatorCommitMonitorTest, BlocksWhenRemainingMillisIsOmitted) { + auto future = getCommitMonitor()->waitUntilRecipientsAreWithinCommitThreshold(); + + mockOmitRemainingMillisForRecipients(); + ASSERT(!future.isReady()); + + // If even a single shard omits remainingMillis, we cannot begin the critical section. + mockOmitRemainingMillisForOneRecipient(); + ASSERT(!future.isReady()); + + respondWithReadyToCommit(); + future.get(); +} + } // namespace } // namespace resharding } // namespace mongo |