summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Witten <andrew.witten@mongodb.com>2022-08-24 15:11:12 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-09-20 19:57:17 +0000
commitd5004b929a08ee46f9803b62a8f1239e3cc3f9bd (patch)
tree20f2361cd7f094f540bfce67b0120587cc96cd5e
parent832151ec174ca8ece535903664777d73d1ae0d37 (diff)
downloadmongo-d5004b929a08ee46f9803b62a8f1239e3cc3f9bd.tar.gz
SERVER-67653 don't initiate critical section if remainingMillis is omitted
(cherry picked from commit abd6330d793235c8fbc5de7bf3ec53855ebea9d3) SERVER-69693 use lambda instead of repeating code (cherry picked from commit dd9d37fa07a7af46ac4ee8dabeb235dc9571afb0)
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.cpp30
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_commit_monitor_test.cpp41
2 files changed, 64 insertions, 7 deletions
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.cpp b/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.cpp
index 4046e7d8448..963ec6a7c54 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.cpp
+++ b/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.cpp
@@ -167,12 +167,20 @@ CoordinatorCommitMonitor::queryRemainingOperationTimeForRecipients() const {
uassertStatusOKWithContext(status, errorContext);
const auto remainingTime = extractOperationRemainingTime(shardResponse.data);
- // A recipient shard does not report the remaining operation time when there is no data
- // to copy and no oplog entry to apply.
- if (remainingTime && remainingTime.value() < minRemainingTime) {
+
+ // If any recipient omits the "remainingMillis" field of the response then
+ // we cannot conclude that it is safe to begin the critical section.
+ // It is possible that the recipient just had a failover and
+ // was not able to restore its metrics before it replied to the
+ // _shardsvrReshardingOperationTime command.
+ if (!remainingTime) {
+ maxRemainingTime = Milliseconds::max();
+ continue;
+ }
+ if (remainingTime.value() < minRemainingTime) {
minRemainingTime = remainingTime.value();
}
- if (remainingTime && remainingTime.value() > maxRemainingTime) {
+ if (remainingTime.value() > maxRemainingTime) {
maxRemainingTime = remainingTime.value();
}
}
@@ -206,11 +214,19 @@ ExecutorFuture<void> CoordinatorCommitMonitor::_makeFuture() const {
"Encountered an error while querying recipients, will retry shortly",
"error"_attr = status);
- return RemainingOperationTimes{Milliseconds(0), Milliseconds::max()};
+ // On error we definitely cannot begin the critical section. Therefore,
+ // return Milliseconds::max for remainingTimes.max (remainingTimes.max is used
+ // for determining whether the critical section should begin).
+ return RemainingOperationTimes{Milliseconds(-1), Milliseconds::max()};
})
.then([this, anchor = shared_from_this()](RemainingOperationTimes remainingTimes) {
- _metrics->setCoordinatorHighEstimateRemainingTimeMillis(remainingTimes.max);
- _metrics->setCoordinatorLowEstimateRemainingTimeMillis(remainingTimes.min);
+ // If remainingTimes.max (or remainingTimes.min) is Milliseconds::max, then use -1 so
+ // that the scale of the y-axis is still useful when looking at FTDC metrics.
+ auto clampIfMax = [](Milliseconds t) {
+ return t != Milliseconds::max() ? t : Milliseconds(-1);
+ };
+ _metrics->setCoordinatorHighEstimateRemainingTimeMillis(clampIfMax(remainingTimes.max));
+ _metrics->setCoordinatorLowEstimateRemainingTimeMillis(clampIfMax(remainingTimes.min));
// Check if all recipient shards are within the commit threshold.
if (remainingTimes.max <= _threshold)
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor_test.cpp b/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor_test.cpp
index 11a54beed1e..b31766ddbbc 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor_test.cpp
@@ -94,6 +94,8 @@ protected:
void tearDown() override;
void mockCommandForRecipients(Milliseconds remainingOperationTime);
+ void mockOmitRemainingMillisForRecipients();
+ void mockOmitRemainingMillisForOneRecipient();
void mockRemaingOperationTimesCommandForRecipients(
CoordinatorCommitMonitor::RemainingOperationTimes remainingOperationTimes);
@@ -195,6 +197,31 @@ void CoordinatorCommitMonitorTest::mockCommandForRecipients(Milliseconds remaini
_recipientShards.begin(), _recipientShards.end(), [&](const ShardId&) { onCommand(func); });
}
+void CoordinatorCommitMonitorTest::mockOmitRemainingMillisForRecipients() {
+ // Omit remainingMillis from all shard responses.
+ std::for_each(_recipientShards.begin(), _recipientShards.end(), [this](const ShardId&) {
+ onCommand([](const executor::RemoteCommandRequest& request) -> StatusWith<BSONObj> {
+ // Return an empty BSON object.
+ return BSONObj();
+ });
+ });
+}
+
+void CoordinatorCommitMonitorTest::mockOmitRemainingMillisForOneRecipient() {
+ // Omit remainingMillis from a single recipient.
+ for (const auto& shard : _recipientShards) {
+ onCommand([&](const executor::RemoteCommandRequest&) -> StatusWith<BSONObj> {
+ if (shard == _recipientShards.front()) {
+ // Return an empty BSON object.
+ return BSONObj();
+ }
+ auto threshold = Milliseconds(gRemainingReshardingOperationTimeThresholdMillis.load());
+ return BSON("remainingMillis"
+ << durationCount<Milliseconds>(threshold - Milliseconds(1)));
+ });
+ }
+}
+
void CoordinatorCommitMonitorTest::mockRemaingOperationTimesCommandForRecipients(
CoordinatorCommitMonitor::RemainingOperationTimes remainingOperationTimes) {
bool useMin = true;
@@ -279,6 +306,20 @@ TEST_F(CoordinatorCommitMonitorTest, RetriesWhenEncountersErrorsWhileQueryingRec
future.get();
}
+TEST_F(CoordinatorCommitMonitorTest, BlocksWhenRemainingMillisIsOmitted) {
+ auto future = getCommitMonitor()->waitUntilRecipientsAreWithinCommitThreshold();
+
+ mockOmitRemainingMillisForRecipients();
+ ASSERT(!future.isReady());
+
+ // If even a single shard omits remainingMillis, we cannot begin the critical section.
+ mockOmitRemainingMillisForOneRecipient();
+ ASSERT(!future.isReady());
+
+ respondWithReadyToCommit();
+ future.get();
+}
+
} // namespace
} // namespace resharding
} // namespace mongo