diff options
-rw-r--r-- | src/mongo/db/s/resharding/resharding_metrics.cpp | 46 | ||||
-rw-r--r-- | src/mongo/db/s/resharding/resharding_metrics_test.cpp | 78 |
2 files changed, 105 insertions, 19 deletions
diff --git a/src/mongo/db/s/resharding/resharding_metrics.cpp b/src/mongo/db/s/resharding/resharding_metrics.cpp index 5fabcb00e9e..9f8ada073ca 100644 --- a/src/mongo/db/s/resharding/resharding_metrics.cpp +++ b/src/mongo/db/s/resharding/resharding_metrics.cpp @@ -67,6 +67,20 @@ const auto getMetrics = ServiceContext::declareDecoration<MetricsPtr>(); const auto reshardingMetricsRegisterer = ServiceContext::ConstructorActionRegisterer{ "ReshardingMetrics", [](ServiceContext* ctx) { getMetrics(ctx) = std::make_unique<ReshardingMetrics>(ctx); }}; + +/** + * Given a constant rate of time per unit of work: + * totalTime / totalWork == elapsedTime / elapsedWork + * Solve for remaining time. + * remainingTime := totalTime - elapsedTime + * == (totalWork * (elapsedTime / elapsedWork)) - elapsedTime + * == elapsedTime * (totalWork / elapsedWork - 1) + */ +Milliseconds remainingTime(Milliseconds elapsedTime, double elapsedWork, double totalWork) { + elapsedWork = std::min(elapsedWork, totalWork); + double remainingMsec = 1.0 * elapsedTime.count() * (totalWork / elapsedWork - 1); + return Milliseconds(Milliseconds::rep(remainingMsec)); +} } // namespace ReshardingMetrics* ReshardingMetrics::get(ServiceContext* ctx) noexcept { @@ -242,28 +256,28 @@ void ReshardingMetrics::OperationMetrics::append(BSONObjBuilder* bob, Role role) return durationCount<Seconds>(interval.duration()); }; - auto estimateRemainingOperationTime = [&]() -> int64_t { - if (bytesCopied == 0 && oplogEntriesApplied == 0) - return -1; - else if (oplogEntriesApplied == 0) { - invariant(bytesCopied > 0); + auto remainingMsec = [&]() -> boost::optional<Milliseconds> { + if (oplogEntriesApplied > 0) { + // All fetched oplogEntries must be applied. Some of them already have been. + return remainingTime( + applyingOplogEntries.duration(), oplogEntriesApplied, oplogEntriesFetched); + } + if (bytesCopied > 0) { // Until the time to apply batches of oplog entries is measured, we assume that applying // all of them will take as long as copying did. - const auto elapsedCopyTime = getElapsedTime(copyingDocuments); - const auto approxTimeToCopy = - elapsedCopyTime * std::max((int64_t)0, bytesToCopy / bytesCopied - 1); - return elapsedCopyTime + 2 * approxTimeToCopy; - } else { - invariant(oplogEntriesApplied > 0); - const auto approxTimeToApply = getElapsedTime(applyingOplogEntries) * - std::max((int64_t)0, oplogEntriesFetched / oplogEntriesApplied - 1); - return approxTimeToApply; + return remainingTime(copyingDocuments.duration(), bytesCopied, 2 * bytesToCopy); } - }; + return {}; + }(); + const std::string kIntervalSuffix = role == Role::kAll ? "Millis" : ""; bob->append(kOpTimeElapsed + kIntervalSuffix, getElapsedTime(runningOperation)); - bob->append(kOpTimeRemaining + kIntervalSuffix, estimateRemainingOperationTime()); + + bob->append(kOpTimeRemaining + kIntervalSuffix, + !remainingMsec ? int64_t{-1} /** -1 is a specified integer null value */ + : role == Role::kAll ? durationCount<Milliseconds>(*remainingMsec) + : durationCount<Seconds>(*remainingMsec)); if (role == Role::kAll || role == Role::kRecipient) { bob->append(kDocumentsToCopy, documentsToCopy); diff --git a/src/mongo/db/s/resharding/resharding_metrics_test.cpp b/src/mongo/db/s/resharding/resharding_metrics_test.cpp index 798e7c2a103..b4e65ea0f31 100644 --- a/src/mongo/db/s/resharding/resharding_metrics_test.cpp +++ b/src/mongo/db/s/resharding/resharding_metrics_test.cpp @@ -27,22 +27,30 @@ * it in the license file. */ +#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest + #include <fmt/format.h> #include "mongo/bson/bsonobjbuilder.h" #include "mongo/bson/json.h" #include "mongo/db/s/resharding/resharding_metrics.h" #include "mongo/db/service_context_test_fixture.h" +#include "mongo/logv2/log.h" #include "mongo/unittest/death_test.h" #include "mongo/unittest/unittest.h" #include "mongo/util/clock_source_mock.h" #include "mongo/util/uuid.h" namespace mongo { +namespace { + +using namespace fmt::literals; + +constexpr auto kOpTimeRemaining = "remainingOperationTimeEstimatedMillis"_sd; class ReshardingMetricsTest : public ServiceContextTest { public: - void setUp() { + void setUp() override { auto clockSource = std::make_unique<ClockSourceMock>(); _clockSource = clockSource.get(); getGlobalServiceContext()->setFastClockSource(std::move(clockSource)); @@ -55,8 +63,8 @@ public: // Timer step in milliseconds static constexpr auto kTimerStep = 100; - void advanceTime(Milliseconds interval = Milliseconds(kTimerStep)) { - _clockSource->advance(interval); + void advanceTime(Milliseconds step = Milliseconds{kTimerStep}) { + _clockSource->advance(step); } auto getReport() { @@ -371,4 +379,68 @@ TEST_F(ReshardingMetricsTest, CurrentOpReportForCoordinator) { ASSERT_BSONOBJ_EQ(expected, report); } +TEST_F(ReshardingMetricsTest, EstimatedRemainingOperationTimeCloning) { + // Copy N docs @ timePerDoc. Check the progression of the estimated time remaining. + auto m = getMetrics(); + m->onStart(); + m->setRecipientState(RecipientStateEnum::kCloning); + auto timePerDocument = Milliseconds{123}; + int64_t bytesPerDocument = 1024; + int64_t documentsToCopy = 409; + int64_t bytesToCopy = bytesPerDocument * documentsToCopy; + m->setDocumentsToCopy(documentsToCopy, bytesToCopy); + auto remainingTime = 2 * timePerDocument * documentsToCopy; + double maxAbsRelErr = 0; + for (int64_t copied = 0; copied < documentsToCopy; ++copied) { + double output = getReport()[kOpTimeRemaining].Number(); + if (copied == 0) { + ASSERT_EQ(output, -1); + } else { + ASSERT_GTE(output, 0); + auto expected = durationCount<Milliseconds>(remainingTime); + // Check that error is pretty small (it should get better as the operation progresses) + double absRelErr = std::abs((output - expected) / expected); + ASSERT_LT(absRelErr, 0.05) + << "output={}, expected={}, copied={}"_format(output, expected, copied); + maxAbsRelErr = std::max(maxAbsRelErr, absRelErr); + } + m->onDocumentsCopied(1, bytesPerDocument); + advanceTime(timePerDocument); + remainingTime -= timePerDocument; + } + LOGV2_DEBUG( + 5422700, 3, "Max absolute relative error observed", "maxAbsRelErr"_attr = maxAbsRelErr); +} + +TEST_F(ReshardingMetricsTest, EstimatedRemainingOperationTimeApplying) { + // Perform N ops @ timePerOp. Check the progression of the estimated time remaining. + auto m = getMetrics(); + m->onStart(); + m->setRecipientState(RecipientStateEnum::kApplying); + auto timePerOp = Milliseconds{123}; + int64_t fetched = 10000; + m->onOplogEntriesFetched(fetched); + auto remainingTime = timePerOp * fetched; + double maxAbsRelErr = 0; + for (int64_t applied = 0; applied < fetched; ++applied) { + double output = getReport()[kOpTimeRemaining].Number(); + if (applied == 0) { + ASSERT_EQ(output, -1); + } else { + auto expected = durationCount<Milliseconds>(remainingTime); + // Check that error is pretty small (it should get better as the operation progresses) + double absRelErr = std::abs((output - expected) / expected); + ASSERT_LT(absRelErr, 0.05) + << "output={}, expected={}, applied={}"_format(output, expected, applied); + maxAbsRelErr = std::max(maxAbsRelErr, absRelErr); + } + advanceTime(timePerOp); + m->onOplogEntriesApplied(1); + remainingTime -= timePerOp; + } + LOGV2_DEBUG( + 5422701, 3, "Max absolute relative error observed", "maxAbsRelErr"_attr = maxAbsRelErr); +} + +} // namespace } // namespace mongo |