diff options
author | Matt Walak <matt.walak@mongodb.com> | 2021-08-02 19:15:01 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-08-12 15:41:36 +0000 |
commit | 98a21127ae4957594276a35228a95d8d79d6ad25 (patch) | |
tree | b8bc46e2045a9d81ccb82f377bd571abac65bdea /src/mongo/db/s | |
parent | fce8274e6d47ed28d915e8e2db5149876dbcae25 (diff) | |
download | mongo-98a21127ae4957594276a35228a95d8d79d6ad25.tar.gz |
SERVER-57700 Measure latency/throughput of resharding::data_copy::fillBatchForInsert in ReshardingCollectionCloner::doOneBatch
Diffstat (limited to 'src/mongo/db/s')
5 files changed, 116 insertions, 49 deletions
diff --git a/src/mongo/db/s/resharding/resharding_collection_cloner.cpp b/src/mongo/db/s/resharding/resharding_collection_cloner.cpp index 2af6caace49..7971ca21424 100644 --- a/src/mongo/db/s/resharding/resharding_collection_cloner.cpp +++ b/src/mongo/db/s/resharding/resharding_collection_cloner.cpp @@ -258,8 +258,12 @@ std::unique_ptr<Pipeline, PipelineDeleter> ReshardingCollectionCloner::_restartP bool ReshardingCollectionCloner::doOneBatch(OperationContext* opCtx, Pipeline& pipeline) { pipeline.reattachToOperationContext(opCtx); ON_BLOCK_EXIT([&pipeline] { pipeline.detachFromOperationContext(); }); + + Timer latencyTimer; auto batch = resharding::data_copy::fillBatchForInsert( pipeline, resharding::gReshardingCollectionClonerBatchSizeInBytes.load()); + _env->metrics()->onCollClonerFillBatchForInsert( + duration_cast<Milliseconds>(latencyTimer.elapsed())); if (batch.empty()) { return false; diff --git a/src/mongo/db/s/resharding/resharding_metrics.cpp b/src/mongo/db/s/resharding/resharding_metrics.cpp index 9cc77c64a1a..f95372e3ad2 100644 --- a/src/mongo/db/s/resharding/resharding_metrics.cpp +++ b/src/mongo/db/s/resharding/resharding_metrics.cpp @@ -69,7 +69,9 @@ constexpr auto kLastOpEndingChunkImbalance = "lastOpEndingChunkImbalance"; constexpr auto kOpCounters = "opcounters"; constexpr auto kMinRemainingOperationTime = "minShardRemainingOperationTimeEstimatedMillis"; constexpr auto kMaxRemainingOperationTime = "maxShardRemainingOperationTimeEstimatedMillis"; -constexpr auto kOplogBatchApplyLatencyMillis = "oplogBatchApplyLatencyMillis"; +constexpr auto kOplogApplierApplyBatchLatencyMillis = "oplogApplierApplyBatchLatencyMillis"; +constexpr auto kCollClonerFillBatchForInsertLatencyMillis = + "collClonerFillBatchForInsertLatencyMillis"; using MetricsPtr = std::unique_ptr<ReshardingMetrics>; @@ -93,6 +95,13 @@ Milliseconds remainingTime(Milliseconds elapsedTime, double elapsedWork, double return Milliseconds(Milliseconds::rep(remainingMsec)); } +void appendHistogram(BSONObjBuilder* bob, + const IntegerHistogram<kLatencyHistogramBucketsCount>& hist) { + BSONObjBuilder histogramBuilder; + hist.append(histogramBuilder, false); + bob->appendElements(histogramBuilder.obj()); +} + static StringData serializeState(boost::optional<RecipientStateEnum> e) { return RecipientState_serializer(*e); } @@ -220,8 +229,11 @@ public: int64_t chunkImbalanceCount = 0; - IntegerHistogram<kLatencyHistogramBucketsCount> oplogBatchApplyLatencyMillis = - IntegerHistogram<kLatencyHistogramBucketsCount>(kOplogBatchApplyLatencyMillis, + IntegerHistogram<kLatencyHistogramBucketsCount> oplogApplierApplyBatchLatencyMillis = + IntegerHistogram<kLatencyHistogramBucketsCount>(kOplogApplierApplyBatchLatencyMillis, + latencyHistogramBuckets); + IntegerHistogram<kLatencyHistogramBucketsCount> collClonerFillBatchForInsertLatencyMillis = + IntegerHistogram<kLatencyHistogramBucketsCount>(kCollClonerFillBatchForInsertLatencyMillis, latencyHistogramBuckets); // The ops done by resharding to keep up with the client writes. @@ -306,13 +318,8 @@ void ReshardingMetrics::OperationMetrics::appendCurrentOpMetrics(BSONObjBuilder* serializeState(recipientState.get_value_or(RecipientStateEnum::kUnused))); bob->append(kOpStatus, ReshardingOperationStatus_serializer(opStatus)); - { - BSONObjBuilder histogramBuilder; - oplogBatchApplyLatencyMillis.append(histogramBuilder, false); - BSONObj histogram = histogramBuilder.obj(); - bob->appendElements(histogram); - } - + appendHistogram(bob, oplogApplierApplyBatchLatencyMillis); + appendHistogram(bob, collClonerFillBatchForInsertLatencyMillis); break; case Role::kCoordinator: bob->append(kCoordinatorState, serializeState(coordinatorState)); @@ -573,14 +580,27 @@ void ReshardingMetrics::gotInserts(int n) noexcept { _cumulativeOp->gotInserts(n); } -void ReshardingMetrics::onApplyOplogBatch(Milliseconds latency) { +void ReshardingMetrics::onOplogApplierApplyBatch(Milliseconds latency) { stdx::lock_guard<Latch> lk(_mutex); invariant(_currentOp, kNoOperationInProgress); invariant(checkState(*_currentOp->recipientState, {RecipientStateEnum::kApplying, RecipientStateEnum::kError})); - _currentOp->oplogBatchApplyLatencyMillis.increment(durationCount<Milliseconds>(latency)); - _cumulativeOp->oplogBatchApplyLatencyMillis.increment(durationCount<Milliseconds>(latency)); + _currentOp->oplogApplierApplyBatchLatencyMillis.increment(durationCount<Milliseconds>(latency)); + _cumulativeOp->oplogApplierApplyBatchLatencyMillis.increment( + durationCount<Milliseconds>(latency)); +} + +void ReshardingMetrics::onCollClonerFillBatchForInsert(Milliseconds latency) { + stdx::lock_guard<Latch> lk(_mutex); + invariant(_currentOp, kNoOperationInProgress); + invariant(checkState(*_currentOp->recipientState, + {RecipientStateEnum::kCloning, RecipientStateEnum::kError})); + + _currentOp->collClonerFillBatchForInsertLatencyMillis.increment( + durationCount<Milliseconds>(latency)); + _cumulativeOp->collClonerFillBatchForInsertLatencyMillis.increment( + durationCount<Milliseconds>(latency)); } void ReshardingMetrics::gotInsert() noexcept { @@ -761,12 +781,8 @@ void ReshardingMetrics::serializeCumulativeOpMetrics(BSONObjBuilder* bob) const bob->append(kMaxRemainingOperationTime, getRemainingOperationTime(ops.maxRemainingOperationTime)); - { - BSONObjBuilder histogramBuilder; - ops.oplogBatchApplyLatencyMillis.append(histogramBuilder, false); - BSONObj histogram = histogramBuilder.obj(); - bob->appendElements(histogram); - } + appendHistogram(bob, ops.oplogApplierApplyBatchLatencyMillis); + appendHistogram(bob, ops.collClonerFillBatchForInsertLatencyMillis); } Date_t ReshardingMetrics::_now() const { diff --git a/src/mongo/db/s/resharding/resharding_metrics.h b/src/mongo/db/s/resharding/resharding_metrics.h index b63ef01bb9b..24ab1a9a38a 100644 --- a/src/mongo/db/s/resharding/resharding_metrics.h +++ b/src/mongo/db/s/resharding/resharding_metrics.h @@ -106,8 +106,12 @@ public: void enterCriticalSection(Date_t start); void leaveCriticalSection(Date_t end); - // Records latency and throughput of batch oplog applies during resharding. - void onApplyOplogBatch(Milliseconds latency); + // Records latency and throughput of calls to ReshardingOplogApplier::_applyBatch + void onOplogApplierApplyBatch(Milliseconds latency); + + // Records latency and throughput of calls to resharding::data_copy::fillBatchForInsert + // in ReshardingCollectionCloner::doOneBatch + void onCollClonerFillBatchForInsert(Milliseconds latency); // Allows updating "oplog entries to apply" metrics when the recipient is in applying state. void onOplogEntriesFetched(int64_t entries) noexcept; diff --git a/src/mongo/db/s/resharding/resharding_metrics_test.cpp b/src/mongo/db/s/resharding/resharding_metrics_test.cpp index 77b8be78fb2..4b979936f51 100644 --- a/src/mongo/db/s/resharding/resharding_metrics_test.cpp +++ b/src/mongo/db/s/resharding/resharding_metrics_test.cpp @@ -49,7 +49,9 @@ namespace { using namespace fmt::literals; constexpr auto kOpTimeRemaining = "remainingOperationTimeEstimatedSecs"_sd; -constexpr auto kOplogBatchApplyLatencyMillis = "oplogBatchApplyLatencyMillis"; +constexpr auto kOplogApplierApplyBatchLatencyMillis = "oplogApplierApplyBatchLatencyMillis"; +constexpr auto kCollClonerFillBatchForInsertLatencyMillis = + "collClonerFillBatchForInsertLatencyMillis"; class ReshardingMetricsTest : public ServiceContextTest { public: @@ -129,15 +131,18 @@ public: << fmt::format("{}: {}", errMsg, report.toString()); }; - void appendExpectedHistogramResult(BSONObjBuilder& builder, + void appendExpectedHistogramResult(BSONObjBuilder* bob, + std::string tag, const std::vector<int64_t>& latencies) { - IntegerHistogram<kLatencyHistogramBucketsCount> hist(kOplogBatchApplyLatencyMillis, - latencyHistogramBuckets); + IntegerHistogram<kLatencyHistogramBucketsCount> hist(tag, latencyHistogramBuckets); for (size_t i = 0; i < latencies.size(); i++) { hist.increment(latencies[i]); } - hist.append(builder, false); + BSONObjBuilder histogramBuilder; + hist.append(histogramBuilder, false); + BSONObj histogram = histogramBuilder.obj(); + bob->appendElements(histogram); } private: @@ -589,21 +594,34 @@ TEST_F(ReshardingMetricsTest, CurrentOpReportForRecipient) { BSONObjBuilder expectedBuilder(std::move(expectedPrefix)); - // Append histogram data for batch oplog applies. - appendExpectedHistogramResult(expectedBuilder, {}); - BSONObj expected = expectedBuilder.obj(); + // Append histogram latency data. + appendExpectedHistogramResult(&expectedBuilder, kOplogApplierApplyBatchLatencyMillis, {}); + appendExpectedHistogramResult(&expectedBuilder, kCollClonerFillBatchForInsertLatencyMillis, {}); + + BSONObj expected = expectedBuilder.done(); const auto report = getMetrics()->reportForCurrentOp(options); ASSERT_BSONOBJ_EQ(expected, report); } -TEST_F(ReshardingMetricsTest, TestOplogBatchLatencyHistogramForRecipient) { - const std::vector<int64_t> latencies_1{3, 427, 0, 6004, 320, 10056, 12300, 105, 70}; - const std::vector<int64_t> latencies_2{800, 20, 5, 1025, 10567}; +TEST_F(ReshardingMetricsTest, TestHistogramMetricsForRecipient) { + const std::vector<int64_t> applyLatencies_1{3, 427, 0, 6004, 320, 10056, 12300, 105, 70}; + const std::vector<int64_t> applyLatencies_2{800, 20, 5, 1025, 10567}; + const std::vector<int64_t> insertLatencies_1{120, 7, 110, 50, 0, 16500, 77000, 667, 7980}; + const std::vector<int64_t> insertLatencies_2{12450, 2400, 760, 57, 2}; + + const auto combineLatencies = [](std::vector<int64_t>* allLatencies, + const std::vector<int64_t>& latencies_1, + const std::vector<int64_t>& latencies_2) { + allLatencies->insert(allLatencies->end(), latencies_1.begin(), latencies_1.end()); + allLatencies->insert(allLatencies->end(), latencies_2.begin(), latencies_2.end()); + }; + + std::vector<int64_t> allApplyLatencies; + combineLatencies(&allApplyLatencies, applyLatencies_1, applyLatencies_2); + std::vector<int64_t> allInsertLatencies; + combineLatencies(&allInsertLatencies, insertLatencies_1, insertLatencies_2); - std::vector<int64_t> allLatencies; - allLatencies.insert(allLatencies.end(), latencies_1.begin(), latencies_1.end()); - allLatencies.insert(allLatencies.end(), latencies_2.begin(), latencies_2.end()); const ReshardingMetrics::ReporterOptions options( ReshardingMetrics::Role::kRecipient, @@ -612,31 +630,54 @@ TEST_F(ReshardingMetricsTest, TestOplogBatchLatencyHistogramForRecipient) { BSON("id" << 1), false); - // First test that histogram metrics appear in currentOp. Next, test that histogram metrics - // accumulate in cumulativeOp. - const size_t kNumTests = 2; - std::vector<int64_t> testLatencies[kNumTests] = {latencies_1, latencies_2}; - std::vector<int64_t> expectedLatencies[kNumTests] = {latencies_1, allLatencies}; + // Test that all histogram metrics appear in both currentOp and cumulativeOp. + const size_t kNumTests = 4; + std::vector<int64_t> testLatencies[kNumTests] = { + applyLatencies_1, applyLatencies_2, insertLatencies_1, insertLatencies_2}; + std::vector<int64_t> expectedLatencies[kNumTests] = { + applyLatencies_1, allApplyLatencies, insertLatencies_1, allInsertLatencies}; OpReportType testReportTypes[kNumTests] = {OpReportType::CurrentOpReportRecipientRole, + OpReportType::CumulativeReport, + OpReportType::CurrentOpReportRecipientRole, OpReportType::CumulativeReport}; + std::string histogramTag[kNumTests] = {kOplogApplierApplyBatchLatencyMillis, + kOplogApplierApplyBatchLatencyMillis, + kCollClonerFillBatchForInsertLatencyMillis, + kCollClonerFillBatchForInsertLatencyMillis}; auto testLatencyHistogram = [&](std::vector<int64_t> latencies, OpReportType reportType, - std::vector<int64_t> expectedLatencies) { + std::vector<int64_t> expectedLatencies, + std::string histogramTag) { + LOGV2(57700, + "TestHistogramMetricsForRecipient test case", + "reportType"_attr = reportType, + "histogramTag"_attr = histogramTag); + startOperation(ReshardingMetrics::Role::kRecipient); - getMetrics()->setRecipientState(RecipientStateEnum::kApplying); + RecipientStateEnum state = (histogramTag.compare(kOplogApplierApplyBatchLatencyMillis) == 0 + ? RecipientStateEnum::kApplying + : RecipientStateEnum::kCloning); + getMetrics()->setRecipientState(state); + for (size_t i = 0; i < latencies.size(); i++) { - getMetrics()->onApplyOplogBatch(Milliseconds(latencies[i])); + if (histogramTag.compare(kOplogApplierApplyBatchLatencyMillis) == 0) { + getMetrics()->onOplogApplierApplyBatch(Milliseconds(latencies[i])); + } else if (histogramTag.compare(kCollClonerFillBatchForInsertLatencyMillis) == 0) { + getMetrics()->onCollClonerFillBatchForInsert(Milliseconds(latencies[i])); + } else { + MONGO_UNREACHABLE; + } } const auto report = getReport(reportType); - const auto buckets = report[kOplogBatchApplyLatencyMillis]; + const auto buckets = report[histogramTag]; BSONObjBuilder expectedBuilder; - appendExpectedHistogramResult(expectedBuilder, expectedLatencies); - const auto expectedHist = expectedBuilder.obj(); - const auto expectedBuckets = expectedHist[kOplogBatchApplyLatencyMillis]; + appendExpectedHistogramResult(&expectedBuilder, histogramTag, expectedLatencies); + const auto expectedHist = expectedBuilder.done(); + const auto expectedBuckets = expectedHist[histogramTag]; ASSERT_EQ(buckets.woCompare(expectedBuckets), 0); @@ -645,7 +686,8 @@ TEST_F(ReshardingMetricsTest, TestOplogBatchLatencyHistogramForRecipient) { }; for (size_t i = 0; i < kNumTests; i++) { - testLatencyHistogram(testLatencies[i], testReportTypes[i], expectedLatencies[i]); + testLatencyHistogram( + testLatencies[i], testReportTypes[i], expectedLatencies[i], histogramTag[i]); } } diff --git a/src/mongo/db/s/resharding/resharding_oplog_applier.cpp b/src/mongo/db/s/resharding/resharding_oplog_applier.cpp index 06ce65384c8..ad2f2e399ad 100644 --- a/src/mongo/db/s/resharding/resharding_oplog_applier.cpp +++ b/src/mongo/db/s/resharding/resharding_oplog_applier.cpp @@ -116,7 +116,8 @@ SemiFuture<void> ReshardingOplogApplier::_applyBatch( return status; }) .onCompletion([this, latencyTimer](Status status) { - _env->metrics()->onApplyOplogBatch(duration_cast<Milliseconds>(latencyTimer.elapsed())); + _env->metrics()->onOplogApplierApplyBatch( + duration_cast<Milliseconds>(latencyTimer.elapsed())); return status; }) .semi(); |