summaryrefslogtreecommitdiff
path: root/src/mongo/db/s
diff options
context:
space:
mode:
authorMatt Walak <matt.walak@mongodb.com>2021-08-02 19:15:01 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2021-08-12 15:41:36 +0000
commit98a21127ae4957594276a35228a95d8d79d6ad25 (patch)
treeb8bc46e2045a9d81ccb82f377bd571abac65bdea /src/mongo/db/s
parentfce8274e6d47ed28d915e8e2db5149876dbcae25 (diff)
downloadmongo-98a21127ae4957594276a35228a95d8d79d6ad25.tar.gz
SERVER-57700 Measure latency/throughput of resharding::data_copy::fillBatchForInsert in ReshardingCollectionCloner::doOneBatch
Diffstat (limited to 'src/mongo/db/s')
-rw-r--r--src/mongo/db/s/resharding/resharding_collection_cloner.cpp4
-rw-r--r--src/mongo/db/s/resharding/resharding_metrics.cpp54
-rw-r--r--src/mongo/db/s/resharding/resharding_metrics.h8
-rw-r--r--src/mongo/db/s/resharding/resharding_metrics_test.cpp96
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_applier.cpp3
5 files changed, 116 insertions, 49 deletions
diff --git a/src/mongo/db/s/resharding/resharding_collection_cloner.cpp b/src/mongo/db/s/resharding/resharding_collection_cloner.cpp
index 2af6caace49..7971ca21424 100644
--- a/src/mongo/db/s/resharding/resharding_collection_cloner.cpp
+++ b/src/mongo/db/s/resharding/resharding_collection_cloner.cpp
@@ -258,8 +258,12 @@ std::unique_ptr<Pipeline, PipelineDeleter> ReshardingCollectionCloner::_restartP
bool ReshardingCollectionCloner::doOneBatch(OperationContext* opCtx, Pipeline& pipeline) {
pipeline.reattachToOperationContext(opCtx);
ON_BLOCK_EXIT([&pipeline] { pipeline.detachFromOperationContext(); });
+
+ Timer latencyTimer;
auto batch = resharding::data_copy::fillBatchForInsert(
pipeline, resharding::gReshardingCollectionClonerBatchSizeInBytes.load());
+ _env->metrics()->onCollClonerFillBatchForInsert(
+ duration_cast<Milliseconds>(latencyTimer.elapsed()));
if (batch.empty()) {
return false;
diff --git a/src/mongo/db/s/resharding/resharding_metrics.cpp b/src/mongo/db/s/resharding/resharding_metrics.cpp
index 9cc77c64a1a..f95372e3ad2 100644
--- a/src/mongo/db/s/resharding/resharding_metrics.cpp
+++ b/src/mongo/db/s/resharding/resharding_metrics.cpp
@@ -69,7 +69,9 @@ constexpr auto kLastOpEndingChunkImbalance = "lastOpEndingChunkImbalance";
constexpr auto kOpCounters = "opcounters";
constexpr auto kMinRemainingOperationTime = "minShardRemainingOperationTimeEstimatedMillis";
constexpr auto kMaxRemainingOperationTime = "maxShardRemainingOperationTimeEstimatedMillis";
-constexpr auto kOplogBatchApplyLatencyMillis = "oplogBatchApplyLatencyMillis";
+constexpr auto kOplogApplierApplyBatchLatencyMillis = "oplogApplierApplyBatchLatencyMillis";
+constexpr auto kCollClonerFillBatchForInsertLatencyMillis =
+ "collClonerFillBatchForInsertLatencyMillis";
using MetricsPtr = std::unique_ptr<ReshardingMetrics>;
@@ -93,6 +95,13 @@ Milliseconds remainingTime(Milliseconds elapsedTime, double elapsedWork, double
return Milliseconds(Milliseconds::rep(remainingMsec));
}
+void appendHistogram(BSONObjBuilder* bob,
+ const IntegerHistogram<kLatencyHistogramBucketsCount>& hist) {
+ BSONObjBuilder histogramBuilder;
+ hist.append(histogramBuilder, false);
+ bob->appendElements(histogramBuilder.obj());
+}
+
static StringData serializeState(boost::optional<RecipientStateEnum> e) {
return RecipientState_serializer(*e);
}
@@ -220,8 +229,11 @@ public:
int64_t chunkImbalanceCount = 0;
- IntegerHistogram<kLatencyHistogramBucketsCount> oplogBatchApplyLatencyMillis =
- IntegerHistogram<kLatencyHistogramBucketsCount>(kOplogBatchApplyLatencyMillis,
+ IntegerHistogram<kLatencyHistogramBucketsCount> oplogApplierApplyBatchLatencyMillis =
+ IntegerHistogram<kLatencyHistogramBucketsCount>(kOplogApplierApplyBatchLatencyMillis,
+ latencyHistogramBuckets);
+ IntegerHistogram<kLatencyHistogramBucketsCount> collClonerFillBatchForInsertLatencyMillis =
+ IntegerHistogram<kLatencyHistogramBucketsCount>(kCollClonerFillBatchForInsertLatencyMillis,
latencyHistogramBuckets);
// The ops done by resharding to keep up with the client writes.
@@ -306,13 +318,8 @@ void ReshardingMetrics::OperationMetrics::appendCurrentOpMetrics(BSONObjBuilder*
serializeState(recipientState.get_value_or(RecipientStateEnum::kUnused)));
bob->append(kOpStatus, ReshardingOperationStatus_serializer(opStatus));
- {
- BSONObjBuilder histogramBuilder;
- oplogBatchApplyLatencyMillis.append(histogramBuilder, false);
- BSONObj histogram = histogramBuilder.obj();
- bob->appendElements(histogram);
- }
-
+ appendHistogram(bob, oplogApplierApplyBatchLatencyMillis);
+ appendHistogram(bob, collClonerFillBatchForInsertLatencyMillis);
break;
case Role::kCoordinator:
bob->append(kCoordinatorState, serializeState(coordinatorState));
@@ -573,14 +580,27 @@ void ReshardingMetrics::gotInserts(int n) noexcept {
_cumulativeOp->gotInserts(n);
}
-void ReshardingMetrics::onApplyOplogBatch(Milliseconds latency) {
+void ReshardingMetrics::onOplogApplierApplyBatch(Milliseconds latency) {
stdx::lock_guard<Latch> lk(_mutex);
invariant(_currentOp, kNoOperationInProgress);
invariant(checkState(*_currentOp->recipientState,
{RecipientStateEnum::kApplying, RecipientStateEnum::kError}));
- _currentOp->oplogBatchApplyLatencyMillis.increment(durationCount<Milliseconds>(latency));
- _cumulativeOp->oplogBatchApplyLatencyMillis.increment(durationCount<Milliseconds>(latency));
+ _currentOp->oplogApplierApplyBatchLatencyMillis.increment(durationCount<Milliseconds>(latency));
+ _cumulativeOp->oplogApplierApplyBatchLatencyMillis.increment(
+ durationCount<Milliseconds>(latency));
+}
+
+void ReshardingMetrics::onCollClonerFillBatchForInsert(Milliseconds latency) {
+ stdx::lock_guard<Latch> lk(_mutex);
+ invariant(_currentOp, kNoOperationInProgress);
+ invariant(checkState(*_currentOp->recipientState,
+ {RecipientStateEnum::kCloning, RecipientStateEnum::kError}));
+
+ _currentOp->collClonerFillBatchForInsertLatencyMillis.increment(
+ durationCount<Milliseconds>(latency));
+ _cumulativeOp->collClonerFillBatchForInsertLatencyMillis.increment(
+ durationCount<Milliseconds>(latency));
}
void ReshardingMetrics::gotInsert() noexcept {
@@ -761,12 +781,8 @@ void ReshardingMetrics::serializeCumulativeOpMetrics(BSONObjBuilder* bob) const
bob->append(kMaxRemainingOperationTime,
getRemainingOperationTime(ops.maxRemainingOperationTime));
- {
- BSONObjBuilder histogramBuilder;
- ops.oplogBatchApplyLatencyMillis.append(histogramBuilder, false);
- BSONObj histogram = histogramBuilder.obj();
- bob->appendElements(histogram);
- }
+ appendHistogram(bob, ops.oplogApplierApplyBatchLatencyMillis);
+ appendHistogram(bob, ops.collClonerFillBatchForInsertLatencyMillis);
}
Date_t ReshardingMetrics::_now() const {
diff --git a/src/mongo/db/s/resharding/resharding_metrics.h b/src/mongo/db/s/resharding/resharding_metrics.h
index b63ef01bb9b..24ab1a9a38a 100644
--- a/src/mongo/db/s/resharding/resharding_metrics.h
+++ b/src/mongo/db/s/resharding/resharding_metrics.h
@@ -106,8 +106,12 @@ public:
void enterCriticalSection(Date_t start);
void leaveCriticalSection(Date_t end);
- // Records latency and throughput of batch oplog applies during resharding.
- void onApplyOplogBatch(Milliseconds latency);
+ // Records latency and throughput of calls to ReshardingOplogApplier::_applyBatch
+ void onOplogApplierApplyBatch(Milliseconds latency);
+
+ // Records latency and throughput of calls to resharding::data_copy::fillBatchForInsert
+ // in ReshardingCollectionCloner::doOneBatch
+ void onCollClonerFillBatchForInsert(Milliseconds latency);
// Allows updating "oplog entries to apply" metrics when the recipient is in applying state.
void onOplogEntriesFetched(int64_t entries) noexcept;
diff --git a/src/mongo/db/s/resharding/resharding_metrics_test.cpp b/src/mongo/db/s/resharding/resharding_metrics_test.cpp
index 77b8be78fb2..4b979936f51 100644
--- a/src/mongo/db/s/resharding/resharding_metrics_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_metrics_test.cpp
@@ -49,7 +49,9 @@ namespace {
using namespace fmt::literals;
constexpr auto kOpTimeRemaining = "remainingOperationTimeEstimatedSecs"_sd;
-constexpr auto kOplogBatchApplyLatencyMillis = "oplogBatchApplyLatencyMillis";
+constexpr auto kOplogApplierApplyBatchLatencyMillis = "oplogApplierApplyBatchLatencyMillis";
+constexpr auto kCollClonerFillBatchForInsertLatencyMillis =
+ "collClonerFillBatchForInsertLatencyMillis";
class ReshardingMetricsTest : public ServiceContextTest {
public:
@@ -129,15 +131,18 @@ public:
<< fmt::format("{}: {}", errMsg, report.toString());
};
- void appendExpectedHistogramResult(BSONObjBuilder& builder,
+ void appendExpectedHistogramResult(BSONObjBuilder* bob,
+ std::string tag,
const std::vector<int64_t>& latencies) {
- IntegerHistogram<kLatencyHistogramBucketsCount> hist(kOplogBatchApplyLatencyMillis,
- latencyHistogramBuckets);
+ IntegerHistogram<kLatencyHistogramBucketsCount> hist(tag, latencyHistogramBuckets);
for (size_t i = 0; i < latencies.size(); i++) {
hist.increment(latencies[i]);
}
- hist.append(builder, false);
+ BSONObjBuilder histogramBuilder;
+ hist.append(histogramBuilder, false);
+ BSONObj histogram = histogramBuilder.obj();
+ bob->appendElements(histogram);
}
private:
@@ -589,21 +594,34 @@ TEST_F(ReshardingMetricsTest, CurrentOpReportForRecipient) {
BSONObjBuilder expectedBuilder(std::move(expectedPrefix));
- // Append histogram data for batch oplog applies.
- appendExpectedHistogramResult(expectedBuilder, {});
- BSONObj expected = expectedBuilder.obj();
+ // Append histogram latency data.
+ appendExpectedHistogramResult(&expectedBuilder, kOplogApplierApplyBatchLatencyMillis, {});
+ appendExpectedHistogramResult(&expectedBuilder, kCollClonerFillBatchForInsertLatencyMillis, {});
+
+ BSONObj expected = expectedBuilder.done();
const auto report = getMetrics()->reportForCurrentOp(options);
ASSERT_BSONOBJ_EQ(expected, report);
}
-TEST_F(ReshardingMetricsTest, TestOplogBatchLatencyHistogramForRecipient) {
- const std::vector<int64_t> latencies_1{3, 427, 0, 6004, 320, 10056, 12300, 105, 70};
- const std::vector<int64_t> latencies_2{800, 20, 5, 1025, 10567};
+TEST_F(ReshardingMetricsTest, TestHistogramMetricsForRecipient) {
+ const std::vector<int64_t> applyLatencies_1{3, 427, 0, 6004, 320, 10056, 12300, 105, 70};
+ const std::vector<int64_t> applyLatencies_2{800, 20, 5, 1025, 10567};
+ const std::vector<int64_t> insertLatencies_1{120, 7, 110, 50, 0, 16500, 77000, 667, 7980};
+ const std::vector<int64_t> insertLatencies_2{12450, 2400, 760, 57, 2};
+
+ const auto combineLatencies = [](std::vector<int64_t>* allLatencies,
+ const std::vector<int64_t>& latencies_1,
+ const std::vector<int64_t>& latencies_2) {
+ allLatencies->insert(allLatencies->end(), latencies_1.begin(), latencies_1.end());
+ allLatencies->insert(allLatencies->end(), latencies_2.begin(), latencies_2.end());
+ };
+
+ std::vector<int64_t> allApplyLatencies;
+ combineLatencies(&allApplyLatencies, applyLatencies_1, applyLatencies_2);
+ std::vector<int64_t> allInsertLatencies;
+ combineLatencies(&allInsertLatencies, insertLatencies_1, insertLatencies_2);
- std::vector<int64_t> allLatencies;
- allLatencies.insert(allLatencies.end(), latencies_1.begin(), latencies_1.end());
- allLatencies.insert(allLatencies.end(), latencies_2.begin(), latencies_2.end());
const ReshardingMetrics::ReporterOptions options(
ReshardingMetrics::Role::kRecipient,
@@ -612,31 +630,54 @@ TEST_F(ReshardingMetricsTest, TestOplogBatchLatencyHistogramForRecipient) {
BSON("id" << 1),
false);
- // First test that histogram metrics appear in currentOp. Next, test that histogram metrics
- // accumulate in cumulativeOp.
- const size_t kNumTests = 2;
- std::vector<int64_t> testLatencies[kNumTests] = {latencies_1, latencies_2};
- std::vector<int64_t> expectedLatencies[kNumTests] = {latencies_1, allLatencies};
+ // Test that all histogram metrics appear in both currentOp and cumulativeOp.
+ const size_t kNumTests = 4;
+ std::vector<int64_t> testLatencies[kNumTests] = {
+ applyLatencies_1, applyLatencies_2, insertLatencies_1, insertLatencies_2};
+ std::vector<int64_t> expectedLatencies[kNumTests] = {
+ applyLatencies_1, allApplyLatencies, insertLatencies_1, allInsertLatencies};
OpReportType testReportTypes[kNumTests] = {OpReportType::CurrentOpReportRecipientRole,
+ OpReportType::CumulativeReport,
+ OpReportType::CurrentOpReportRecipientRole,
OpReportType::CumulativeReport};
+ std::string histogramTag[kNumTests] = {kOplogApplierApplyBatchLatencyMillis,
+ kOplogApplierApplyBatchLatencyMillis,
+ kCollClonerFillBatchForInsertLatencyMillis,
+ kCollClonerFillBatchForInsertLatencyMillis};
auto testLatencyHistogram = [&](std::vector<int64_t> latencies,
OpReportType reportType,
- std::vector<int64_t> expectedLatencies) {
+ std::vector<int64_t> expectedLatencies,
+ std::string histogramTag) {
+ LOGV2(57700,
+ "TestHistogramMetricsForRecipient test case",
+ "reportType"_attr = reportType,
+ "histogramTag"_attr = histogramTag);
+
startOperation(ReshardingMetrics::Role::kRecipient);
- getMetrics()->setRecipientState(RecipientStateEnum::kApplying);
+ RecipientStateEnum state = (histogramTag.compare(kOplogApplierApplyBatchLatencyMillis) == 0
+ ? RecipientStateEnum::kApplying
+ : RecipientStateEnum::kCloning);
+ getMetrics()->setRecipientState(state);
+
for (size_t i = 0; i < latencies.size(); i++) {
- getMetrics()->onApplyOplogBatch(Milliseconds(latencies[i]));
+ if (histogramTag.compare(kOplogApplierApplyBatchLatencyMillis) == 0) {
+ getMetrics()->onOplogApplierApplyBatch(Milliseconds(latencies[i]));
+ } else if (histogramTag.compare(kCollClonerFillBatchForInsertLatencyMillis) == 0) {
+ getMetrics()->onCollClonerFillBatchForInsert(Milliseconds(latencies[i]));
+ } else {
+ MONGO_UNREACHABLE;
+ }
}
const auto report = getReport(reportType);
- const auto buckets = report[kOplogBatchApplyLatencyMillis];
+ const auto buckets = report[histogramTag];
BSONObjBuilder expectedBuilder;
- appendExpectedHistogramResult(expectedBuilder, expectedLatencies);
- const auto expectedHist = expectedBuilder.obj();
- const auto expectedBuckets = expectedHist[kOplogBatchApplyLatencyMillis];
+ appendExpectedHistogramResult(&expectedBuilder, histogramTag, expectedLatencies);
+ const auto expectedHist = expectedBuilder.done();
+ const auto expectedBuckets = expectedHist[histogramTag];
ASSERT_EQ(buckets.woCompare(expectedBuckets), 0);
@@ -645,7 +686,8 @@ TEST_F(ReshardingMetricsTest, TestOplogBatchLatencyHistogramForRecipient) {
};
for (size_t i = 0; i < kNumTests; i++) {
- testLatencyHistogram(testLatencies[i], testReportTypes[i], expectedLatencies[i]);
+ testLatencyHistogram(
+ testLatencies[i], testReportTypes[i], expectedLatencies[i], histogramTag[i]);
}
}
diff --git a/src/mongo/db/s/resharding/resharding_oplog_applier.cpp b/src/mongo/db/s/resharding/resharding_oplog_applier.cpp
index 06ce65384c8..ad2f2e399ad 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_applier.cpp
+++ b/src/mongo/db/s/resharding/resharding_oplog_applier.cpp
@@ -116,7 +116,8 @@ SemiFuture<void> ReshardingOplogApplier::_applyBatch(
return status;
})
.onCompletion([this, latencyTimer](Status status) {
- _env->metrics()->onApplyOplogBatch(duration_cast<Milliseconds>(latencyTimer.elapsed()));
+ _env->metrics()->onOplogApplierApplyBatch(
+ duration_cast<Milliseconds>(latencyTimer.elapsed()));
return status;
})
.semi();