summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Benvenuto <mark.benvenuto@mongodb.com>2020-05-07 14:39:46 -0400
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-05-07 19:07:57 +0000
commit049b10ad31be3233130dbf4313b6be4a3ecd47c9 (patch)
tree8493bfca3123aacdb8df13d39ce5efda5b77c77f
parentc969984b5f61baa0e045a499cee2c3a75883bf97 (diff)
downloadmongo-049b10ad31be3233130dbf4313b6be4a3ecd47c9.tar.gz
SERVER-47644 Enable Histogram Latency Stats in FTDC
-rw-r--r--src/mongo/db/ftdc/ftdc_server.cpp61
-rw-r--r--src/mongo/db/ftdc/ftdc_server.idl5
-rw-r--r--src/mongo/db/stats/SConscript1
-rw-r--r--src/mongo/db/stats/latency_server_status_section.cpp4
-rw-r--r--src/mongo/db/stats/operation_latency_histogram.cpp45
-rw-r--r--src/mongo/db/stats/operation_latency_histogram.h3
-rw-r--r--src/mongo/db/stats/operation_latency_histogram_test.cpp46
-rw-r--r--src/mongo/db/stats/top.cpp8
-rw-r--r--src/mongo/db/stats/top.h4
9 files changed, 148 insertions, 29 deletions
diff --git a/src/mongo/db/ftdc/ftdc_server.cpp b/src/mongo/db/ftdc/ftdc_server.cpp
index fdb2171707f..fa215af1dce 100644
--- a/src/mongo/db/ftdc/ftdc_server.cpp
+++ b/src/mongo/db/ftdc/ftdc_server.cpp
@@ -187,6 +187,52 @@ std::string FTDCSimpleInternalCommandCollector::name() const {
return _name;
}
+
+/**
+ * A FTDC Collector for serverStatus
+ */
+class FTDCServerStatusCommandCollector : public FTDCCollectorInterface {
+private:
+ constexpr static StringData kName = "serverStatus"_sd;
+ constexpr static StringData kCommand = "serverStatus"_sd;
+
+public:
+ FTDCServerStatusCommandCollector() {}
+
+ void collect(OperationContext* opCtx, BSONObjBuilder& builder) final {
+ // CmdServerStatus
+ // The "sharding" section is filtered out because at this time it only consists of strings
+ // in migration status. This section triggers too many schema changes in the serverStatus
+ // which hurt ftdc compression efficiency, because its output varies depending on the list
+ // of active migrations.
+ // "timing" is filtered out because it triggers frequent schema changes.
+ // "defaultRWConcern" is excluded because it changes rarely and instead included in rotation
+
+ BSONObjBuilder commandBuilder;
+ commandBuilder.append(kCommand, 1);
+ commandBuilder.append("tcMalloc", true);
+ commandBuilder.append("sharding", false);
+ commandBuilder.append("timing", false);
+ commandBuilder.append("defaultRWConcern", false);
+
+ if (gDiagnosticDataCollectionEnableLatencyHistograms.load()) {
+ BSONObjBuilder subObjBuilder(commandBuilder.subobjStart("opLatencies"));
+ subObjBuilder.append("histograms", true);
+ subObjBuilder.append("slowBuckets", true);
+ }
+
+ commandBuilder.done();
+
+ auto request = OpMsgRequest::fromDBAndBody("", commandBuilder.obj());
+ auto result = CommandHelpers::runCommandDirectly(opCtx, request);
+ builder.appendElements(result);
+ }
+
+ std::string name() const final {
+ return kName.toString();
+ }
+};
+
// Register the FTDC system
// Note: This must be run before the server parameters are parsed during startup
// so that the FTDCController is initialized.
@@ -216,20 +262,7 @@ void startFTDC(boost::filesystem::path& path,
// These are collected on the period interval in FTDCConfig.
// NOTE: For each command here, there must be an equivalent privilege check in
// GetDiagnosticDataCommand
-
- // CmdServerStatus
- // The "sharding" section is filtered out because at this time it only consists of strings in
- // migration status. This section triggers too many schema changes in the serverStatus which
- // hurt ftdc compression efficiency, because its output varies depending on the list of active
- // migrations.
- // "timing" is filtered out because it triggers frequent schema changes.
- // TODO: do we need to enable "sharding" on MongoS?
- controller->addPeriodicCollector(std::make_unique<FTDCSimpleInternalCommandCollector>(
- "serverStatus",
- "serverStatus",
- "",
- BSON("serverStatus" << 1 << "tcMalloc" << true << "sharding" << false << "timing" << false
- << "defaultRWConcern" << false)));
+ controller->addPeriodicCollector(std::make_unique<FTDCServerStatusCommandCollector>());
registerCollectors(controller.get());
diff --git a/src/mongo/db/ftdc/ftdc_server.idl b/src/mongo/db/ftdc/ftdc_server.idl
index e9125fe492b..cb940b59816 100644
--- a/src/mongo/db/ftdc/ftdc_server.idl
+++ b/src/mongo/db/ftdc/ftdc_server.idl
@@ -87,3 +87,8 @@ server_parameters:
set_at: [startup, runtime]
cpp_class: DiagnosticDataCollectionDirectoryPathServerParameter
+ diagnosticDataCollectionEnableLatencyHistograms:
+ description: "Enable the capture of opLatencies: { histograms: true } } in FTDC."
+ set_at: [startup, runtime]
+ cpp_vartype: 'AtomicWord<bool>'
+ cpp_varname: gDiagnosticDataCollectionEnableLatencyHistograms
diff --git a/src/mongo/db/stats/SConscript b/src/mongo/db/stats/SConscript
index 594233298e2..00dd038556f 100644
--- a/src/mongo/db/stats/SConscript
+++ b/src/mongo/db/stats/SConscript
@@ -23,6 +23,7 @@ env.Library(
],
LIBDEPS=[
'$BUILD_DIR/mongo/db/service_context',
+ '$BUILD_DIR/mongo/db/server_options_core',
],
)
diff --git a/src/mongo/db/stats/latency_server_status_section.cpp b/src/mongo/db/stats/latency_server_status_section.cpp
index 9b15c5cb1dc..7df644d898c 100644
--- a/src/mongo/db/stats/latency_server_status_section.cpp
+++ b/src/mongo/db/stats/latency_server_status_section.cpp
@@ -50,11 +50,13 @@ public:
BSONObj generateSection(OperationContext* opCtx, const BSONElement& configElem) const override {
BSONObjBuilder latencyBuilder;
bool includeHistograms = false;
+ bool slowBuckets = false;
if (configElem.type() == BSONType::Object) {
includeHistograms = configElem.Obj()["histograms"].trueValue();
+ slowBuckets = configElem.Obj()["slowBuckets"].trueValue();
}
Top::get(opCtx->getServiceContext())
- .appendGlobalLatencyStats(includeHistograms, &latencyBuilder);
+ .appendGlobalLatencyStats(includeHistograms, slowBuckets, &latencyBuilder);
return latencyBuilder.obj();
}
} globalHistogramServerStatusSection;
diff --git a/src/mongo/db/stats/operation_latency_histogram.cpp b/src/mongo/db/stats/operation_latency_histogram.cpp
index 2c881eab1e0..d9a16e9cff2 100644
--- a/src/mongo/db/stats/operation_latency_histogram.cpp
+++ b/src/mongo/db/stats/operation_latency_histogram.cpp
@@ -95,31 +95,62 @@ const std::array<uint64_t, OperationLatencyHistogram::kMaxBuckets>
void OperationLatencyHistogram::_append(const HistogramData& data,
const char* key,
bool includeHistograms,
+ bool slowMSBucketsOnly,
BSONObjBuilder* builder) const {
+ uint64_t filteredCount = 0;
+ bool filterBuckets = slowMSBucketsOnly && serverGlobalParams.slowMS >= 0;
+ size_t lowestFilteredBound = 0;
+
BSONObjBuilder histogramBuilder(builder->subobjStart(key));
if (includeHistograms) {
BSONArrayBuilder arrayBuilder(histogramBuilder.subarrayStart("histogram"));
- for (int i = 0; i < kMaxBuckets; i++) {
- if (data.buckets[i] == 0)
+ for (size_t i = 0; i < kMaxBuckets; i++) {
+ if (data.buckets[i] == 0) {
+ continue;
+ }
+
+ if (filterBuckets &&
+ (kLowerBounds[i] / 1000) >= static_cast<unsigned int>(serverGlobalParams.slowMS)) {
+ if (lowestFilteredBound == 0) {
+ lowestFilteredBound = kLowerBounds[i];
+ }
+
+ filteredCount += data.buckets[i];
continue;
+ }
+
BSONObjBuilder entryBuilder(arrayBuilder.subobjStart());
entryBuilder.append("micros", static_cast<long long>(kLowerBounds[i]));
entryBuilder.append("count", static_cast<long long>(data.buckets[i]));
entryBuilder.doneFast();
}
+
+ // Append final bucket only if it contains values to minimize data in FTDC
+ // Final bucket is aggregate of all buckets >= slowMS with bucket labeled as minimum latency
+ // of bucket
+ if (filterBuckets && filteredCount > 0) {
+ BSONObjBuilder entryBuilder(arrayBuilder.subobjStart());
+ entryBuilder.append("micros", static_cast<long long>(lowestFilteredBound + 1));
+ entryBuilder.append("count", static_cast<long long>(filteredCount));
+ entryBuilder.doneFast();
+ }
+
arrayBuilder.doneFast();
}
+
histogramBuilder.append("latency", static_cast<long long>(data.sum));
histogramBuilder.append("ops", static_cast<long long>(data.entryCount));
histogramBuilder.doneFast();
}
-void OperationLatencyHistogram::append(bool includeHistograms, BSONObjBuilder* builder) const {
- _append(_reads, "reads", includeHistograms, builder);
- _append(_writes, "writes", includeHistograms, builder);
- _append(_commands, "commands", includeHistograms, builder);
- _append(_transactions, "transactions", includeHistograms, builder);
+void OperationLatencyHistogram::append(bool includeHistograms,
+ bool slowMSBucketsOnly,
+ BSONObjBuilder* builder) const {
+ _append(_reads, "reads", includeHistograms, slowMSBucketsOnly, builder);
+ _append(_writes, "writes", includeHistograms, slowMSBucketsOnly, builder);
+ _append(_commands, "commands", includeHistograms, slowMSBucketsOnly, builder);
+ _append(_transactions, "transactions", includeHistograms, slowMSBucketsOnly, builder);
}
// Computes the log base 2 of value, and checks for cases of split buckets.
diff --git a/src/mongo/db/stats/operation_latency_histogram.h b/src/mongo/db/stats/operation_latency_histogram.h
index 7167e92eb63..ae78f119b53 100644
--- a/src/mongo/db/stats/operation_latency_histogram.h
+++ b/src/mongo/db/stats/operation_latency_histogram.h
@@ -57,7 +57,7 @@ public:
/**
* Appends the four histograms with latency totals and operation counts.
*/
- void append(bool includeHistograms, BSONObjBuilder* builder) const;
+ void append(bool includeHistograms, bool slowMSBucketsOnly, BSONObjBuilder* builder) const;
private:
struct HistogramData {
@@ -73,6 +73,7 @@ private:
void _append(const HistogramData& data,
const char* key,
bool includeHistograms,
+ bool slowMSBucketsOnly,
BSONObjBuilder* builder) const;
void _incrementData(uint64_t latency, int bucket, HistogramData* data);
diff --git a/src/mongo/db/stats/operation_latency_histogram_test.cpp b/src/mongo/db/stats/operation_latency_histogram_test.cpp
index 0d1dd0d5efe..c1cf84e082a 100644
--- a/src/mongo/db/stats/operation_latency_histogram_test.cpp
+++ b/src/mongo/db/stats/operation_latency_histogram_test.cpp
@@ -56,7 +56,7 @@ TEST(OperationLatencyHistogram, EnsureIncrementsStored) {
hist.increment(i, Command::ReadWriteType::kTransaction);
}
BSONObjBuilder outBuilder;
- hist.append(false, &outBuilder);
+ hist.append(false, false, &outBuilder);
BSONObj out = outBuilder.done();
ASSERT_EQUALS(out["reads"]["ops"].Long(), kMaxBuckets);
ASSERT_EQUALS(out["writes"]["ops"].Long(), kMaxBuckets);
@@ -78,7 +78,7 @@ TEST(OperationLatencyHistogram, CheckBucketCountsAndTotalLatency) {
// The additional +1 because of the first boundary.
uint64_t expectedSum = 3 * std::accumulate(kLowerBounds.begin(), kLowerBounds.end(), 0ULL) + 1;
BSONObjBuilder outBuilder;
- hist.append(true, &outBuilder);
+ hist.append(true, false, &outBuilder);
BSONObj out = outBuilder.done();
ASSERT_EQUALS(static_cast<uint64_t>(out["reads"]["latency"].Long()), expectedSum);
@@ -92,4 +92,46 @@ TEST(OperationLatencyHistogram, CheckBucketCountsAndTotalLatency) {
ASSERT_EQUALS(bucket["count"].Long(), (i < kMaxBuckets - 1) ? 3 : 2);
}
}
+
+TEST(OperationLatencyHistogram, CheckBucketCountsAndTotalLatencySlowBuckets) {
+ OperationLatencyHistogram hist;
+ // Increment at the boundary, boundary+1, and boundary-1.
+ for (int i = 0; i < kMaxBuckets; i++) {
+ hist.increment(kLowerBounds[i], Command::ReadWriteType::kRead);
+ hist.increment(kLowerBounds[i] + 1, Command::ReadWriteType::kRead);
+ if (i > 0) {
+ hist.increment(kLowerBounds[i] - 1, Command::ReadWriteType::kRead);
+ }
+ }
+
+ auto orig = serverGlobalParams.slowMS;
+ serverGlobalParams.slowMS = 100;
+ auto g1 = makeGuard([orig] { serverGlobalParams.slowMS = orig; });
+
+ // The additional +1 because of the first boundary.
+ uint64_t expectedSum = 3 * std::accumulate(kLowerBounds.begin(), kLowerBounds.end(), 0ULL) + 1;
+ BSONObjBuilder outBuilder;
+ hist.append(true, true, &outBuilder);
+ BSONObj out = outBuilder.done();
+ ASSERT_EQUALS(static_cast<uint64_t>(out["reads"]["latency"].Long()), expectedSum);
+
+ const size_t kMaxUnFilteredBuckets = 23;
+ // Each bucket has three counts with the exception of the last bucket, which has two.
+ ASSERT_EQUALS(out["reads"]["ops"].Long(), 3 * kMaxBuckets - 1);
+ std::vector<BSONElement> readBuckets = out["reads"]["histogram"].Array();
+ ASSERT_EQUALS(readBuckets.size(), kMaxUnFilteredBuckets + 1);
+ for (size_t i = 0; i < kMaxUnFilteredBuckets; i++) {
+ BSONObj bucket = readBuckets[i].Obj();
+ ASSERT_EQUALS(static_cast<uint64_t>(bucket["micros"].Long()), kLowerBounds[i]);
+ ASSERT_EQUALS(bucket["count"].Long(), (i < kMaxBuckets - 1) ? 3 : 2);
+ }
+
+ // Handle the last bucket which is an aggregate
+ {
+ BSONObj bucket = readBuckets[kMaxUnFilteredBuckets].Obj();
+ ASSERT_EQUALS(static_cast<uint64_t>(bucket["micros"].Long()),
+ kLowerBounds[kMaxUnFilteredBuckets] + 1);
+ ASSERT_EQUALS(bucket["count"].Long(), 83);
+ }
+}
} // namespace mongo
diff --git a/src/mongo/db/stats/top.cpp b/src/mongo/db/stats/top.cpp
index f491e3694c5..e445515cf4b 100644
--- a/src/mongo/db/stats/top.cpp
+++ b/src/mongo/db/stats/top.cpp
@@ -192,7 +192,7 @@ void Top::appendLatencyStats(const NamespaceString& nss,
auto hashedNs = UsageMap::hasher().hashed_key(nss.ns());
stdx::lock_guard<SimpleMutex> lk(_lock);
BSONObjBuilder latencyStatsBuilder;
- _usage[hashedNs].opLatencyHistogram.append(includeHistograms, &latencyStatsBuilder);
+ _usage[hashedNs].opLatencyHistogram.append(includeHistograms, false, &latencyStatsBuilder);
builder->append("ns", nss.ns());
builder->append("latencyStats", latencyStatsBuilder.obj());
}
@@ -204,9 +204,11 @@ void Top::incrementGlobalLatencyStats(OperationContext* opCtx,
_incrementHistogram(opCtx, latency, &_globalHistogramStats, readWriteType);
}
-void Top::appendGlobalLatencyStats(bool includeHistograms, BSONObjBuilder* builder) {
+void Top::appendGlobalLatencyStats(bool includeHistograms,
+ bool slowMSBucketsOnly,
+ BSONObjBuilder* builder) {
stdx::lock_guard<SimpleMutex> guard(_lock);
- _globalHistogramStats.append(includeHistograms, builder);
+ _globalHistogramStats.append(includeHistograms, slowMSBucketsOnly, builder);
}
void Top::incrementGlobalTransactionLatencyStats(uint64_t latency) {
diff --git a/src/mongo/db/stats/top.h b/src/mongo/db/stats/top.h
index 635fda68a1e..5aa3034688b 100644
--- a/src/mongo/db/stats/top.h
+++ b/src/mongo/db/stats/top.h
@@ -132,7 +132,9 @@ public:
/**
* Appends the global latency statistics.
*/
- void appendGlobalLatencyStats(bool includeHistograms, BSONObjBuilder* builder);
+ void appendGlobalLatencyStats(bool includeHistograms,
+ bool slowMSBucketsOnly,
+ BSONObjBuilder* builder);
private:
void _appendToUsageMap(BSONObjBuilder& b, const UsageMap& map) const;