diff options
author | Mark Benvenuto <mark.benvenuto@mongodb.com> | 2020-05-07 14:39:46 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-05-07 19:07:57 +0000 |
commit | 049b10ad31be3233130dbf4313b6be4a3ecd47c9 (patch) | |
tree | 8493bfca3123aacdb8df13d39ce5efda5b77c77f /src/mongo | |
parent | c969984b5f61baa0e045a499cee2c3a75883bf97 (diff) | |
download | mongo-049b10ad31be3233130dbf4313b6be4a3ecd47c9.tar.gz |
SERVER-47644 Enable Histogram Latency Stats in FTDC
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/db/ftdc/ftdc_server.cpp | 61 | ||||
-rw-r--r-- | src/mongo/db/ftdc/ftdc_server.idl | 5 | ||||
-rw-r--r-- | src/mongo/db/stats/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/stats/latency_server_status_section.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/stats/operation_latency_histogram.cpp | 45 | ||||
-rw-r--r-- | src/mongo/db/stats/operation_latency_histogram.h | 3 | ||||
-rw-r--r-- | src/mongo/db/stats/operation_latency_histogram_test.cpp | 46 | ||||
-rw-r--r-- | src/mongo/db/stats/top.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/stats/top.h | 4 |
9 files changed, 148 insertions, 29 deletions
diff --git a/src/mongo/db/ftdc/ftdc_server.cpp b/src/mongo/db/ftdc/ftdc_server.cpp index fdb2171707f..fa215af1dce 100644 --- a/src/mongo/db/ftdc/ftdc_server.cpp +++ b/src/mongo/db/ftdc/ftdc_server.cpp @@ -187,6 +187,52 @@ std::string FTDCSimpleInternalCommandCollector::name() const { return _name; } + +/** + * A FTDC Collector for serverStatus + */ +class FTDCServerStatusCommandCollector : public FTDCCollectorInterface { +private: + constexpr static StringData kName = "serverStatus"_sd; + constexpr static StringData kCommand = "serverStatus"_sd; + +public: + FTDCServerStatusCommandCollector() {} + + void collect(OperationContext* opCtx, BSONObjBuilder& builder) final { + // CmdServerStatus + // The "sharding" section is filtered out because at this time it only consists of strings + // in migration status. This section triggers too many schema changes in the serverStatus + // which hurt ftdc compression efficiency, because its output varies depending on the list + // of active migrations. + // "timing" is filtered out because it triggers frequent schema changes. + // "defaultRWConcern" is excluded because it changes rarely and instead included in rotation + + BSONObjBuilder commandBuilder; + commandBuilder.append(kCommand, 1); + commandBuilder.append("tcMalloc", true); + commandBuilder.append("sharding", false); + commandBuilder.append("timing", false); + commandBuilder.append("defaultRWConcern", false); + + if (gDiagnosticDataCollectionEnableLatencyHistograms.load()) { + BSONObjBuilder subObjBuilder(commandBuilder.subobjStart("opLatencies")); + subObjBuilder.append("histograms", true); + subObjBuilder.append("slowBuckets", true); + } + + commandBuilder.done(); + + auto request = OpMsgRequest::fromDBAndBody("", commandBuilder.obj()); + auto result = CommandHelpers::runCommandDirectly(opCtx, request); + builder.appendElements(result); + } + + std::string name() const final { + return kName.toString(); + } +}; + // Register the FTDC system // Note: This must be run before the server parameters are parsed during startup // so that the FTDCController is initialized. @@ -216,20 +262,7 @@ void startFTDC(boost::filesystem::path& path, // These are collected on the period interval in FTDCConfig. // NOTE: For each command here, there must be an equivalent privilege check in // GetDiagnosticDataCommand - - // CmdServerStatus - // The "sharding" section is filtered out because at this time it only consists of strings in - // migration status. This section triggers too many schema changes in the serverStatus which - // hurt ftdc compression efficiency, because its output varies depending on the list of active - // migrations. - // "timing" is filtered out because it triggers frequent schema changes. - // TODO: do we need to enable "sharding" on MongoS? - controller->addPeriodicCollector(std::make_unique<FTDCSimpleInternalCommandCollector>( - "serverStatus", - "serverStatus", - "", - BSON("serverStatus" << 1 << "tcMalloc" << true << "sharding" << false << "timing" << false - << "defaultRWConcern" << false))); + controller->addPeriodicCollector(std::make_unique<FTDCServerStatusCommandCollector>()); registerCollectors(controller.get()); diff --git a/src/mongo/db/ftdc/ftdc_server.idl b/src/mongo/db/ftdc/ftdc_server.idl index e9125fe492b..cb940b59816 100644 --- a/src/mongo/db/ftdc/ftdc_server.idl +++ b/src/mongo/db/ftdc/ftdc_server.idl @@ -87,3 +87,8 @@ server_parameters: set_at: [startup, runtime] cpp_class: DiagnosticDataCollectionDirectoryPathServerParameter + diagnosticDataCollectionEnableLatencyHistograms: + description: "Enable the capture of opLatencies: { histograms: true } } in FTDC." + set_at: [startup, runtime] + cpp_vartype: 'AtomicWord<bool>' + cpp_varname: gDiagnosticDataCollectionEnableLatencyHistograms diff --git a/src/mongo/db/stats/SConscript b/src/mongo/db/stats/SConscript index 594233298e2..00dd038556f 100644 --- a/src/mongo/db/stats/SConscript +++ b/src/mongo/db/stats/SConscript @@ -23,6 +23,7 @@ env.Library( ], LIBDEPS=[ '$BUILD_DIR/mongo/db/service_context', + '$BUILD_DIR/mongo/db/server_options_core', ], ) diff --git a/src/mongo/db/stats/latency_server_status_section.cpp b/src/mongo/db/stats/latency_server_status_section.cpp index 9b15c5cb1dc..7df644d898c 100644 --- a/src/mongo/db/stats/latency_server_status_section.cpp +++ b/src/mongo/db/stats/latency_server_status_section.cpp @@ -50,11 +50,13 @@ public: BSONObj generateSection(OperationContext* opCtx, const BSONElement& configElem) const override { BSONObjBuilder latencyBuilder; bool includeHistograms = false; + bool slowBuckets = false; if (configElem.type() == BSONType::Object) { includeHistograms = configElem.Obj()["histograms"].trueValue(); + slowBuckets = configElem.Obj()["slowBuckets"].trueValue(); } Top::get(opCtx->getServiceContext()) - .appendGlobalLatencyStats(includeHistograms, &latencyBuilder); + .appendGlobalLatencyStats(includeHistograms, slowBuckets, &latencyBuilder); return latencyBuilder.obj(); } } globalHistogramServerStatusSection; diff --git a/src/mongo/db/stats/operation_latency_histogram.cpp b/src/mongo/db/stats/operation_latency_histogram.cpp index 2c881eab1e0..d9a16e9cff2 100644 --- a/src/mongo/db/stats/operation_latency_histogram.cpp +++ b/src/mongo/db/stats/operation_latency_histogram.cpp @@ -95,31 +95,62 @@ const std::array<uint64_t, OperationLatencyHistogram::kMaxBuckets> void OperationLatencyHistogram::_append(const HistogramData& data, const char* key, bool includeHistograms, + bool slowMSBucketsOnly, BSONObjBuilder* builder) const { + uint64_t filteredCount = 0; + bool filterBuckets = slowMSBucketsOnly && serverGlobalParams.slowMS >= 0; + size_t lowestFilteredBound = 0; + BSONObjBuilder histogramBuilder(builder->subobjStart(key)); if (includeHistograms) { BSONArrayBuilder arrayBuilder(histogramBuilder.subarrayStart("histogram")); - for (int i = 0; i < kMaxBuckets; i++) { - if (data.buckets[i] == 0) + for (size_t i = 0; i < kMaxBuckets; i++) { + if (data.buckets[i] == 0) { + continue; + } + + if (filterBuckets && + (kLowerBounds[i] / 1000) >= static_cast<unsigned int>(serverGlobalParams.slowMS)) { + if (lowestFilteredBound == 0) { + lowestFilteredBound = kLowerBounds[i]; + } + + filteredCount += data.buckets[i]; continue; + } + BSONObjBuilder entryBuilder(arrayBuilder.subobjStart()); entryBuilder.append("micros", static_cast<long long>(kLowerBounds[i])); entryBuilder.append("count", static_cast<long long>(data.buckets[i])); entryBuilder.doneFast(); } + + // Append final bucket only if it contains values to minimize data in FTDC + // Final bucket is aggregate of all buckets >= slowMS with bucket labeled as minimum latency + // of bucket + if (filterBuckets && filteredCount > 0) { + BSONObjBuilder entryBuilder(arrayBuilder.subobjStart()); + entryBuilder.append("micros", static_cast<long long>(lowestFilteredBound + 1)); + entryBuilder.append("count", static_cast<long long>(filteredCount)); + entryBuilder.doneFast(); + } + arrayBuilder.doneFast(); } + histogramBuilder.append("latency", static_cast<long long>(data.sum)); histogramBuilder.append("ops", static_cast<long long>(data.entryCount)); histogramBuilder.doneFast(); } -void OperationLatencyHistogram::append(bool includeHistograms, BSONObjBuilder* builder) const { - _append(_reads, "reads", includeHistograms, builder); - _append(_writes, "writes", includeHistograms, builder); - _append(_commands, "commands", includeHistograms, builder); - _append(_transactions, "transactions", includeHistograms, builder); +void OperationLatencyHistogram::append(bool includeHistograms, + bool slowMSBucketsOnly, + BSONObjBuilder* builder) const { + _append(_reads, "reads", includeHistograms, slowMSBucketsOnly, builder); + _append(_writes, "writes", includeHistograms, slowMSBucketsOnly, builder); + _append(_commands, "commands", includeHistograms, slowMSBucketsOnly, builder); + _append(_transactions, "transactions", includeHistograms, slowMSBucketsOnly, builder); } // Computes the log base 2 of value, and checks for cases of split buckets. diff --git a/src/mongo/db/stats/operation_latency_histogram.h b/src/mongo/db/stats/operation_latency_histogram.h index 7167e92eb63..ae78f119b53 100644 --- a/src/mongo/db/stats/operation_latency_histogram.h +++ b/src/mongo/db/stats/operation_latency_histogram.h @@ -57,7 +57,7 @@ public: /** * Appends the four histograms with latency totals and operation counts. */ - void append(bool includeHistograms, BSONObjBuilder* builder) const; + void append(bool includeHistograms, bool slowMSBucketsOnly, BSONObjBuilder* builder) const; private: struct HistogramData { @@ -73,6 +73,7 @@ private: void _append(const HistogramData& data, const char* key, bool includeHistograms, + bool slowMSBucketsOnly, BSONObjBuilder* builder) const; void _incrementData(uint64_t latency, int bucket, HistogramData* data); diff --git a/src/mongo/db/stats/operation_latency_histogram_test.cpp b/src/mongo/db/stats/operation_latency_histogram_test.cpp index 0d1dd0d5efe..c1cf84e082a 100644 --- a/src/mongo/db/stats/operation_latency_histogram_test.cpp +++ b/src/mongo/db/stats/operation_latency_histogram_test.cpp @@ -56,7 +56,7 @@ TEST(OperationLatencyHistogram, EnsureIncrementsStored) { hist.increment(i, Command::ReadWriteType::kTransaction); } BSONObjBuilder outBuilder; - hist.append(false, &outBuilder); + hist.append(false, false, &outBuilder); BSONObj out = outBuilder.done(); ASSERT_EQUALS(out["reads"]["ops"].Long(), kMaxBuckets); ASSERT_EQUALS(out["writes"]["ops"].Long(), kMaxBuckets); @@ -78,7 +78,7 @@ TEST(OperationLatencyHistogram, CheckBucketCountsAndTotalLatency) { // The additional +1 because of the first boundary. uint64_t expectedSum = 3 * std::accumulate(kLowerBounds.begin(), kLowerBounds.end(), 0ULL) + 1; BSONObjBuilder outBuilder; - hist.append(true, &outBuilder); + hist.append(true, false, &outBuilder); BSONObj out = outBuilder.done(); ASSERT_EQUALS(static_cast<uint64_t>(out["reads"]["latency"].Long()), expectedSum); @@ -92,4 +92,46 @@ TEST(OperationLatencyHistogram, CheckBucketCountsAndTotalLatency) { ASSERT_EQUALS(bucket["count"].Long(), (i < kMaxBuckets - 1) ? 3 : 2); } } + +TEST(OperationLatencyHistogram, CheckBucketCountsAndTotalLatencySlowBuckets) { + OperationLatencyHistogram hist; + // Increment at the boundary, boundary+1, and boundary-1. + for (int i = 0; i < kMaxBuckets; i++) { + hist.increment(kLowerBounds[i], Command::ReadWriteType::kRead); + hist.increment(kLowerBounds[i] + 1, Command::ReadWriteType::kRead); + if (i > 0) { + hist.increment(kLowerBounds[i] - 1, Command::ReadWriteType::kRead); + } + } + + auto orig = serverGlobalParams.slowMS; + serverGlobalParams.slowMS = 100; + auto g1 = makeGuard([orig] { serverGlobalParams.slowMS = orig; }); + + // The additional +1 because of the first boundary. + uint64_t expectedSum = 3 * std::accumulate(kLowerBounds.begin(), kLowerBounds.end(), 0ULL) + 1; + BSONObjBuilder outBuilder; + hist.append(true, true, &outBuilder); + BSONObj out = outBuilder.done(); + ASSERT_EQUALS(static_cast<uint64_t>(out["reads"]["latency"].Long()), expectedSum); + + const size_t kMaxUnFilteredBuckets = 23; + // Each bucket has three counts with the exception of the last bucket, which has two. + ASSERT_EQUALS(out["reads"]["ops"].Long(), 3 * kMaxBuckets - 1); + std::vector<BSONElement> readBuckets = out["reads"]["histogram"].Array(); + ASSERT_EQUALS(readBuckets.size(), kMaxUnFilteredBuckets + 1); + for (size_t i = 0; i < kMaxUnFilteredBuckets; i++) { + BSONObj bucket = readBuckets[i].Obj(); + ASSERT_EQUALS(static_cast<uint64_t>(bucket["micros"].Long()), kLowerBounds[i]); + ASSERT_EQUALS(bucket["count"].Long(), (i < kMaxBuckets - 1) ? 3 : 2); + } + + // Handle the last bucket which is an aggregate + { + BSONObj bucket = readBuckets[kMaxUnFilteredBuckets].Obj(); + ASSERT_EQUALS(static_cast<uint64_t>(bucket["micros"].Long()), + kLowerBounds[kMaxUnFilteredBuckets] + 1); + ASSERT_EQUALS(bucket["count"].Long(), 83); + } +} } // namespace mongo diff --git a/src/mongo/db/stats/top.cpp b/src/mongo/db/stats/top.cpp index f491e3694c5..e445515cf4b 100644 --- a/src/mongo/db/stats/top.cpp +++ b/src/mongo/db/stats/top.cpp @@ -192,7 +192,7 @@ void Top::appendLatencyStats(const NamespaceString& nss, auto hashedNs = UsageMap::hasher().hashed_key(nss.ns()); stdx::lock_guard<SimpleMutex> lk(_lock); BSONObjBuilder latencyStatsBuilder; - _usage[hashedNs].opLatencyHistogram.append(includeHistograms, &latencyStatsBuilder); + _usage[hashedNs].opLatencyHistogram.append(includeHistograms, false, &latencyStatsBuilder); builder->append("ns", nss.ns()); builder->append("latencyStats", latencyStatsBuilder.obj()); } @@ -204,9 +204,11 @@ void Top::incrementGlobalLatencyStats(OperationContext* opCtx, _incrementHistogram(opCtx, latency, &_globalHistogramStats, readWriteType); } -void Top::appendGlobalLatencyStats(bool includeHistograms, BSONObjBuilder* builder) { +void Top::appendGlobalLatencyStats(bool includeHistograms, + bool slowMSBucketsOnly, + BSONObjBuilder* builder) { stdx::lock_guard<SimpleMutex> guard(_lock); - _globalHistogramStats.append(includeHistograms, builder); + _globalHistogramStats.append(includeHistograms, slowMSBucketsOnly, builder); } void Top::incrementGlobalTransactionLatencyStats(uint64_t latency) { diff --git a/src/mongo/db/stats/top.h b/src/mongo/db/stats/top.h index 635fda68a1e..5aa3034688b 100644 --- a/src/mongo/db/stats/top.h +++ b/src/mongo/db/stats/top.h @@ -132,7 +132,9 @@ public: /** * Appends the global latency statistics. */ - void appendGlobalLatencyStats(bool includeHistograms, BSONObjBuilder* builder); + void appendGlobalLatencyStats(bool includeHistograms, + bool slowMSBucketsOnly, + BSONObjBuilder* builder); private: void _appendToUsageMap(BSONObjBuilder& b, const UsageMap& map) const; |