summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMark Benvenuto <mark.benvenuto@mongodb.com>2019-11-19 16:46:51 +0000
committerevergreen <evergreen@mongodb.com>2019-11-19 16:46:51 +0000
commitcffaa3c53fa3a404273f767f55f1a93f1320a209 (patch)
tree017b6a9060f15f27cae1df5dc766e9a1e56def0a
parent60c957304e503dbca360838627cf0f8402764929 (diff)
downloadmongo-cffaa3c53fa3a404273f767f55f1a93f1320a209.tar.gz
SERVER-41185 Make ftdc failures process fatal
-rw-r--r--src/mongo/db/ftdc/controller.cpp119
-rw-r--r--src/mongo/db/ftdc/controller.h2
2 files changed, 58 insertions, 63 deletions
diff --git a/src/mongo/db/ftdc/controller.cpp b/src/mongo/db/ftdc/controller.cpp
index 947fd004dfe..937fe0dbc7d 100644
--- a/src/mongo/db/ftdc/controller.cpp
+++ b/src/mongo/db/ftdc/controller.cpp
@@ -185,82 +185,77 @@ void FTDCController::stop() {
}
}
-void FTDCController::doLoop() {
- try {
- // Update config
+void FTDCController::doLoop() noexcept {
+ // Note: All exceptions thrown in this loop are considered process fatal. The default terminate
+ // is used to provide a good stack trace of the issue.
+ Client::initThread("ftdc");
+ Client* client = &cc();
+
+ // Update config
+ {
+ stdx::lock_guard<Latch> lock(_mutex);
+ _config = _configTemp;
+ }
+
+ while (true) {
+ // Compute the next interval to run regardless of how we were woken up
+ // Skipping an interval due to a race condition with a config signal is harmless.
+ auto now = getGlobalServiceContext()->getPreciseClockSource()->now();
+
+ // Get next time to run at
+ auto next_time = FTDCUtil::roundTime(now, _config.period);
+
+ // Wait for the next run or signal to shutdown
{
- stdx::lock_guard<Latch> lock(_mutex);
- _config = _configTemp;
- }
+ stdx::unique_lock<Latch> lock(_mutex);
+ MONGO_IDLE_THREAD_BLOCK;
- Client::initThread("ftdc");
- Client* client = &cc();
+ // We ignore spurious wakeups by just doing an iteration of the loop
+ auto status = _condvar.wait_until(lock, next_time.toSystemTimePoint());
- while (true) {
- // Compute the next interval to run regardless of how we were woken up
- // Skipping an interval due to a race condition with a config signal is harmless.
- auto now = getGlobalServiceContext()->getPreciseClockSource()->now();
+ // Are we done running?
+ if (_state == State::kStopRequested) {
+ break;
+ }
- // Get next time to run at
- auto next_time = FTDCUtil::roundTime(now, _config.period);
+ // Update the current configuration settings always
+ // In unit tests, we may never get a signal when the timeout is 1ms on Windows since
+ // MSVC 2013 converts wait_until(now() + 1ms) into ~ wait_for(0) which means it will
+ // not wait for the condition variable to be signaled because it uses
+ // GetFileSystemTime for now which has ~10 ms granularity.
+ _config = _configTemp;
- // Wait for the next run or signal to shutdown
- {
- stdx::unique_lock<Latch> lock(_mutex);
- MONGO_IDLE_THREAD_BLOCK;
-
- // We ignore spurious wakeups by just doing an iteration of the loop
- auto status = _condvar.wait_until(lock, next_time.toSystemTimePoint());
-
- // Are we done running?
- if (_state == State::kStopRequested) {
- break;
- }
-
- // Update the current configuration settings always
- // In unit tests, we may never get a signal when the timeout is 1ms on Windows since
- // MSVC 2013 converts wait_until(now() + 1ms) into ~ wait_for(0) which means it will
- // not wait for the condition variable to be signaled because it uses
- // GetFileSystemTime for now which has ~10 ms granularity.
- _config = _configTemp;
-
- // if we hit a timeout on the condvar, we need to do another collection
- // if we were signalled, then we have a config update only or were asked to stop
- if (status == stdx::cv_status::no_timeout) {
- continue;
- }
+ // if we hit a timeout on the condvar, we need to do another collection
+ // if we were signalled, then we have a config update only or were asked to stop
+ if (status == stdx::cv_status::no_timeout) {
+ continue;
}
+ }
- // TODO: consider only running this thread if we are enabled
- // for now, we just keep an idle thread as it is simpler
- if (_config.enabled) {
- // Delay initialization of FTDCFileManager until we are sure the user has enabled
- // FTDC
- if (!_mgr) {
- auto swMgr =
- FTDCFileManager::create(&_config, _path, &_rotateCollectors, client);
+ // TODO: consider only running this thread if we are enabled
+ // for now, we just keep an idle thread as it is simpler
+ if (_config.enabled) {
+ // Delay initialization of FTDCFileManager until we are sure the user has enabled
+ // FTDC
+ if (!_mgr) {
+ auto swMgr = FTDCFileManager::create(&_config, _path, &_rotateCollectors, client);
- _mgr = uassertStatusOK(std::move(swMgr));
- }
+ _mgr = uassertStatusOK(std::move(swMgr));
+ }
- auto collectSample = _periodicCollectors.collect(client);
+ auto collectSample = _periodicCollectors.collect(client);
- Status s = _mgr->writeSampleAndRotateIfNeeded(
- client, std::get<0>(collectSample), std::get<1>(collectSample));
+ Status s = _mgr->writeSampleAndRotateIfNeeded(
+ client, std::get<0>(collectSample), std::get<1>(collectSample));
- uassertStatusOK(s);
+ uassertStatusOK(s);
- // Store a reference to the most recent document from the periodic collectors
- {
- stdx::lock_guard<Latch> lock(_mutex);
- _mostRecentPeriodicDocument = std::get<0>(collectSample);
- }
+ // Store a reference to the most recent document from the periodic collectors
+ {
+ stdx::lock_guard<Latch> lock(_mutex);
+ _mostRecentPeriodicDocument = std::get<0>(collectSample);
}
}
- } catch (...) {
- warning() << "Uncaught exception in '" << exceptionToStatus()
- << "' in full-time diagnostic data capture subsystem. Shutting down the "
- "full-time diagnostic data capture subsystem.";
}
}
diff --git a/src/mongo/db/ftdc/controller.h b/src/mongo/db/ftdc/controller.h
index 50d6ae48886..f7b244e5fcc 100644
--- a/src/mongo/db/ftdc/controller.h
+++ b/src/mongo/db/ftdc/controller.h
@@ -146,7 +146,7 @@ private:
/**
* Do periodic statistics collection, and all other work on the background thread.
*/
- void doLoop();
+ void doLoop() noexcept;
private:
/**