summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAmirsaman Memaripour <amirsaman.memaripour@mongodb.com>2020-10-14 19:35:32 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-11-19 22:01:23 +0000
commit4739fb232284a655f0ae89c9a43490ed2219cb5e (patch)
tree7e26424d85e12344d0965193d259dd6c7c268a59 /src
parent48fcb9bd106ca5e6a5a361a8a51992392ee8b335 (diff)
downloadmongo-4739fb232284a655f0ae89c9a43490ed2219cb5e.tar.gz
SERVER-51601 Make OperationCPUTimer initializer more resilient
Diffstat (limited to 'src')
-rw-r--r--src/mongo/db/operation_cpu_timer.cpp43
-rw-r--r--src/mongo/db/operation_cpu_timer_test.cpp52
2 files changed, 85 insertions, 10 deletions
diff --git a/src/mongo/db/operation_cpu_timer.cpp b/src/mongo/db/operation_cpu_timer.cpp
index 7627c8af11c..34cb63eb3d4 100644
--- a/src/mongo/db/operation_cpu_timer.cpp
+++ b/src/mongo/db/operation_cpu_timer.cpp
@@ -27,6 +27,8 @@
* it in the license file.
*/
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kDefault
+
#include <boost/optional.hpp>
#include <fmt/format.h>
@@ -39,6 +41,7 @@
#include "mongo/base/error_codes.h"
#include "mongo/db/client.h"
#include "mongo/db/operation_context.h"
+#include "mongo/logv2/log.h"
#include "mongo/stdx/thread.h"
#include "mongo/util/assert_util.h"
#include "mongo/util/errno_util.h"
@@ -46,11 +49,22 @@
namespace mongo {
+using namespace fmt::literals;
+
#if defined(__linux__)
namespace {
-using namespace fmt::literals;
+// Reads the thread timer, and throws with `InternalError` if that fails.
+Nanoseconds getThreadCPUTime() {
+ struct timespec t;
+ if (auto ret = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &t); ret != 0) {
+ int ec = errno;
+ internalAssert(Status(ErrorCodes::InternalError,
+ "Unable to get time: {}"_format(errnoWithDescription(ec))));
+ }
+ return Seconds(t.tv_sec) + Nanoseconds(t.tv_nsec);
+}
MONGO_FAIL_POINT_DEFINE(hangCPUTimerAfterOnThreadAttach);
MONGO_FAIL_POINT_DEFINE(hangCPUTimerAfterOnThreadDetach);
@@ -132,14 +146,13 @@ void PosixTimer::onThreadDetach() {
hangCPUTimerAfterOnThreadDetach.pauseWhileSet();
}
-Nanoseconds PosixTimer::_getThreadTime() const {
- struct timespec t;
- if (auto ret = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &t); ret != 0) {
- int ec = errno;
- internalAssert(Status(ErrorCodes::InternalError,
- "Unable to get time: {}"_format(errnoWithDescription(ec))));
- }
- return Seconds(t.tv_sec) + Nanoseconds(t.tv_nsec);
+Nanoseconds PosixTimer::_getThreadTime() const try {
+ return getThreadCPUTime();
+} catch (const ExceptionFor<ErrorCodes::InternalError>& ex) {
+ // Abort the process as the timer cannot account for the elapsed time. This path is only
+ // reachable if the platform supports CPU time measurement at startup, but returns an error
+ // for a subsequent attempt to get thread-specific CPU consumption.
+ LOGV2_FATAL(4744601, "Failed to read the CPU time for the current thread", "error"_attr = ex);
}
static auto getCPUTimer = OperationContext::declareDecoration<PosixTimer>();
@@ -154,7 +167,17 @@ OperationCPUTimer* OperationCPUTimer::get(OperationContext* opCtx) {
// of SMP systems.
static bool isTimeSupported = [] {
clockid_t cid;
- return clock_getcpuclockid(0, &cid) == 0;
+ if (clock_getcpuclockid(0, &cid) != 0)
+ return false;
+
+ try {
+ getThreadCPUTime();
+ } catch (const ExceptionFor<ErrorCodes::InternalError>&) {
+ // Unable to collect the CPU time for the current thread.
+ return false;
+ }
+
+ return true;
}();
if (!isTimeSupported)
diff --git a/src/mongo/db/operation_cpu_timer_test.cpp b/src/mongo/db/operation_cpu_timer_test.cpp
index e498ce168b6..8d579e9937c 100644
--- a/src/mongo/db/operation_cpu_timer_test.cpp
+++ b/src/mongo/db/operation_cpu_timer_test.cpp
@@ -27,9 +27,14 @@
* it in the license file.
*/
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
+
#include "mongo/db/operation_cpu_timer.h"
#include "mongo/db/service_context_test_fixture.h"
+#include "mongo/logv2/log.h"
#include "mongo/platform/atomic_word.h"
+#include "mongo/stdx/condition_variable.h"
+#include "mongo/stdx/mutex.h"
#include "mongo/stdx/thread.h"
#include "mongo/unittest/barrier.h"
#include "mongo/unittest/death_test.h"
@@ -159,6 +164,53 @@ DEATH_TEST_F(OperationCPUTimerTest, GetElapsedForPausedTimer, "Not attached to c
timer->getElapsed();
}
+TEST_F(OperationCPUTimerTest, TimerPausesOnBlockingSleep) {
+ // This test checks if the time measured by `OperationCPUTimer` does not include the period of
+ // time the operation was blocked (e.g., waiting on a condition variable). The idea is to have
+ // the operation block for `kSomeDelay`, ensure the elapsed time observed by the timer is always
+ // less than `kSomeDelay`, and repeat the test `kRepeats` times. To account for the sporadic
+ // wake ups, the test does not fail unless the number of failures exceeds `kMaxFailures`. This
+ // is just a best-effort, and the number of failures is not guaranteed to not exceed the upper
+ // bound (i.e., `kMaxFailures`).
+ const auto kSomeDelay = Milliseconds(1);
+ const auto kRepeats = 1000;
+ const auto kMaxFailureRate = 0.1;
+ const auto kMaxFailures = kMaxFailureRate * kRepeats;
+
+ auto timer = getTimer();
+
+ auto checkTimer = [&] {
+ auto elapsed = timer->getElapsed();
+ if (elapsed < kSomeDelay)
+ return true;
+ LOGV2_WARNING(5160101,
+ "Elapsed operation time exceeded the upper bound",
+ "elapsed"_attr = elapsed,
+ "delay"_attr = kSomeDelay);
+ return false;
+ };
+
+ auto failures = 0;
+ for (auto i = 0; i < kRepeats; i++) {
+ timer->start();
+ sleepFor(kSomeDelay);
+ if (!checkTimer())
+ failures++;
+ timer->stop();
+
+ stdx::condition_variable cv;
+ auto mutex = MONGO_MAKE_LATCH("TimerPausesOnBlockingSleep");
+ timer->start();
+ stdx::unique_lock lk(mutex);
+ cv.wait_for(lk, kSomeDelay.toSystemDuration(), [] { return false; });
+ if (!checkTimer())
+ failures++;
+ timer->stop();
+ }
+
+ ASSERT_LTE(failures, kMaxFailures);
+}
+
#else
TEST_F(OperationCPUTimerTest, TimerNotSetIfNotSupported) {