summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYu Jin Kang Park <yujin.kang@mongodb.com>2022-07-26 08:52:36 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-07-26 09:23:30 +0000
commit519c7ea0dbf08591d967f1d1aa77ca77af257a7d (patch)
treec94239ea01856eaebdf05b81c88a187a1b392008
parenteb16c5b485db24ec49aa11f01046998095106f6b (diff)
downloadmongo-519c7ea0dbf08591d967f1d1aa77ca77af257a7d.tar.gz
SERVER-67990: TestTimerElapsedAfterMultipleDetachAttach change to multiple runs with fail threshold
-rw-r--r--src/mongo/db/operation_cpu_timer_test.cpp77
1 files changed, 48 insertions, 29 deletions
diff --git a/src/mongo/db/operation_cpu_timer_test.cpp b/src/mongo/db/operation_cpu_timer_test.cpp
index 28355125e77..04c0e0b97df 100644
--- a/src/mongo/db/operation_cpu_timer_test.cpp
+++ b/src/mongo/db/operation_cpu_timer_test.cpp
@@ -32,12 +32,14 @@
#include "mongo/db/service_context_test_fixture.h"
#include "mongo/logv2/log.h"
#include "mongo/platform/atomic_word.h"
+#include "mongo/stdx/chrono.h"
#include "mongo/stdx/condition_variable.h"
#include "mongo/stdx/mutex.h"
#include "mongo/stdx/thread.h"
#include "mongo/unittest/barrier.h"
#include "mongo/unittest/death_test.h"
#include "mongo/unittest/unittest.h"
+#include "mongo/util/duration.h"
#include "mongo/util/time_support.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
@@ -59,16 +61,11 @@ public:
_opCtx = getGlobalServiceContext()->makeOperationContext(Client::getCurrent());
}
- void busyWait(Nanoseconds delay) const {
- AtomicWord<bool> mayJoin{false};
- stdx::thread blocker([&] {
- sleepFor(delay);
- mayJoin.store(true);
- });
- while (!mayJoin.load()) {
- // Busy wait for the blocker thread.
+ void busyWait(Microseconds delay) const {
+ auto start = stdx::chrono::steady_clock::now();
+ auto end = start + stdx::chrono::microseconds{durationCount<Microseconds>(delay)};
+ while (stdx::chrono::steady_clock::now() <= end) {
}
- blocker.join();
}
private:
@@ -81,12 +78,12 @@ TEST_F(OperationCPUTimerTest, TestTimer) {
auto timer = getTimer();
timer->start();
- busyWait(Nanoseconds(100)); // A small delay to make sure the timer advances.
+ busyWait(Microseconds(1)); // A small delay to make sure the timer advances.
ASSERT_GT(timer->getElapsed(), Nanoseconds(0));
timer->stop();
const auto elapsedAfterStop = timer->getElapsed();
- busyWait(Microseconds(1)); // A relatively longer delay to ensure the timer doesn't proceed.
+ busyWait(Microseconds(10)); // A relatively longer delay to ensure the timer doesn't proceed.
const auto elapsedAfterSleep = timer->getElapsed();
ASSERT_EQ(elapsedAfterStop, elapsedAfterSleep);
}
@@ -131,30 +128,52 @@ TEST_F(OperationCPUTimerTest, TestTimerDetachAndAttachHandlers) {
}
TEST_F(OperationCPUTimerTest, TestTimerElapsedAfterMultipleDetachAttach) {
- // Do not try to use sub-millisecond precision. The way busyWait is implemented may cause this
- // test to succeed when it should not.
- auto timer = getTimer();
- timer->start();
- busyWait(Milliseconds(100)); // Elapse 100 ms.
- timer->onThreadDetach();
+ auto checkTimerAccumulatesElapsedTime = [&]() {
+ auto timer = getTimer();
+ timer->start();
+ busyWait(Microseconds(100)); // Elapse 100 us.
+ timer->onThreadDetach();
- busyWait(Milliseconds(500));
+ busyWait(Microseconds(100));
- timer->onThreadAttach();
- busyWait(Milliseconds(1)); // Elapse 1 ms.
- timer->onThreadDetach();
+ timer->onThreadAttach();
+ busyWait(Microseconds(10)); // Elapse 10 us.
+ timer->onThreadDetach();
- busyWait(Milliseconds(500));
+ busyWait(Microseconds(100));
- timer->onThreadAttach();
- busyWait(Milliseconds(1)); // Elapse 1 ms.
- timer->stop();
+ timer->onThreadAttach();
+ busyWait(Microseconds(10)); // Elapse 10 us.
+ timer->stop();
- // Total elapsed while attached should be GTE 102 ms.
- ASSERT_GTE(timer->getElapsed(), Milliseconds(102));
- // But less than the 1000 ms (500ms x 2) elapsed while detached.
- ASSERT_LT(timer->getElapsed(), Milliseconds(1000));
+ return timer->getElapsed();
+ };
+
+ // PosixTimer relies on clock_gettime, which might give bad results if the thread is migrated to
+ // another CPU when using a GLIBC < 2.4. Additionally, even on systems with GLIBC >= 2.4
+ // reported time can occasionally be incorrect due to precision issues. E.g. 119999000 ns < 120
+ // ms.
+
+ // We try to work around this issue by executing the test multiple times and establishing a
+ // threshold failure rate. This does not guarantee that the test will not fail, but should
+ // reduce the amount of noise.
+
+ const auto kRepeats = 1000;
+ const auto kMaxFailureRate = 0.1;
+ const auto kMaxFailures = kMaxFailureRate * kRepeats;
+ auto nFailures = 0;
+ auto nRetries = 0;
+ while (nRetries++ < kRepeats) {
+ auto testResult = checkTimerAccumulatesElapsedTime();
+ // Total elapsed while attached should be GTE 120 us.
+ if (testResult < Microseconds(120)) {
+ ++nFailures;
+ }
+ // Verifies we do not compute detached time.
+ ASSERT_LT(testResult, Microseconds(200));
+ }
+ ASSERT_LTE(nFailures, kMaxFailures);
}
DEATH_TEST_F(OperationCPUTimerTest,