summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJason Carey <jcarey@argv.me>2019-03-15 15:33:31 -0400
committerJason Carey <jcarey@argv.me>2019-04-19 14:01:54 -0400
commit43fb7ff820793bcc94e160133cc1dacd12bec1c8 (patch)
treebf8fbc5e4671e4ec237b64e4e9e5ae6da9831de1
parentbdd204a6e1552e5a13a8b4ffb89be80e60bdca77 (diff)
downloadmongo-43fb7ff820793bcc94e160133cc1dacd12bec1c8.tar.gz
SERVER-40166 Force BG clock now >= Date_t::lastNow
Currently our fast clock is implemented by a background thread which wakes up every 10ms if callers continue to check it's time. If that background thread get's descheduled for a long time for some reason, this can cause the background threads time to drift substantially. If we: * force every read of date_t::now to update a new date_t::lastNow * change the background thread so that it's responsibility is to wake up and call Date_t::now() (if readers are still present) * change the background thread clock source now() to return date_t::lastNow We'll get a world in which we have the same target granularity + thread quiescence as before, but additionally ensure that even if the background thread can't be scheduled, that lastNow() will still be pushed loosely forward if there are callers of Date_t::now() elsewhere in the system. This should ensure a tighter bound on the spread between the precise and fast clock sources (cherry picked from commit d32ddac33c48b5e2d2885c2862d6abd88e7f26d2)
-rw-r--r--src/mongo/util/background_thread_clock_source.cpp141
-rw-r--r--src/mongo/util/background_thread_clock_source.h50
-rw-r--r--src/mongo/util/background_thread_clock_source_test.cpp107
-rw-r--r--src/mongo/util/time_support.cpp21
-rw-r--r--src/mongo/util/time_support.h28
-rw-r--r--src/mongo/util/time_support_test.cpp11
6 files changed, 230 insertions, 128 deletions
diff --git a/src/mongo/util/background_thread_clock_source.cpp b/src/mongo/util/background_thread_clock_source.cpp
index 4233b016477..c39f534747e 100644
--- a/src/mongo/util/background_thread_clock_source.cpp
+++ b/src/mongo/util/background_thread_clock_source.cpp
@@ -47,9 +47,9 @@ namespace mongo {
BackgroundThreadClockSource::BackgroundThreadClockSource(std::unique_ptr<ClockSource> clockSource,
Milliseconds granularity)
- : _clockSource(std::move(clockSource)), _granularity(granularity) {
+ : _clockSource(std::move(clockSource)), _state(kTimerPaused), _granularity(granularity) {
_startTimerThread();
- _tracksSystemClock = _clockSource->tracksSystemClock();
+ _tracksSystemClock = true;
}
BackgroundThreadClockSource::~BackgroundThreadClockSource() {
@@ -67,40 +67,82 @@ Milliseconds BackgroundThreadClockSource::getPrecision() {
}
Status BackgroundThreadClockSource::setAlarm(Date_t when, stdx::function<void()> action) {
- return _clockSource->setAlarm(when, action);
+ MONGO_UNREACHABLE;
}
Date_t BackgroundThreadClockSource::now() {
// Since this is called very frequently by many threads, the common case should not write to
// shared memory.
- if (MONGO_unlikely(_timerWillPause.load())) {
- return _slowNow();
+ //
+ // If we read ReaderHasRead, we have at least the last time from a previous reader, or the
+ // background thread.
+ if (MONGO_unlikely(_state.load() != kReaderHasRead)) { // acquire
+ _updateClockAndWakeTimerIfNeeded();
}
- auto now = _current.load();
- if (MONGO_unlikely(!now)) {
- return _slowNow();
- }
- return Date_t::fromMillisSinceEpoch(now);
+
+ return Date_t::lastNow();
+}
+
+void BackgroundThreadClockSource::_updateClock() {
+ // We capture the lastUpdate time now to ensure that we sleep for the right target granularity,
+ // even if it takes a while for the background thread to wake up.
+ _lastUpdate = _clockSource->now();
+
+ // Updates Date_t::lastNow by calling Date_t::now()
+ Date_t::now();
}
// This will be called at most once per _granularity per thread. In common cases it will only be
// called by a single thread per _granularity.
-Date_t BackgroundThreadClockSource::_slowNow() {
- _timerWillPause.store(false);
- auto now = _current.load();
- if (!now) {
- stdx::lock_guard<stdx::mutex> lock(_mutex);
- // Reload and check after locking since someone else may have done this for us.
- now = _current.load();
- if (!now) {
- // Wake up timer but have it pause if no else calls now() for the next _granularity.
- _condition.notify_one();
- _timerWillPause.store(true);
-
- now = _updateCurrent_inlock();
- }
+void BackgroundThreadClockSource::_updateClockAndWakeTimerIfNeeded() {
+ // Try to go from TimerWillPause to ReaderHasRead.
+ if (_state.compareAndSwap(kTimerWillPause, kReaderHasRead) != kTimerPaused) {
+ // There are three possible states _state could have been in before this cas:
+ //
+ // kTimerWillPause - In this case, we've transitioned to kReaderHasRead, telling the timer
+ // it still has recent readers and should continue to loop. In that case,
+ // we have no more to do, return.
+ //
+ // kReaderHasRead - Another thread had already performed the kTimerWillPause ->
+ // kReaderHasRead transition, we have no work to do, return.
+ //
+ // kTimerPaused - The timer was paused, so we have to wake it up. Don't return and attempt
+ // to wake the timer thread.
+ //
+ // For the first two cases, we can be sure we've acquired an up to date notion of time (from
+ // the timer thread or a reader that has woken calling updateClock()), from either
+ // succeeding or failing the cas above.
+ return;
+ }
+
+ // We may be in timer paused
+ stdx::lock_guard<stdx::mutex> lock(_mutex);
+ // See if we still observe paused, after taking a lock. This prevents multiple threads from
+ // racing to update the clock.
+ if (_state.load() != kTimerPaused) {
+ // If not, someone else has taken care of it
+ return;
}
- return Date_t::fromMillisSinceEpoch(now);
+
+ // If we were still in pause, there are a couple of tasks we have to do:
+ //
+ // 1. update the clock
+ // 2. store kReaderHasRead
+ // 3. wake the background thread
+ //
+ // It's important that we do them in that order, so that the background thread sleeps
+ // exactly granularity from now, and so that readers that observe kReaderHasRead pick up the
+ // updated time. Failing to keep that order may cause them to observe what may be a very
+ // stale read (if the background timer was a sleep for an extended period).
+ _updateClock();
+ _state.store(kReaderHasRead); // release
+
+ _condition.notify_one();
+}
+
+size_t BackgroundThreadClockSource::timesPausedForTest() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ return _timesPaused;
}
void BackgroundThreadClockSource::_startTimerThread() {
@@ -113,19 +155,42 @@ void BackgroundThreadClockSource::_startTimerThread() {
_condition.notify_one();
while (!_inShutdown) {
- if (!_timerWillPause.swap(true)) {
- _updateCurrent_inlock();
- } else {
+ // update the clock every pass
+ _updateClock();
+
+ // Always transition to will pause on every run.
+ auto old = _state.swap(kTimerWillPause); // release
+
+ // There are 3 possible states _state could have been in:
+ //
+ // kTimerWillPause - We slept until the next tick without a reader. We should pause
+ //
+ // kReaderHasRead - A reader has read since our last sleep. We should sleep again
+ //
+ // kTimerPaused - We were asleep and spuriously woke or we just started (we start in
+ // kTimerPaused)
+ //
+ // If we do pause our wake up will indicate:
+ // 1. That we've had a reader and it's time to tick again
+ // 2. That we're in shutdown, where we'll early return from the next condvar wait
+ // 3. Experiencing a spurious wake, which may make us tick an extra time, but no more
+ // than once per granularity
+
+ if (old != kReaderHasRead) {
// Stop running if nothing has read the time since we last updated the time.
- _current.store(0);
+ _state.store(kTimerPaused);
+
+ _timesPaused++;
+
+ // We don't care about spurious wake ups here, at worst we'll update the clock an
+ // extra time.
MONGO_IDLE_THREAD_BLOCK;
- _condition.wait(lock, [this] { return _inShutdown || _current.load() != 0; });
+ _condition.wait(lock);
}
- const auto sleepUntil = Date_t::fromMillisSinceEpoch(_current.load()) + _granularity;
MONGO_IDLE_THREAD_BLOCK;
_clockSource->waitForConditionUntil(
- _condition, lock, sleepUntil, [this] { return _inShutdown; });
+ _condition, lock, _lastUpdate + _granularity, [this] { return _inShutdown; });
}
});
@@ -136,16 +201,4 @@ void BackgroundThreadClockSource::_startTimerThread() {
_condition.wait(lock, [this] { return _started; });
}
-int64_t BackgroundThreadClockSource::_updateCurrent_inlock() {
- auto now = _clockSource->now().toMillisSinceEpoch();
- if (!now) {
- // We use 0 to indicate that the thread isn't running.
- severe() << "ClockSource " << demangleName(typeid(*_clockSource)) << " reported time 0."
- << " Is it 1970?";
- fassertFailed(40399);
- }
- _current.store(now);
- return now;
-}
-
} // namespace mongo
diff --git a/src/mongo/util/background_thread_clock_source.h b/src/mongo/util/background_thread_clock_source.h
index a251ba4f2ce..eea2711ef12 100644
--- a/src/mongo/util/background_thread_clock_source.h
+++ b/src/mongo/util/background_thread_clock_source.h
@@ -45,10 +45,12 @@
namespace mongo {
/**
- * A clock source that uses a periodic timer to build a low-resolution, fast-to-read clock.
- * Essentially uses a background thread that repeatedly sleeps for X amount of milliseconds
- * and wakes up to store the current time. If nothing reads the time for a whole granularity, the
- * thread will sleep until it is needed again.
+ * A clock source that reads the Date_t lastNow time and uses a background thread to ensure
+ * Date_t::now is called at least every X amount of milliseconds. If nothing reads the time for a
+ * whole granularity, the thread will sleep until it is needed again.
+ *
+ * Its now() returns Date_t::lastNow(), the passed in clock source is only used to control the
+ * sleeps for the background thread.
*/
class BackgroundThreadClockSource final : public ClockSource {
MONGO_DISALLOW_COPYING(BackgroundThreadClockSource);
@@ -60,23 +62,35 @@ public:
Date_t now() override;
Status setAlarm(Date_t when, stdx::function<void()> action) override;
- /**
- * Doesn't count as a call to now() for determining whether this ClockSource is idle.
- *
- * Unlike now(), returns Date_t() if the thread is currently paused.
- */
- Date_t peekNowForTest() const {
- return Date_t::fromMillisSinceEpoch(_current.load());
- }
+ size_t timesPausedForTest();
private:
- Date_t _slowNow();
+ void _updateClockAndWakeTimerIfNeeded();
+ void _updateClock();
void _startTimerThread();
- int64_t _updateCurrent_inlock();
const std::unique_ptr<ClockSource> _clockSource;
- AtomicInt64 _current{0}; // 0 if _timer is paused due to idleness.
- AtomicBool _timerWillPause{true}; // If true when _timer wakes up, it will pause.
+
+ // Expected transitions:
+ //
+ // Starting the clock source
+ // _ -> kTimerPaused
+ //
+ // Timer thread has woken from its timed sleep
+ // kTimerPaused
+ // kReaderRead -> kTimerWillPause
+ //
+ // Reader reads a time and the timer isn't paused
+ // kTimerWillPause -> kReaderHasRead
+ //
+ // Reader wakes up the timer thread
+ // kTimerPaused -> kReaderHasRead
+ enum States : uint8_t {
+ kReaderHasRead,
+ kTimerWillPause,
+ kTimerPaused,
+ };
+ AtomicWord<uint8_t> _state;
const Milliseconds _granularity;
@@ -84,6 +98,10 @@ private:
stdx::condition_variable _condition;
bool _inShutdown = false;
bool _started = false;
+ Date_t _lastUpdate;
+
+ // This is used exclusively for tests, to verify when we've actually gone to sleep
+ size_t _timesPaused = 0;
stdx::thread _timer;
};
diff --git a/src/mongo/util/background_thread_clock_source_test.cpp b/src/mongo/util/background_thread_clock_source_test.cpp
index cf1eb7dd1ac..b8a859f6ab5 100644
--- a/src/mongo/util/background_thread_clock_source_test.cpp
+++ b/src/mongo/util/background_thread_clock_source_test.cpp
@@ -35,6 +35,7 @@
#include "mongo/util/background_thread_clock_source.h"
#include "mongo/util/clock_source.h"
#include "mongo/util/clock_source_mock.h"
+#include "mongo/util/system_clock_source.h"
#include "mongo/util/time_support.h"
namespace {
@@ -44,29 +45,40 @@ using namespace mongo;
class BTCSTest : public mongo::unittest::Test {
public:
void setUpClocks(Milliseconds granularity) {
- auto csMock = stdx::make_unique<ClockSourceMock>();
+ auto csMock = std::make_unique<ClockSourceMock>();
_csMock = csMock.get();
- _csMock->advance(granularity); // Make sure the mock doesn't return time 0.
_btcs = stdx::make_unique<BackgroundThreadClockSource>(std::move(csMock), granularity);
}
+ void setUpRealClocks(Milliseconds granularity) {
+ _btcs = stdx::make_unique<BackgroundThreadClockSource>(
+ std::make_unique<SystemClockSource>(), granularity);
+ }
+
void tearDown() override {
_btcs.reset();
}
protected:
- void waitForIdleDetection() {
- auto start = _csMock->now();
- while (_btcs->peekNowForTest() != Date_t()) {
- // If the bg thread doesn't notice idleness within a minute, something is wrong.
- ASSERT_LT(_csMock->now() - start, Minutes(1));
+ size_t waitForIdleDetection() {
+ auto lastTimesPaused = _lastTimesPaused;
+ while (lastTimesPaused == checkTimesPaused()) {
_csMock->advance(Milliseconds(1));
sleepFor(Milliseconds(1));
}
+
+ return _lastTimesPaused;
+ }
+
+ size_t checkTimesPaused() {
+ _lastTimesPaused = _btcs->timesPausedForTest();
+
+ return _lastTimesPaused;
}
- std::unique_ptr<BackgroundThreadClockSource> _btcs;
ClockSourceMock* _csMock;
+ std::unique_ptr<BackgroundThreadClockSource> _btcs;
+ size_t _lastTimesPaused = 0;
};
TEST_F(BTCSTest, CreateAndTerminate) {
@@ -77,13 +89,22 @@ TEST_F(BTCSTest, CreateAndTerminate) {
_btcs.reset();
}
-TEST_F(BTCSTest, TimeKeeping) {
+TEST_F(BTCSTest, PausesAfterRead) {
setUpClocks(Milliseconds(1));
- ASSERT_EQUALS(_btcs->now(), _csMock->now());
+ auto preCount = waitForIdleDetection();
+ _btcs->now();
+ auto after = waitForIdleDetection();
+ ASSERT_LT(preCount, after);
+}
- waitForIdleDetection();
+TEST_F(BTCSTest, NowWorks) {
+ setUpRealClocks(Milliseconds(1));
- ASSERT_EQUALS(_btcs->now(), _csMock->now());
+ const auto then = _btcs->now();
+ sleepFor(Milliseconds(100));
+ const auto now = _btcs->now();
+ ASSERT_GT(now, then);
+ ASSERT_LTE(now, _btcs->now());
}
TEST_F(BTCSTest, GetPrecision) {
@@ -93,70 +114,22 @@ TEST_F(BTCSTest, GetPrecision) {
TEST_F(BTCSTest, StartsPaused) {
setUpClocks(Milliseconds(1));
- ASSERT_EQUALS(_btcs->peekNowForTest(), Date_t());
-}
-
-TEST_F(BTCSTest, PausesAfterRead) {
- const auto kGranularity = Milliseconds(5);
- setUpClocks(kGranularity);
-
- // Wake it up.
- const auto now = _btcs->now();
- ASSERT_NE(now, Date_t());
- ASSERT_EQ(_btcs->peekNowForTest(), now);
- _csMock->advance(kGranularity - Milliseconds(1));
- ASSERT_EQ(_btcs->now(), now);
-
- waitForIdleDetection(); // Only returns when the thread is paused.
+ ASSERT_EQUALS(checkTimesPaused(), 1ull);
}
TEST_F(BTCSTest, DoesntPauseWhenInUse) {
- const auto kGranularity = Milliseconds(5);
+ const auto kGranularity = Milliseconds(3);
setUpClocks(kGranularity);
- auto lastTime = _btcs->now();
- ASSERT_NE(lastTime, Date_t());
- ASSERT_EQ(lastTime, _btcs->now()); // Mark the timer as still in use.
- auto ticks = 0; // Count of when times change.
- while (ticks < 10) {
- if (_btcs->peekNowForTest() == lastTime) {
- _csMock->advance(Milliseconds(1));
- ASSERT_LT(_csMock->now() - lastTime, Minutes(1));
- sleepFor(Milliseconds(1));
- continue;
- }
- ticks++;
+ _btcs->now();
+ auto count = waitForIdleDetection();
- ASSERT_NE(_btcs->peekNowForTest(), Date_t());
- lastTime = _btcs->now();
- ASSERT_NE(lastTime, Date_t());
- ASSERT_EQ(lastTime, _btcs->peekNowForTest());
- }
-}
-
-TEST_F(BTCSTest, WakesAfterPause) {
- const auto kGranularity = Milliseconds(5);
- setUpClocks(kGranularity);
-
- // Wake it up.
- const auto now = _btcs->now();
- ASSERT_NE(now, Date_t());
- ASSERT_EQ(_btcs->peekNowForTest(), now);
- _csMock->advance(kGranularity - Milliseconds(1));
- ASSERT_EQ(_btcs->now(), now);
-
- waitForIdleDetection();
-
- // Wake it up again and ensure it ticks at least once.
- const auto lastTime = _btcs->now();
- ASSERT_NE(lastTime, Date_t());
- ASSERT_EQ(lastTime, _btcs->now()); // Mark the timer as still in use.
- while (_btcs->peekNowForTest() == lastTime) {
+ for (int i = 0; i < 100; ++i) {
+ ASSERT_EQ(count, checkTimesPaused());
+ _btcs->now();
_csMock->advance(Milliseconds(1));
- ASSERT_LT(_csMock->now() - lastTime, Minutes(1));
sleepFor(Milliseconds(1));
}
- ASSERT_NE(_btcs->peekNowForTest(), Date_t());
}
} // namespace
diff --git a/src/mongo/util/time_support.cpp b/src/mongo/util/time_support.cpp
index 73cc2374aaf..03cd691a923 100644
--- a/src/mongo/util/time_support.cpp
+++ b/src/mongo/util/time_support.cpp
@@ -65,8 +65,27 @@ extern "C" time_t timegm(struct tm* const tmp);
namespace mongo {
+AtomicWord<long long> Date_t::lastNowVal;
+
Date_t Date_t::now() {
- return fromMillisSinceEpoch(curTimeMillis64());
+ int64_t curTime = curTimeMillis64();
+ int64_t oldLastNow = lastNowVal.loadRelaxed();
+
+ // If curTime is different than old last now, unconditionally try to cas it to the new value.
+ // This is an optimization to avoid performing stores for multiple clock reads in the same
+ // millisecond.
+ //
+ // It's important that this is a non-equality (rather than a >), so that we avoid stalling time
+ // if someone moves the system clock backwards.
+ if (curTime != oldLastNow) {
+ // If we fail to comp exchange, it means someone else concurrently called Date_t::now(), in
+ // which case it's likely their time is also recent. It's important that we don't loop so
+ // that we avoid forcing time backwards if we have multiple callers at a millisecond
+ // boundary.
+ lastNowVal.compareAndSwap(oldLastNow, curTime);
+ }
+
+ return fromMillisSinceEpoch(curTime);
}
Date_t::Date_t(stdx::chrono::system_clock::time_point tp)
diff --git a/src/mongo/util/time_support.h b/src/mongo/util/time_support.h
index 457ff516469..10f552b03fd 100644
--- a/src/mongo/util/time_support.h
+++ b/src/mongo/util/time_support.h
@@ -39,12 +39,15 @@
#include <string>
#include "mongo/base/status_with.h"
+#include "mongo/platform/atomic_word.h"
#include "mongo/stdx/chrono.h"
#include "mongo/stdx/mutex.h"
#include "mongo/util/duration.h"
namespace mongo {
+class BackgroundThreadClockSource;
+
template <typename Allocator>
class StringBuilderImpl;
@@ -232,10 +235,35 @@ public:
return out;
}
+ /**
+ * Only exposed for testing. See lastNow()
+ */
+ static Date_t lastNowForTest() {
+ return lastNow();
+ }
+
private:
constexpr explicit Date_t(long long m) : millis(m) {}
long long millis = 0;
+
+ friend class BackgroundThreadClockSource;
+
+ /**
+ * Returns the last time fetched from Date_t::now()
+ *
+ * Note that this is a private semi-implementation detail for BackgroundThreadClockSource. Use
+ * svc->getFastClockSource()->now() over calling this method.
+ *
+ * If you think you have another use for it, please reconsider, or at least consider very
+ * carefully as correctly using it is complicated.
+ */
+ static Date_t lastNow() {
+ return fromMillisSinceEpoch(lastNowVal.loadRelaxed());
+ }
+
+ // Holds the last value returned from now()
+ static AtomicWord<long long> lastNowVal;
};
// uses ISO 8601 dates without trailing Z
diff --git a/src/mongo/util/time_support_test.cpp b/src/mongo/util/time_support_test.cpp
index b827da75867..c186e914dc5 100644
--- a/src/mongo/util/time_support_test.cpp
+++ b/src/mongo/util/time_support_test.cpp
@@ -893,5 +893,16 @@ TEST(DateTArithmetic, SubtractionOverflowThrows) {
ErrorCodes::DurationOverflow);
ASSERT_THROWS_CODE(Date_t::max() - Milliseconds(-1), DBException, ErrorCodes::DurationOverflow);
}
+
+TEST(BasicNow, NowUpdatesLastNow) {
+ const auto then = Date_t::now();
+ ASSERT_EQ(then, Date_t::lastNowForTest());
+ sleepFor(Milliseconds(100));
+ ASSERT_EQ(then, Date_t::lastNowForTest());
+ const auto now = Date_t::now();
+ ASSERT_EQ(now, Date_t::lastNowForTest());
+ ASSERT_GT(now, then);
+}
+
} // namespace
} // namespace mongo