diff options
author | Jason Carey <jcarey@argv.me> | 2019-03-15 15:33:31 -0400 |
---|---|---|
committer | Jason Carey <jcarey@argv.me> | 2019-04-19 14:01:54 -0400 |
commit | 43fb7ff820793bcc94e160133cc1dacd12bec1c8 (patch) | |
tree | bf8fbc5e4671e4ec237b64e4e9e5ae6da9831de1 | |
parent | bdd204a6e1552e5a13a8b4ffb89be80e60bdca77 (diff) | |
download | mongo-43fb7ff820793bcc94e160133cc1dacd12bec1c8.tar.gz |
SERVER-40166 Force BG clock now >= Date_t::lastNow
Currently our fast clock is implemented by a background thread which
wakes up every 10ms if callers continue to check it's time. If that
background thread get's descheduled for a long time for some reason,
this can cause the background threads time to drift substantially.
If we:
* force every read of date_t::now to update a new date_t::lastNow
* change the background thread so that it's responsibility is to wake up
and call Date_t::now() (if readers are still present)
* change the background thread clock source now() to return
date_t::lastNow
We'll get a world in which we have the same target granularity + thread
quiescence as before, but additionally ensure that even if the
background thread can't be scheduled, that lastNow() will still be
pushed loosely forward if there are callers of Date_t::now() elsewhere
in the system. This should ensure a tighter bound on the spread between
the precise and fast clock sources
(cherry picked from commit d32ddac33c48b5e2d2885c2862d6abd88e7f26d2)
-rw-r--r-- | src/mongo/util/background_thread_clock_source.cpp | 141 | ||||
-rw-r--r-- | src/mongo/util/background_thread_clock_source.h | 50 | ||||
-rw-r--r-- | src/mongo/util/background_thread_clock_source_test.cpp | 107 | ||||
-rw-r--r-- | src/mongo/util/time_support.cpp | 21 | ||||
-rw-r--r-- | src/mongo/util/time_support.h | 28 | ||||
-rw-r--r-- | src/mongo/util/time_support_test.cpp | 11 |
6 files changed, 230 insertions, 128 deletions
diff --git a/src/mongo/util/background_thread_clock_source.cpp b/src/mongo/util/background_thread_clock_source.cpp index 4233b016477..c39f534747e 100644 --- a/src/mongo/util/background_thread_clock_source.cpp +++ b/src/mongo/util/background_thread_clock_source.cpp @@ -47,9 +47,9 @@ namespace mongo { BackgroundThreadClockSource::BackgroundThreadClockSource(std::unique_ptr<ClockSource> clockSource, Milliseconds granularity) - : _clockSource(std::move(clockSource)), _granularity(granularity) { + : _clockSource(std::move(clockSource)), _state(kTimerPaused), _granularity(granularity) { _startTimerThread(); - _tracksSystemClock = _clockSource->tracksSystemClock(); + _tracksSystemClock = true; } BackgroundThreadClockSource::~BackgroundThreadClockSource() { @@ -67,40 +67,82 @@ Milliseconds BackgroundThreadClockSource::getPrecision() { } Status BackgroundThreadClockSource::setAlarm(Date_t when, stdx::function<void()> action) { - return _clockSource->setAlarm(when, action); + MONGO_UNREACHABLE; } Date_t BackgroundThreadClockSource::now() { // Since this is called very frequently by many threads, the common case should not write to // shared memory. - if (MONGO_unlikely(_timerWillPause.load())) { - return _slowNow(); + // + // If we read ReaderHasRead, we have at least the last time from a previous reader, or the + // background thread. + if (MONGO_unlikely(_state.load() != kReaderHasRead)) { // acquire + _updateClockAndWakeTimerIfNeeded(); } - auto now = _current.load(); - if (MONGO_unlikely(!now)) { - return _slowNow(); - } - return Date_t::fromMillisSinceEpoch(now); + + return Date_t::lastNow(); +} + +void BackgroundThreadClockSource::_updateClock() { + // We capture the lastUpdate time now to ensure that we sleep for the right target granularity, + // even if it takes a while for the background thread to wake up. + _lastUpdate = _clockSource->now(); + + // Updates Date_t::lastNow by calling Date_t::now() + Date_t::now(); } // This will be called at most once per _granularity per thread. In common cases it will only be // called by a single thread per _granularity. -Date_t BackgroundThreadClockSource::_slowNow() { - _timerWillPause.store(false); - auto now = _current.load(); - if (!now) { - stdx::lock_guard<stdx::mutex> lock(_mutex); - // Reload and check after locking since someone else may have done this for us. - now = _current.load(); - if (!now) { - // Wake up timer but have it pause if no else calls now() for the next _granularity. - _condition.notify_one(); - _timerWillPause.store(true); - - now = _updateCurrent_inlock(); - } +void BackgroundThreadClockSource::_updateClockAndWakeTimerIfNeeded() { + // Try to go from TimerWillPause to ReaderHasRead. + if (_state.compareAndSwap(kTimerWillPause, kReaderHasRead) != kTimerPaused) { + // There are three possible states _state could have been in before this cas: + // + // kTimerWillPause - In this case, we've transitioned to kReaderHasRead, telling the timer + // it still has recent readers and should continue to loop. In that case, + // we have no more to do, return. + // + // kReaderHasRead - Another thread had already performed the kTimerWillPause -> + // kReaderHasRead transition, we have no work to do, return. + // + // kTimerPaused - The timer was paused, so we have to wake it up. Don't return and attempt + // to wake the timer thread. + // + // For the first two cases, we can be sure we've acquired an up to date notion of time (from + // the timer thread or a reader that has woken calling updateClock()), from either + // succeeding or failing the cas above. + return; + } + + // We may be in timer paused + stdx::lock_guard<stdx::mutex> lock(_mutex); + // See if we still observe paused, after taking a lock. This prevents multiple threads from + // racing to update the clock. + if (_state.load() != kTimerPaused) { + // If not, someone else has taken care of it + return; } - return Date_t::fromMillisSinceEpoch(now); + + // If we were still in pause, there are a couple of tasks we have to do: + // + // 1. update the clock + // 2. store kReaderHasRead + // 3. wake the background thread + // + // It's important that we do them in that order, so that the background thread sleeps + // exactly granularity from now, and so that readers that observe kReaderHasRead pick up the + // updated time. Failing to keep that order may cause them to observe what may be a very + // stale read (if the background timer was a sleep for an extended period). + _updateClock(); + _state.store(kReaderHasRead); // release + + _condition.notify_one(); +} + +size_t BackgroundThreadClockSource::timesPausedForTest() { + stdx::lock_guard<stdx::mutex> lk(_mutex); + return _timesPaused; } void BackgroundThreadClockSource::_startTimerThread() { @@ -113,19 +155,42 @@ void BackgroundThreadClockSource::_startTimerThread() { _condition.notify_one(); while (!_inShutdown) { - if (!_timerWillPause.swap(true)) { - _updateCurrent_inlock(); - } else { + // update the clock every pass + _updateClock(); + + // Always transition to will pause on every run. + auto old = _state.swap(kTimerWillPause); // release + + // There are 3 possible states _state could have been in: + // + // kTimerWillPause - We slept until the next tick without a reader. We should pause + // + // kReaderHasRead - A reader has read since our last sleep. We should sleep again + // + // kTimerPaused - We were asleep and spuriously woke or we just started (we start in + // kTimerPaused) + // + // If we do pause our wake up will indicate: + // 1. That we've had a reader and it's time to tick again + // 2. That we're in shutdown, where we'll early return from the next condvar wait + // 3. Experiencing a spurious wake, which may make us tick an extra time, but no more + // than once per granularity + + if (old != kReaderHasRead) { // Stop running if nothing has read the time since we last updated the time. - _current.store(0); + _state.store(kTimerPaused); + + _timesPaused++; + + // We don't care about spurious wake ups here, at worst we'll update the clock an + // extra time. MONGO_IDLE_THREAD_BLOCK; - _condition.wait(lock, [this] { return _inShutdown || _current.load() != 0; }); + _condition.wait(lock); } - const auto sleepUntil = Date_t::fromMillisSinceEpoch(_current.load()) + _granularity; MONGO_IDLE_THREAD_BLOCK; _clockSource->waitForConditionUntil( - _condition, lock, sleepUntil, [this] { return _inShutdown; }); + _condition, lock, _lastUpdate + _granularity, [this] { return _inShutdown; }); } }); @@ -136,16 +201,4 @@ void BackgroundThreadClockSource::_startTimerThread() { _condition.wait(lock, [this] { return _started; }); } -int64_t BackgroundThreadClockSource::_updateCurrent_inlock() { - auto now = _clockSource->now().toMillisSinceEpoch(); - if (!now) { - // We use 0 to indicate that the thread isn't running. - severe() << "ClockSource " << demangleName(typeid(*_clockSource)) << " reported time 0." - << " Is it 1970?"; - fassertFailed(40399); - } - _current.store(now); - return now; -} - } // namespace mongo diff --git a/src/mongo/util/background_thread_clock_source.h b/src/mongo/util/background_thread_clock_source.h index a251ba4f2ce..eea2711ef12 100644 --- a/src/mongo/util/background_thread_clock_source.h +++ b/src/mongo/util/background_thread_clock_source.h @@ -45,10 +45,12 @@ namespace mongo { /** - * A clock source that uses a periodic timer to build a low-resolution, fast-to-read clock. - * Essentially uses a background thread that repeatedly sleeps for X amount of milliseconds - * and wakes up to store the current time. If nothing reads the time for a whole granularity, the - * thread will sleep until it is needed again. + * A clock source that reads the Date_t lastNow time and uses a background thread to ensure + * Date_t::now is called at least every X amount of milliseconds. If nothing reads the time for a + * whole granularity, the thread will sleep until it is needed again. + * + * Its now() returns Date_t::lastNow(), the passed in clock source is only used to control the + * sleeps for the background thread. */ class BackgroundThreadClockSource final : public ClockSource { MONGO_DISALLOW_COPYING(BackgroundThreadClockSource); @@ -60,23 +62,35 @@ public: Date_t now() override; Status setAlarm(Date_t when, stdx::function<void()> action) override; - /** - * Doesn't count as a call to now() for determining whether this ClockSource is idle. - * - * Unlike now(), returns Date_t() if the thread is currently paused. - */ - Date_t peekNowForTest() const { - return Date_t::fromMillisSinceEpoch(_current.load()); - } + size_t timesPausedForTest(); private: - Date_t _slowNow(); + void _updateClockAndWakeTimerIfNeeded(); + void _updateClock(); void _startTimerThread(); - int64_t _updateCurrent_inlock(); const std::unique_ptr<ClockSource> _clockSource; - AtomicInt64 _current{0}; // 0 if _timer is paused due to idleness. - AtomicBool _timerWillPause{true}; // If true when _timer wakes up, it will pause. + + // Expected transitions: + // + // Starting the clock source + // _ -> kTimerPaused + // + // Timer thread has woken from its timed sleep + // kTimerPaused + // kReaderRead -> kTimerWillPause + // + // Reader reads a time and the timer isn't paused + // kTimerWillPause -> kReaderHasRead + // + // Reader wakes up the timer thread + // kTimerPaused -> kReaderHasRead + enum States : uint8_t { + kReaderHasRead, + kTimerWillPause, + kTimerPaused, + }; + AtomicWord<uint8_t> _state; const Milliseconds _granularity; @@ -84,6 +98,10 @@ private: stdx::condition_variable _condition; bool _inShutdown = false; bool _started = false; + Date_t _lastUpdate; + + // This is used exclusively for tests, to verify when we've actually gone to sleep + size_t _timesPaused = 0; stdx::thread _timer; }; diff --git a/src/mongo/util/background_thread_clock_source_test.cpp b/src/mongo/util/background_thread_clock_source_test.cpp index cf1eb7dd1ac..b8a859f6ab5 100644 --- a/src/mongo/util/background_thread_clock_source_test.cpp +++ b/src/mongo/util/background_thread_clock_source_test.cpp @@ -35,6 +35,7 @@ #include "mongo/util/background_thread_clock_source.h" #include "mongo/util/clock_source.h" #include "mongo/util/clock_source_mock.h" +#include "mongo/util/system_clock_source.h" #include "mongo/util/time_support.h" namespace { @@ -44,29 +45,40 @@ using namespace mongo; class BTCSTest : public mongo::unittest::Test { public: void setUpClocks(Milliseconds granularity) { - auto csMock = stdx::make_unique<ClockSourceMock>(); + auto csMock = std::make_unique<ClockSourceMock>(); _csMock = csMock.get(); - _csMock->advance(granularity); // Make sure the mock doesn't return time 0. _btcs = stdx::make_unique<BackgroundThreadClockSource>(std::move(csMock), granularity); } + void setUpRealClocks(Milliseconds granularity) { + _btcs = stdx::make_unique<BackgroundThreadClockSource>( + std::make_unique<SystemClockSource>(), granularity); + } + void tearDown() override { _btcs.reset(); } protected: - void waitForIdleDetection() { - auto start = _csMock->now(); - while (_btcs->peekNowForTest() != Date_t()) { - // If the bg thread doesn't notice idleness within a minute, something is wrong. - ASSERT_LT(_csMock->now() - start, Minutes(1)); + size_t waitForIdleDetection() { + auto lastTimesPaused = _lastTimesPaused; + while (lastTimesPaused == checkTimesPaused()) { _csMock->advance(Milliseconds(1)); sleepFor(Milliseconds(1)); } + + return _lastTimesPaused; + } + + size_t checkTimesPaused() { + _lastTimesPaused = _btcs->timesPausedForTest(); + + return _lastTimesPaused; } - std::unique_ptr<BackgroundThreadClockSource> _btcs; ClockSourceMock* _csMock; + std::unique_ptr<BackgroundThreadClockSource> _btcs; + size_t _lastTimesPaused = 0; }; TEST_F(BTCSTest, CreateAndTerminate) { @@ -77,13 +89,22 @@ TEST_F(BTCSTest, CreateAndTerminate) { _btcs.reset(); } -TEST_F(BTCSTest, TimeKeeping) { +TEST_F(BTCSTest, PausesAfterRead) { setUpClocks(Milliseconds(1)); - ASSERT_EQUALS(_btcs->now(), _csMock->now()); + auto preCount = waitForIdleDetection(); + _btcs->now(); + auto after = waitForIdleDetection(); + ASSERT_LT(preCount, after); +} - waitForIdleDetection(); +TEST_F(BTCSTest, NowWorks) { + setUpRealClocks(Milliseconds(1)); - ASSERT_EQUALS(_btcs->now(), _csMock->now()); + const auto then = _btcs->now(); + sleepFor(Milliseconds(100)); + const auto now = _btcs->now(); + ASSERT_GT(now, then); + ASSERT_LTE(now, _btcs->now()); } TEST_F(BTCSTest, GetPrecision) { @@ -93,70 +114,22 @@ TEST_F(BTCSTest, GetPrecision) { TEST_F(BTCSTest, StartsPaused) { setUpClocks(Milliseconds(1)); - ASSERT_EQUALS(_btcs->peekNowForTest(), Date_t()); -} - -TEST_F(BTCSTest, PausesAfterRead) { - const auto kGranularity = Milliseconds(5); - setUpClocks(kGranularity); - - // Wake it up. - const auto now = _btcs->now(); - ASSERT_NE(now, Date_t()); - ASSERT_EQ(_btcs->peekNowForTest(), now); - _csMock->advance(kGranularity - Milliseconds(1)); - ASSERT_EQ(_btcs->now(), now); - - waitForIdleDetection(); // Only returns when the thread is paused. + ASSERT_EQUALS(checkTimesPaused(), 1ull); } TEST_F(BTCSTest, DoesntPauseWhenInUse) { - const auto kGranularity = Milliseconds(5); + const auto kGranularity = Milliseconds(3); setUpClocks(kGranularity); - auto lastTime = _btcs->now(); - ASSERT_NE(lastTime, Date_t()); - ASSERT_EQ(lastTime, _btcs->now()); // Mark the timer as still in use. - auto ticks = 0; // Count of when times change. - while (ticks < 10) { - if (_btcs->peekNowForTest() == lastTime) { - _csMock->advance(Milliseconds(1)); - ASSERT_LT(_csMock->now() - lastTime, Minutes(1)); - sleepFor(Milliseconds(1)); - continue; - } - ticks++; + _btcs->now(); + auto count = waitForIdleDetection(); - ASSERT_NE(_btcs->peekNowForTest(), Date_t()); - lastTime = _btcs->now(); - ASSERT_NE(lastTime, Date_t()); - ASSERT_EQ(lastTime, _btcs->peekNowForTest()); - } -} - -TEST_F(BTCSTest, WakesAfterPause) { - const auto kGranularity = Milliseconds(5); - setUpClocks(kGranularity); - - // Wake it up. - const auto now = _btcs->now(); - ASSERT_NE(now, Date_t()); - ASSERT_EQ(_btcs->peekNowForTest(), now); - _csMock->advance(kGranularity - Milliseconds(1)); - ASSERT_EQ(_btcs->now(), now); - - waitForIdleDetection(); - - // Wake it up again and ensure it ticks at least once. - const auto lastTime = _btcs->now(); - ASSERT_NE(lastTime, Date_t()); - ASSERT_EQ(lastTime, _btcs->now()); // Mark the timer as still in use. - while (_btcs->peekNowForTest() == lastTime) { + for (int i = 0; i < 100; ++i) { + ASSERT_EQ(count, checkTimesPaused()); + _btcs->now(); _csMock->advance(Milliseconds(1)); - ASSERT_LT(_csMock->now() - lastTime, Minutes(1)); sleepFor(Milliseconds(1)); } - ASSERT_NE(_btcs->peekNowForTest(), Date_t()); } } // namespace diff --git a/src/mongo/util/time_support.cpp b/src/mongo/util/time_support.cpp index 73cc2374aaf..03cd691a923 100644 --- a/src/mongo/util/time_support.cpp +++ b/src/mongo/util/time_support.cpp @@ -65,8 +65,27 @@ extern "C" time_t timegm(struct tm* const tmp); namespace mongo { +AtomicWord<long long> Date_t::lastNowVal; + Date_t Date_t::now() { - return fromMillisSinceEpoch(curTimeMillis64()); + int64_t curTime = curTimeMillis64(); + int64_t oldLastNow = lastNowVal.loadRelaxed(); + + // If curTime is different than old last now, unconditionally try to cas it to the new value. + // This is an optimization to avoid performing stores for multiple clock reads in the same + // millisecond. + // + // It's important that this is a non-equality (rather than a >), so that we avoid stalling time + // if someone moves the system clock backwards. + if (curTime != oldLastNow) { + // If we fail to comp exchange, it means someone else concurrently called Date_t::now(), in + // which case it's likely their time is also recent. It's important that we don't loop so + // that we avoid forcing time backwards if we have multiple callers at a millisecond + // boundary. + lastNowVal.compareAndSwap(oldLastNow, curTime); + } + + return fromMillisSinceEpoch(curTime); } Date_t::Date_t(stdx::chrono::system_clock::time_point tp) diff --git a/src/mongo/util/time_support.h b/src/mongo/util/time_support.h index 457ff516469..10f552b03fd 100644 --- a/src/mongo/util/time_support.h +++ b/src/mongo/util/time_support.h @@ -39,12 +39,15 @@ #include <string> #include "mongo/base/status_with.h" +#include "mongo/platform/atomic_word.h" #include "mongo/stdx/chrono.h" #include "mongo/stdx/mutex.h" #include "mongo/util/duration.h" namespace mongo { +class BackgroundThreadClockSource; + template <typename Allocator> class StringBuilderImpl; @@ -232,10 +235,35 @@ public: return out; } + /** + * Only exposed for testing. See lastNow() + */ + static Date_t lastNowForTest() { + return lastNow(); + } + private: constexpr explicit Date_t(long long m) : millis(m) {} long long millis = 0; + + friend class BackgroundThreadClockSource; + + /** + * Returns the last time fetched from Date_t::now() + * + * Note that this is a private semi-implementation detail for BackgroundThreadClockSource. Use + * svc->getFastClockSource()->now() over calling this method. + * + * If you think you have another use for it, please reconsider, or at least consider very + * carefully as correctly using it is complicated. + */ + static Date_t lastNow() { + return fromMillisSinceEpoch(lastNowVal.loadRelaxed()); + } + + // Holds the last value returned from now() + static AtomicWord<long long> lastNowVal; }; // uses ISO 8601 dates without trailing Z diff --git a/src/mongo/util/time_support_test.cpp b/src/mongo/util/time_support_test.cpp index b827da75867..c186e914dc5 100644 --- a/src/mongo/util/time_support_test.cpp +++ b/src/mongo/util/time_support_test.cpp @@ -893,5 +893,16 @@ TEST(DateTArithmetic, SubtractionOverflowThrows) { ErrorCodes::DurationOverflow); ASSERT_THROWS_CODE(Date_t::max() - Milliseconds(-1), DBException, ErrorCodes::DurationOverflow); } + +TEST(BasicNow, NowUpdatesLastNow) { + const auto then = Date_t::now(); + ASSERT_EQ(then, Date_t::lastNowForTest()); + sleepFor(Milliseconds(100)); + ASSERT_EQ(then, Date_t::lastNowForTest()); + const auto now = Date_t::now(); + ASSERT_EQ(now, Date_t::lastNowForTest()); + ASSERT_GT(now, then); +} + } // namespace } // namespace mongo |