diff options
Diffstat (limited to 'src/mongo')
-rw-r--r-- | src/mongo/SConscript | 2 | ||||
-rw-r--r-- | src/mongo/client/replica_set_monitor_manager.h | 2 | ||||
-rw-r--r-- | src/mongo/db/db.cpp | 3 | ||||
-rw-r--r-- | src/mongo/db/session_killer.h | 2 | ||||
-rw-r--r-- | src/mongo/embedded/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/executor/thread_pool_task_executor.h | 5 | ||||
-rw-r--r-- | src/mongo/platform/mutex.cpp | 65 | ||||
-rw-r--r-- | src/mongo/platform/mutex.h | 249 | ||||
-rw-r--r-- | src/mongo/platform/mutex_test.cpp | 2 | ||||
-rw-r--r-- | src/mongo/platform/source_location.h | 13 | ||||
-rw-r--r-- | src/mongo/s/server.cpp | 3 | ||||
-rw-r--r-- | src/mongo/util/SConscript | 4 | ||||
-rw-r--r-- | src/mongo/util/background.cpp | 4 | ||||
-rw-r--r-- | src/mongo/util/diagnostic_info.cpp | 12 | ||||
-rw-r--r-- | src/mongo/util/latch_analyzer.cpp | 352 | ||||
-rw-r--r-- | src/mongo/util/latch_analyzer.h | 69 | ||||
-rw-r--r-- | src/mongo/util/latch_analyzer_test.cpp | 51 |
17 files changed, 653 insertions, 186 deletions
diff --git a/src/mongo/SConscript b/src/mongo/SConscript index c114999ae51..fd7123f49e4 100644 --- a/src/mongo/SConscript +++ b/src/mongo/SConscript @@ -463,6 +463,7 @@ mongod = env.Program( 'util/clock_sources', 'util/elapsed_tracker', 'util/fail_point', + 'util/latch_analyzer', 'util/net/network', 'util/ntservice', 'util/options_parser/options_parser_init', @@ -571,6 +572,7 @@ mongos = env.Program( 'transport/transport_layer_manager', 'util/clock_sources', 'util/fail_point', + 'util/latch_analyzer', 'util/net/ssl_options_server' if get_option('ssl') == 'on' else '', 'util/ntservice', 'util/version_impl', diff --git a/src/mongo/client/replica_set_monitor_manager.h b/src/mongo/client/replica_set_monitor_manager.h index c464551145f..49c3e2e7f12 100644 --- a/src/mongo/client/replica_set_monitor_manager.h +++ b/src/mongo/client/replica_set_monitor_manager.h @@ -106,7 +106,7 @@ private: // Protects access to the replica set monitors mutable Mutex _mutex = - MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(0), "ReplicaSetMonitorManager::_mutex"); + MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(4), "ReplicaSetMonitorManager::_mutex"); // Executor for monitoring replica sets. std::unique_ptr<executor::TaskExecutor> _taskExecutor; diff --git a/src/mongo/db/db.cpp b/src/mongo/db/db.cpp index fb02bce6ed4..75e3321a280 100644 --- a/src/mongo/db/db.cpp +++ b/src/mongo/db/db.cpp @@ -167,6 +167,7 @@ #include "mongo/util/exit.h" #include "mongo/util/fail_point.h" #include "mongo/util/fast_clock_source_factory.h" +#include "mongo/util/latch_analyzer.h" #include "mongo/util/log.h" #include "mongo/util/net/socket_utils.h" #include "mongo/util/net/ssl_manager.h" @@ -1050,6 +1051,8 @@ void shutdownTask(const ShutdownTaskArgs& shutdownArgs) { log(LogComponent::kControl) << "now exiting"; audit::logShutdown(client); + + LatchAnalyzer::get(serviceContext).dump(); } int mongoDbMain(int argc, char* argv[], char** envp) { diff --git a/src/mongo/db/session_killer.h b/src/mongo/db/session_killer.h index 8b5c14bd2ba..45ca8b27a86 100644 --- a/src/mongo/db/session_killer.h +++ b/src/mongo/db/session_killer.h @@ -132,7 +132,7 @@ private: stdx::thread _thread; - Mutex _mutex = MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(0), "SessionKiller::_mutex"); + Mutex _mutex = MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(2), "SessionKiller::_mutex"); stdx::condition_variable _callerCV; stdx::condition_variable _killerCV; diff --git a/src/mongo/embedded/SConscript b/src/mongo/embedded/SConscript index c5a9ed4da18..dad3c6865f9 100644 --- a/src/mongo/embedded/SConscript +++ b/src/mongo/embedded/SConscript @@ -122,6 +122,7 @@ env.Library( '$BUILD_DIR/mongo/db/storage/storage_options', '$BUILD_DIR/mongo/db/wire_version', '$BUILD_DIR/mongo/rpc/client_metadata', + '$BUILD_DIR/mongo/util/latch_analyzer', '$BUILD_DIR/mongo/util/options_parser/options_parser', '$BUILD_DIR/mongo/util/version_impl', ] diff --git a/src/mongo/executor/thread_pool_task_executor.h b/src/mongo/executor/thread_pool_task_executor.h index 1ac9c385a98..1bcfe806a39 100644 --- a/src/mongo/executor/thread_pool_task_executor.h +++ b/src/mongo/executor/thread_pool_task_executor.h @@ -189,8 +189,9 @@ private: std::shared_ptr<ThreadPoolInterface> _pool; // Mutex guarding all remaining fields. - mutable Mutex _mutex = - MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(1), "ThreadPoolTaskExecutor::_mutex"); + mutable Mutex _mutex = MONGO_MAKE_LATCH( + // This is sadly held for a subset of task execution HierarchicalAcquisitionLevel(1), + "ThreadPoolTaskExecutor::_mutex"); // Queue containing all items currently scheduled into the thread pool but not yet completed. WorkQueue _poolInProgressQueue; diff --git a/src/mongo/platform/mutex.cpp b/src/mongo/platform/mutex.cpp index f284697be07..131103f561d 100644 --- a/src/mongo/platform/mutex.cpp +++ b/src/mongo/platform/mutex.cpp @@ -31,24 +31,33 @@ namespace mongo { +Mutex::Mutex(std::shared_ptr<latch_detail::Data> data) : _data{std::move(data)} { + invariant(_data); + + _data->counts().created.fetchAndAdd(1); +} + Mutex::~Mutex() { invariant(!_isLocked); + + _data->counts().destroyed.fetchAndAdd(1); } void Mutex::lock() { if (_mutex.try_lock()) { _isLocked = true; - _onQuickLock(_id); + _onQuickLock(); return; } - _onContendedLock(_id); + _onContendedLock(); _mutex.lock(); _isLocked = true; - _onSlowLock(_id); + _onSlowLock(); } + void Mutex::unlock() { - _onUnlock(_id); + _onUnlock(); _isLocked = false; _mutex.unlock(); } @@ -58,41 +67,53 @@ bool Mutex::try_lock() { } _isLocked = true; - _onQuickLock(_id); + _onQuickLock(); return true; } +StringData Mutex::getName() const { + return StringData(_data->identity().name()); +} + void Mutex::addLockListener(LockListener* listener) { auto& state = _getListenerState(); - state.list.push_back(listener); + state.list.add(listener); } -void Mutex::_onContendedLock(const Identity& id) noexcept { - auto& state = _getListenerState(); - for (auto listener : state.list) { - listener->onContendedLock(id); +void Mutex::_onContendedLock() noexcept { + _data->counts().contended.fetchAndAdd(1); + + auto it = _getListenerState().list.iter(); + while (auto listener = it.next()) { + listener->onContendedLock(_data->identity()); } } -void Mutex::_onQuickLock(const Identity& id) noexcept { - auto& state = _getListenerState(); - for (auto listener : state.list) { - listener->onQuickLock(id); +void Mutex::_onQuickLock() noexcept { + _data->counts().acquired.fetchAndAdd(1); + + auto it = _getListenerState().list.iter(); + while (auto listener = it.next()) { + listener->onQuickLock(_data->identity()); } } -void Mutex::_onSlowLock(const Identity& id) noexcept { - auto& state = _getListenerState(); - for (auto listener : state.list) { - listener->onSlowLock(id); +void Mutex::_onSlowLock() noexcept { + _data->counts().acquired.fetchAndAdd(1); + + auto it = _getListenerState().list.iter(); + while (auto listener = it.next()) { + listener->onSlowLock(_data->identity()); } } -void Mutex::_onUnlock(const Identity& id) noexcept { - auto& state = _getListenerState(); - for (auto listener : state.list) { - listener->onUnlock(id); +void Mutex::_onUnlock() noexcept { + _data->counts().released.fetchAndAdd(1); + + auto it = _getListenerState().list.iter(); + while (auto listener = it.next()) { + listener->onUnlock(_data->identity()); } } diff --git a/src/mongo/platform/mutex.h b/src/mongo/platform/mutex.h index 10705d5386c..7776f94adea 100644 --- a/src/mongo/platform/mutex.h +++ b/src/mongo/platform/mutex.h @@ -37,11 +37,183 @@ #include "mongo/platform/source_location.h" #include "mongo/stdx/mutex.h" #include "mongo/util/assert_util.h" +#include "mongo/util/decorable.h" #include "mongo/util/duration.h" #include "mongo/util/hierarchical_acquisition.h" +#include "mongo/util/registry_list.h" namespace mongo { +class Mutex; + +namespace latch_detail { + +using Level = hierarchical_acquisition_detail::Level; + +static constexpr auto kAnonymousName = "AnonymousLatch"_sd; + +/** + * An Identity encapsulates the context around a latch + */ +class Identity { +public: + Identity() : Identity(boost::none, kAnonymousName) {} + + explicit Identity(StringData name) : Identity(boost::none, name) {} + + Identity(boost::optional<Level> level, StringData name) + : _index(_nextIndex()), _level(level), _name(name.toString()) {} + + /** + * Since SouceLocations usually come from macros, this function is a setter that allows + * a SourceLocation to be paired with __VA_ARGS__ construction. + */ + Identity& setSourceLocation(const SourceLocationHolder& sourceLocation) { + invariant(!_sourceLocation); + _sourceLocation = sourceLocation; + return *this; + } + + /** + * Return an optional that may contain the SourceLocation for this latch + */ + const boost::optional<SourceLocationHolder>& sourceLocation() const { + return _sourceLocation; + } + + /** + * Return an optional that may contain the HierarchicalAcquisitionLevel for this latch + */ + const boost::optional<Level>& level() const { + return _level; + } + + /** + * Return the name for this latch + * + * If there was no name provided on construction, this will be latch_detail::kAnonymousName. + */ + StringData name() const { + return _name; + } + + /** + * Return the index for this latch + * + * Latch indexes are assigned as Identity objects are created. Any given ordering is only valid + * for a single process lifetime. + */ + size_t index() const { + return _index; + } + +private: + static int64_t _nextIndex() { + static auto nextLatchIndex = AtomicWord<int64_t>(0); + return nextLatchIndex.fetchAndAdd(1); + } + + int64_t _index; + boost::optional<Level> _level; + std::string _name; + + boost::optional<SourceLocationHolder> _sourceLocation; +}; + +/** + * This class holds working data for a latchable resource + * + * All member data is either i) synchronized or ii) constant. + */ +class Data { +public: + explicit Data(Identity identity) : _identity(std::move(identity)) {} + + auto& counts() { + return _counts; + } + + const auto& counts() const { + return _counts; + } + + const auto& identity() const { + return _identity; + } + +private: + const Identity _identity; + + struct Counts { + AtomicWord<int> created{0}; + AtomicWord<int> destroyed{0}; + + AtomicWord<int> contended{0}; + AtomicWord<int> acquired{0}; + AtomicWord<int> released{0}; + }; + + Counts _counts; +}; + +/** + * latch_details::Catalog holds a collection of Data objects for use with Mutexes + * + * All rules for LockFreeCollection apply: + * - Synchronization is provided internally + * - All entries are expected to exist for the lifetime of the Catalog + */ +class Catalog final : public WeakPtrRegistryList<Data> { +public: + static auto& get() { + static Catalog gCatalog; + return gCatalog; + } +}; + +/** + * Simple registration object that construct with an Identity and provides access to a Data + * + * This object actually owns the Data object to make lifetime management simpler. + */ +class Registration { +public: + explicit Registration(Identity identity) + : _data(std::make_shared<Data>(std::move(identity))), _index{Catalog::get().add(_data)} {} + + const auto& data() { + return _data; + } + +private: + std::shared_ptr<Data> _data; + size_t _index; +}; + +/** + * Get a Data object (Identity, Counts) for a unique type Tag (which can be a noop lambda) + * + * When used with a macro (or converted to have a c++20-style <typename Tag = decltype([]{})>), this + * function provides a unique Data object per invocation context. This function also sets the + * Identity identity to contain sourceLocation. This is explicitly intended to work with + * preprocessor macros that generate SourceLocation objects and unique Tags. + */ +template <typename Tag> +auto getOrMakeLatchData(Tag&&, Identity identity, const SourceLocationHolder& sourceLocation) { + static auto reg = Registration( // + std::move(identity) // + .setSourceLocation(sourceLocation)); + return reg.data(); +} + +/** + * Provide a very generic Data object for use with default-constructed Mutexes + */ +inline auto defaultData() { + return getOrMakeLatchData([] {}, Identity(kAnonymousName), MONGO_SOURCE_LOCATION()); +} +} // namespace latch_detail + class Latch { public: virtual ~Latch() = default; @@ -51,54 +223,30 @@ public: virtual bool try_lock() = 0; virtual StringData getName() const { - return "AnonymousLatch"_sd; + return latch_detail::kAnonymousName; } }; +/** + * Mutex is a Lockable type that wraps a stdx::mutex + * + * This class is intended to be used wherever a stdx::mutex would previously be used. It provides + * a generic event-listener interface for instrumenting around lock()/unlock()/try_lock(). + */ class Mutex : public Latch { - class LockNotifier; - public: class LockListener; - static constexpr auto kAnonymousMutexStr = "AnonymousMutex"_sd; - void lock() override; void unlock() override; bool try_lock() override; - StringData getName() const override { - return StringData(_id.name); - } - - struct Identity { - Identity(StringData name = kAnonymousMutexStr) : Identity(boost::none, boost::none, name) {} - - Identity(SourceLocationHolder sourceLocation, StringData name = kAnonymousMutexStr) - : Identity(boost::none, sourceLocation, name) {} + StringData getName() const override; - Identity(hierarchical_acquisition_detail::Level level, StringData name = kAnonymousMutexStr) - : Identity(level, boost::none, name) {} - - Identity(boost::optional<hierarchical_acquisition_detail::Level> level, - boost::optional<SourceLocationHolder> sourceLocation, - StringData name = kAnonymousMutexStr) - : level(level), sourceLocation(sourceLocation), name(name.toString()) {} - - boost::optional<hierarchical_acquisition_detail::Level> level; - boost::optional<SourceLocationHolder> sourceLocation; - std::string name; - }; - - Mutex() : Mutex(Identity()) {} - - Mutex(const Identity& id) : _id(id) {} + Mutex() : Mutex(latch_detail::defaultData()) {} + explicit Mutex(std::shared_ptr<latch_detail::Data> data); ~Mutex(); - struct LatchSetState { - hierarchical_acquisition_detail::Set levelsHeld; - }; - /** * This function adds a LockListener subclass to the triggers for certain actions. * @@ -111,25 +259,22 @@ public: private: static auto& _getListenerState() noexcept { struct State { - std::vector<LockListener*> list; + RegistryList<LockListener*> list; }; - // Note that state should no longer be mutated after init-time (ala MONGO_INITIALIZERS). If - // this changes, than this state needs to be synchronized. static State state; return state; } - static void _onContendedLock(const Identity& id) noexcept; - static void _onQuickLock(const Identity& id) noexcept; - static void _onSlowLock(const Identity& id) noexcept; - static void _onUnlock(const Identity& id) noexcept; - - bool _isLocked = false; + void _onContendedLock() noexcept; + void _onQuickLock() noexcept; + void _onSlowLock() noexcept; + void _onUnlock() noexcept; - const Identity _id; + const std::shared_ptr<latch_detail::Data> _data; stdx::mutex _mutex; // NOLINT + bool _isLocked = false; }; /** @@ -139,6 +284,8 @@ class Mutex::LockListener { friend class Mutex; public: + using Identity = latch_detail::Identity; + virtual ~LockListener() = default; /** @@ -165,9 +312,13 @@ public: } // namespace mongo /** - * Define a mongo::Mutex with all arguments passed through to the ctor + * Construct and register a latch_detail::Data object exactly once per call site */ -#define MONGO_MAKE_LATCH(...) \ - mongo::Mutex { \ - mongo::Mutex::Identity(__VA_ARGS__) \ - } +#define MONGO_GET_LATCH_DATA(...) \ + ::mongo::latch_detail::getOrMakeLatchData( \ + [] {}, ::mongo::latch_detail::Identity(__VA_ARGS__), MONGO_SOURCE_LOCATION_NO_FUNC()) + +/** + * Construct a mongo::Mutex using the result of MONGO_GET_LATCH_DATA with all arguments forwarded + */ +#define MONGO_MAKE_LATCH(...) ::mongo::Mutex(MONGO_GET_LATCH_DATA(__VA_ARGS__)); diff --git a/src/mongo/platform/mutex_test.cpp b/src/mongo/platform/mutex_test.cpp index 739734c0e46..e85f8290466 100644 --- a/src/mongo/platform/mutex_test.cpp +++ b/src/mongo/platform/mutex_test.cpp @@ -63,6 +63,6 @@ TEST(MutexTest, Macros) { // Verify the global named latch static_assert(std::is_same_v<decltype(gMutex), Mutex>); - ASSERT_EQ(gMutex.getName(), Mutex::kAnonymousMutexStr); + ASSERT_EQ(gMutex.getName(), latch_detail::kAnonymousName); } } // namespace mongo diff --git a/src/mongo/platform/source_location.h b/src/mongo/platform/source_location.h index 272029635cd..3d9830ec97b 100644 --- a/src/mongo/platform/source_location.h +++ b/src/mongo/platform/source_location.h @@ -129,14 +129,14 @@ private: // MSVC does not have any of N4810 yet. (see // https://developercommunity.visualstudio.com/idea/354069/implement-c-library-fundamentals-ts-v2.html) -#define MONGO_SOURCE_LOCATION() SourceLocation(__LINE__, 0ul, __FILE__, __func__) -#define MONGO_SOURCE_LOCATION_NO_FUNC() SourceLocation(__LINE__, 0ul, __FILE__, "") +#define MONGO_SOURCE_LOCATION() ::mongo::SourceLocation(__LINE__, 0ul, __FILE__, __func__) +#define MONGO_SOURCE_LOCATION_NO_FUNC() ::mongo::SourceLocation(__LINE__, 0ul, __FILE__, "") #elif defined(__clang__) // windows -> clang // Clang got __builtin_FILE et al as of 8.0.1 (see https://reviews.llvm.org/D37035) -#define MONGO_SOURCE_LOCATION() SourceLocation(__LINE__, 0ul, __FILE__, __func__) -#define MONGO_SOURCE_LOCATION_NO_FUNC() SourceLocation(__LINE__, 0ul, __FILE__, "") +#define MONGO_SOURCE_LOCATION() ::mongo::SourceLocation(__LINE__, 0ul, __FILE__, __func__) +#define MONGO_SOURCE_LOCATION_NO_FUNC() ::mongo::SourceLocation(__LINE__, 0ul, __FILE__, "") #elif defined(__GNUG__) // clang -> gcc @@ -145,9 +145,10 @@ constexpr auto toSourceLocation(std::experimental::source_location loc) { return SourceLocation(loc.line(), loc.column(), loc.file_name(), loc.function_name()); } -#define MONGO_SOURCE_LOCATION() toSourceLocation(std::experimental::source_location::current()) +#define MONGO_SOURCE_LOCATION() \ + ::mongo::toSourceLocation(std::experimental::source_location::current()) #define MONGO_SOURCE_LOCATION_NO_FUNC() \ - toSourceLocation(std::experimental::source_location::current()) + ::mongo::toSourceLocation(std::experimental::source_location::current()) #else // gcc -> ? diff --git a/src/mongo/s/server.cpp b/src/mongo/s/server.cpp index 1870814d80e..020393769ba 100644 --- a/src/mongo/s/server.cpp +++ b/src/mongo/s/server.cpp @@ -102,6 +102,7 @@ #include "mongo/util/exception_filter_win32.h" #include "mongo/util/exit.h" #include "mongo/util/fast_clock_source_factory.h" +#include "mongo/util/latch_analyzer.h" #include "mongo/util/log.h" #include "mongo/util/net/socket_exception.h" #include "mongo/util/net/socket_utils.h" @@ -343,6 +344,8 @@ void cleanupTask(ServiceContext* serviceContext) { } audit::logShutdown(Client::getCurrent()); + + LatchAnalyzer::get(serviceContext).dump(); } Status initializeSharding(OperationContext* opCtx) { diff --git a/src/mongo/util/SConscript b/src/mongo/util/SConscript index 06ae54e98cc..02bc09e44ff 100644 --- a/src/mongo/util/SConscript +++ b/src/mongo/util/SConscript @@ -291,8 +291,12 @@ env.Library( ], LIBDEPS=[ '$BUILD_DIR/mongo/base', + '$BUILD_DIR/mongo/db/commands/test_commands_enabled', '$BUILD_DIR/mongo/db/service_context', ], + LIBDEPS_PRIVATE=[ + '$BUILD_DIR/mongo/db/commands/server_status', + ], ) env.Benchmark( diff --git a/src/mongo/util/background.cpp b/src/mongo/util/background.cpp index 9dff4675fc7..6951bf86681 100644 --- a/src/mongo/util/background.cpp +++ b/src/mongo/util/background.cpp @@ -81,7 +81,9 @@ private: void _runTask(PeriodicTask* task); // _mutex protects the _shutdownRequested flag and the _tasks vector. - Mutex _mutex = MONGO_MAKE_LATCH(HierarchicalAcquisitionLevel(0), "PeriodicTaskRunner::_mutex"); + Mutex _mutex = MONGO_MAKE_LATCH( + // This mutex is held around task execution HierarchicalAcquisitionLevel(0), + "PeriodicTaskRunner::_mutex"); // The condition variable is used to sleep for the interval between task // executions, and is notified when the _shutdownRequested flag is toggled. diff --git a/src/mongo/util/diagnostic_info.cpp b/src/mongo/util/diagnostic_info.cpp index edf19a279df..c323dbfa521 100644 --- a/src/mongo/util/diagnostic_info.cpp +++ b/src/mongo/util/diagnostic_info.cpp @@ -176,24 +176,24 @@ const auto getDiagnosticInfoHandle = Client::declareDecoration<DiagnosticInfoHan MONGO_INITIALIZER(LockListener)(InitializerContext* context) { class LockListener : public Mutex::LockListener { - void onContendedLock(const Mutex::Identity& id) override { + void onContendedLock(const Identity& id) override { if (auto client = Client::getCurrent()) { auto& handle = getDiagnosticInfoHandle(client); stdx::lock_guard<stdx::mutex> lk(handle.mutex); - handle.list.emplace_front(DiagnosticInfo::capture(id.name)); + handle.list.emplace_front(DiagnosticInfo::capture(id.name())); if (currentOpSpawnsThreadWaitingForLatch.shouldFail() && - (id.name == kBlockedOpMutexName)) { + (id.name() == kBlockedOpMutexName)) { gBlockedOp.setIsContended(true); } } } - void onQuickLock(const Mutex::Identity&) override { + void onQuickLock(const Identity&) override { // Do nothing } - void onSlowLock(const Mutex::Identity& id) override { + void onSlowLock(const Identity& id) override { if (auto client = Client::getCurrent()) { auto& handle = getDiagnosticInfoHandle(client); stdx::lock_guard<stdx::mutex> lk(handle.mutex); @@ -203,7 +203,7 @@ MONGO_INITIALIZER(LockListener)(InitializerContext* context) { } } - void onUnlock(const Mutex::Identity&) override { + void onUnlock(const Identity&) override { // Do nothing } }; diff --git a/src/mongo/util/latch_analyzer.cpp b/src/mongo/util/latch_analyzer.cpp index 112cf79dbfb..318090b8bc8 100644 --- a/src/mongo/util/latch_analyzer.cpp +++ b/src/mongo/util/latch_analyzer.cpp @@ -1,5 +1,5 @@ /** - * Copyright (C) 2018-present MongoDB, Inc. + * Copyright (C) 2019-present MongoDB, Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the Server Side Public License, version 1, @@ -27,99 +27,333 @@ * it in the license file. */ +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kDefault + #include "mongo/platform/basic.h" +#include "mongo/util/latch_analyzer.h" + +#include <fmt/format.h> + +#include "mongo/util/hierarchical_acquisition.h" + +#include "mongo/base/init.h" #include "mongo/db/client.h" +#include "mongo/db/commands/test_commands_enabled.h" +#include "mongo/db/service_context.h" #include "mongo/platform/mutex.h" +#include "mongo/util/fail_point.h" #include "mongo/util/latch_analyzer.h" +#include "mongo/util/log.h" namespace mongo { -using Set = HierarchicalAcquisitionSet; +namespace { + +auto kLatchAnalysisName = "latchAnalysis"_sd; +auto kLatchViolationKey = "hierarchicalAcquisitionLevelViolations"_sd; -LatchAnalyzer& LatchAnalyzer::get() { - static LatchAnalyzer gLatchAnalyzer; - return gLatchAnalyzer; +// LatchAnalyzer Decoration getter +const auto getLatchAnalyzer = ServiceContext::declareDecoration<LatchAnalyzer>(); + +/** + * LockListener sub-class to implement updating set in LatchSetState + */ +class LockListener : public Mutex::LockListener { +public: + void onContendedLock(const Identity& id) override { + if (auto client = Client::getCurrent()) { + LatchAnalyzer::get(client).onContention(id); + } + } + + void onQuickLock(const Identity& id) override { + if (auto client = Client::getCurrent()) { + LatchAnalyzer::get(client).onAcquire(id); + } + } + + void onSlowLock(const Identity& id) override { + if (auto client = Client::getCurrent()) { + LatchAnalyzer::get(client).onAcquire(id); + } + } + + void onUnlock(const Identity& id) override { + if (auto client = Client::getCurrent()) { + LatchAnalyzer::get(client).onRelease(id); + } + } +}; + +// Register our LockListener with the Mutex class +MONGO_INITIALIZER(LatchAnalysis)(InitializerContext* context) { + + // Intentionally leaked, people use Latches in detached threads + static auto& listener = *new LockListener; + Mutex::addLockListener(&listener); + + return Status::OK(); } -LatchAnalyzerDisabledBlock::LatchAnalyzerDisabledBlock() { - LatchAnalyzer::get().setEnabled(false); +// Create a FailPoint to analyze latches more seriously for diagnostic purposes. This can be used +// with a new set of test suites to verify our lock hierarchies. +MONGO_FAIL_POINT_DEFINE(enableLatchAnalysis); + +bool shouldAnalyzeLatches() { + return enableLatchAnalysis.shouldFail(); } -LatchAnalyzerDisabledBlock::~LatchAnalyzerDisabledBlock() { - LatchAnalyzer::get().setEnabled(true); + +// Define a new serverStatus section "latchAnalysis" +class LatchAnalysisSection final : public ServerStatusSection { +public: + LatchAnalysisSection() : ServerStatusSection(kLatchAnalysisName.toString()) {} + + bool includeByDefault() const override { + return false; + } + + BSONObj generateSection(OperationContext* opCtx, const BSONElement&) const override { + BSONObjBuilder analysis; + LatchAnalyzer::get(opCtx->getClient()).appendToBSON(analysis); + return analysis.obj(); + }; +} gLatchAnalysisSection; + +// Latching state object to pin onto the Client (i.e. thread) +struct LatchSetState { + HierarchicalAcquisitionSet levelsHeld; + stdx::unordered_set<const latch_detail::Identity*> latchesHeld; +}; + +const auto getLatchSetState = Client::declareDecoration<LatchSetState>(); + +} // namespace + +void LatchAnalyzer::setAllowExitOnViolation(bool allowExitOnViolation) { + _allowExitOnViolation.store(allowExitOnViolation); } -namespace { +bool LatchAnalyzer::allowExitOnViolation() { + return _allowExitOnViolation.load() && (getTestCommandsEnabled()); +} -const auto getLatchSetState = Client::declareDecoration<Mutex::LatchSetState>(); +LatchAnalyzer& LatchAnalyzer::get(ServiceContext* serviceContext) { + return getLatchAnalyzer(serviceContext); +} -/** - * LockListener sub-class to implement updating set in LatchSetState - */ -class MutexLockListener : public Mutex::LockListener { +LatchAnalyzer& LatchAnalyzer::get(Client* client) { + return get(client->getServiceContext()); +} -public: - void onContendedLock(const Mutex::Identity&) { - // Do nothing +LatchAnalyzer& LatchAnalyzer::get() { + auto serviceContext = getCurrentServiceContext(); + invariant(serviceContext); + return get(serviceContext); +} + +void LatchAnalyzer::onContention(const latch_detail::Identity&) { + // Nothing at the moment +} + +void LatchAnalyzer::onAcquire(const latch_detail::Identity& identity) { + auto client = Client::getCurrent(); + if (!client) { + return; } - void onQuickLock(const Mutex::Identity& id) { - onAcquire(id); + if (shouldAnalyzeLatches()) { + // If we should analyze latches, annotate the Client state + auto& latchSet = getLatchSetState(client).latchesHeld; + + stdx::lock_guard lk(_mutex); + for (auto otherIdentity : latchSet) { + auto& stat = _hierarchies[identity.index()][otherIdentity->index()]; + stat.identity = otherIdentity; + ++stat.acquiredAfter; + } + + latchSet.insert(&identity); } - void onSlowLock(const Mutex::Identity& id) { - onAcquire(id); + if (!identity.level()) { + // If we weren't given a HierarchicalAcquisitionLevel, don't verify hierarchies + return; } - void onUnlock(const Mutex::Identity& id) { - onRelease(id); + auto level = *identity.level(); + auto& handle = getLatchSetState(client); + auto result = handle.levelsHeld.add(level); + if (result != HierarchicalAcquisitionSet::AddResult::kValidWasAbsent) { + using namespace fmt::literals; + + auto errorMessage = + "Theoretical deadlock alert - {} latch acquisition at {}:{:d} on latch {}"_format( + toString(result), + identity.sourceLocation()->file_name(), + identity.sourceLocation()->line(), + identity.name()); + + if (allowExitOnViolation()) { + fassert(31360, Status(ErrorCodes::HierarchicalAcquisitionLevelViolation, errorMessage)); + } else { + warning() << errorMessage; + + { + stdx::lock_guard lk(_mutex); + + auto& violation = _violations[identity.index()]; + ++violation.onAcquire; + } + } + } +} + +void LatchAnalyzer::onRelease(const latch_detail::Identity& identity) { + auto client = Client::getCurrent(); + if (!client) { + return; } -private: - void onAcquire(const Mutex::Identity& id) { - if (!id.level) { - return; + if (shouldAnalyzeLatches()) { + // If we should analyze latches, annotate the Client state + auto& latchSet = getLatchSetState(client).latchesHeld; + latchSet.erase(&identity); + + stdx::lock_guard lk(_mutex); + for (auto otherIdentity : latchSet) { + auto& stat = _hierarchies[identity.index()][otherIdentity->index()]; + stat.identity = otherIdentity; + ++stat.releasedBefore; } - if (auto client = Client::getCurrent()) { - auto& handle = getLatchSetState(client); - auto result = handle.levelsHeld.add(id.level.get()); - if (result != Set::AddResult::kValidWasAbsent && LatchAnalyzer::get().isEnabled()) { - // TODO: SERVER-44570 Create a non process-fatal variant of invariant() - fassert(31360, - Status(ErrorCodes::HierarchicalAcquisitionLevelViolation, - str::stream() - << "Theoretical deadlock alert - " << toString(result) - << " latch acquisition at " << id.sourceLocation->toString() - << " on " << id.name)); + } + + if (!identity.level()) { + // If we weren't given a HierarchicalAcquisitionLevel, don't verify hierarchies + return; + } + + auto level = *identity.level(); + auto& handle = getLatchSetState(client); + auto result = handle.levelsHeld.remove(level); + if (result != HierarchicalAcquisitionSet::RemoveResult::kValidWasPresent) { + using namespace fmt::literals; + + auto errorMessage = + "Theoretical deadlock alert - {} latch release at {}:{} on latch {}"_format( + toString(result), + identity.sourceLocation()->file_name(), + identity.sourceLocation()->line(), + identity.name()); + + if (allowExitOnViolation()) { + fassert(31361, Status(ErrorCodes::HierarchicalAcquisitionLevelViolation, errorMessage)); + } else { + warning() << errorMessage; + + { + stdx::lock_guard lk(_mutex); + + auto& violation = _violations[identity.index()]; + ++violation.onRelease; } } } +} - void onRelease(const Mutex::Identity& id) { - if (!id.level) { - return; +void LatchAnalyzer::appendToBSON(mongo::BSONObjBuilder& result) const { + for (auto iter = latch_detail::Catalog::get().iter(); iter.more();) { + auto data = iter.next(); + if (!data) { + continue; } - if (auto client = Client::getCurrent()) { - auto& handle = getLatchSetState(client); - auto result = handle.levelsHeld.remove(id.level.get()); - if (result != Set::RemoveResult::kValidWasPresent && LatchAnalyzer::get().isEnabled()) { - // TODO: SERVER-44570 Create a non process-fatal variant of invariant() - fassert(31361, - Status(ErrorCodes::HierarchicalAcquisitionLevelViolation, - str::stream() - << "Theoretical deadlock alert - " << toString(result) - << " latch release at " << id.sourceLocation->toString() - << " on " << id.name)); + + auto& identity = data->identity(); + + BSONObjBuilder latchObj = result.subobjStart(identity.name()); + latchObj.append("created", data->counts().created.loadRelaxed()); + latchObj.append("destroyed", data->counts().destroyed.loadRelaxed()); + latchObj.append("acquired", data->counts().acquired.loadRelaxed()); + latchObj.append("released", data->counts().released.loadRelaxed()); + latchObj.append("contended", data->counts().contended.loadRelaxed()); + + auto appendViolations = [&] { + stdx::lock_guard lk(_mutex); + auto it = _violations.find(identity.index()); + if (it == _violations.end()) { + return; + } + auto& violation = it->second; + + BSONObjBuilder violationObj = latchObj.subobjStart(kLatchViolationKey); + violationObj.append("onAcquire", violation.onAcquire); + violationObj.append("onRelease", violation.onRelease); + }; + + appendViolations(); + + if (!shouldAnalyzeLatches()) { + // Only append hierarchical information if we should analyze latches + continue; + } + + stdx::lock_guard lk(_mutex); + auto it = _hierarchies.find(identity.index()); + if (it == _hierarchies.end()) { + continue; + } + + auto& latchHierarchy = it->second; + if (latchHierarchy.empty()) { + continue; + } + + { + BSONObjBuilder acquiredAfterObj = latchObj.subobjStart("acquiredAfter"); + for (auto& [_, stat] : latchHierarchy) { + auto count = stat.acquiredAfter; + if (count == 0) { + continue; + } + acquiredAfterObj.append(stat.identity->name(), count); + } + } + + { + BSONObjBuilder releasedBeforeObj = latchObj.subobjStart("releasedBefore"); + for (auto& [_, stat] : latchHierarchy) { + auto count = stat.releasedBefore; + if (count == 0) { + continue; + } + releasedBeforeObj.append(stat.identity->name(), count); } } } -}; +} -MONGO_INITIALIZER(CreateMutexLockListener)(InitializerContext* context) { - static auto& listener = *new MutexLockListener; - Mutex::addLockListener(&listener); - return Status::OK(); +void LatchAnalyzer::dump() { + if (!shouldAnalyzeLatches()) { + return; + } + + BSONObjBuilder bob(1024 * 1024); + { + BSONObjBuilder analysis = bob.subobjStart("latchAnalysis"); + appendToBSON(analysis); + } + + auto obj = bob.done(); + log().setIsTruncatable(false) << "=====LATCHES=====\n" + << obj.jsonString() << "\n===END LATCHES==="; +} + +LatchAnalyzerDisabledBlock::LatchAnalyzerDisabledBlock() { + LatchAnalyzer::get().setAllowExitOnViolation(false); +} + +LatchAnalyzerDisabledBlock::~LatchAnalyzerDisabledBlock() { + LatchAnalyzer::get().setAllowExitOnViolation(true); } -} // namespace } // namespace mongo diff --git a/src/mongo/util/latch_analyzer.h b/src/mongo/util/latch_analyzer.h index af4c9f8b626..41f41dca15f 100644 --- a/src/mongo/util/latch_analyzer.h +++ b/src/mongo/util/latch_analyzer.h @@ -1,5 +1,5 @@ /** - * Copyright (C) 2018-present MongoDB, Inc. + * Copyright (C) 2019-present MongoDB, Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the Server Side Public License, version 1, @@ -29,22 +29,71 @@ #pragma once +#include "mongo/base/simple_string_data_comparator.h" +#include "mongo/base/string_data.h" +#include "mongo/base/string_data_comparator_interface.h" +#include "mongo/db/client.h" +#include "mongo/db/commands/server_status.h" +#include "mongo/db/service_context.h" +#include "mongo/stdx/unordered_map.h" +#include "mongo/stdx/unordered_set.h" + namespace mongo { +/** + * LatchAnalyzer is a ServiceContext decoration that aggregates latch events + * + * This class is intended to provide a platform for hierarchical analysis on latches. To that end, + * onContention(), onAcquire(), and onRelease() are currently called by a Mutex::LockListener + * subclass defined in source. This class does much more work for each event when the + * enableLatchAnalysis failpoint is set to "alwaysOn". This failpoint provides a wealth of data for + * future analysis, but involves additional mutexes and mapping structures that may prove too costly + * for production usage at the least. + */ class LatchAnalyzer { - public: + static LatchAnalyzer& get(ServiceContext* serviceContext); + static LatchAnalyzer& get(Client* client); static LatchAnalyzer& get(); - void setEnabled(bool isEnabled) { - enabled.store(isEnabled); - } - bool isEnabled() { - return enabled.load(); - } + // Handler function for a failed latch acquire + void onContention(const latch_detail::Identity& id); + + // Handler function for a successful latch acquire + void onAcquire(const latch_detail::Identity& id); + + // Handler function for a latch release + void onRelease(const latch_detail::Identity& id); + + // Append the current statistics in a form appropriate for server status to a BOB + void appendToBSON(mongo::BSONObjBuilder& result) const; + + // Log the current statistics in JSON form to INFO + void dump(); + + void setAllowExitOnViolation(bool allowExitOnViolation); + bool allowExitOnViolation(); private: - AtomicWord<bool> enabled{true}; + struct HierarchyStats { + const latch_detail::Identity* identity = nullptr; + + int acquiredAfter = 0; + int releasedBefore = 0; + }; + + using SingleLatchHierarchy = stdx::unordered_map<int64_t, HierarchyStats>; + + struct HierarchicalAcquisitionLevelViolation { + int onAcquire = 0; + int onRelease = 0; + }; + + AtomicWord<bool> _allowExitOnViolation{true}; + + mutable stdx::mutex _mutex; // NOLINT + stdx::unordered_map<int64_t, SingleLatchHierarchy> _hierarchies; + stdx::unordered_map<int64_t, HierarchicalAcquisitionLevelViolation> _violations; }; class LatchAnalyzerDisabledBlock { @@ -54,4 +103,4 @@ public: ~LatchAnalyzerDisabledBlock(); }; -} // namespace mongo
\ No newline at end of file +} // namespace mongo diff --git a/src/mongo/util/latch_analyzer_test.cpp b/src/mongo/util/latch_analyzer_test.cpp index b8b519b6c90..993cf14fbe8 100644 --- a/src/mongo/util/latch_analyzer_test.cpp +++ b/src/mongo/util/latch_analyzer_test.cpp @@ -27,6 +27,7 @@ * it in the license file. */ +#include "mongo/db/commands/test_commands_enabled.h" #include "mongo/db/service_context_test_fixture.h" #include "mongo/platform/mutex.h" #include "mongo/platform/source_location.h" @@ -40,44 +41,44 @@ namespace { using Level = HierarchicalAcquisitionLevel; -class LatchAnalyzerTest : public ServiceContextTest {}; +class LatchAnalyzerTest : public ServiceContextTest { + void setUp() override { + ServiceContextTest::setUp(); + setTestCommandsEnabled(true); + } + + void tearDown() override { + setTestCommandsEnabled(false); + ServiceContextTest::tearDown(); + } +}; DEATH_TEST_F(LatchAnalyzerTest, AddInvalidWasAbsent, "Fatal assertion 31360") { - Mutex lowerLevel = MONGO_MAKE_LATCH( - Level(1), (SourceLocationHolder)MONGO_SOURCE_LOCATION(), "AddInvalidWasAbsent::lowerLevel"); - Mutex higherLevel = MONGO_MAKE_LATCH(Level(2), - (SourceLocationHolder)MONGO_SOURCE_LOCATION(), - "AddInvalidWasAbsent::higherLevel"); + Mutex lowerLevel = MONGO_MAKE_LATCH(Level(1), "AddInvalidWasAbsent::lowerLevel"); + Mutex higherLevel = MONGO_MAKE_LATCH(Level(2), "AddInvalidWasAbsent::higherLevel"); lowerLevel.lock(); higherLevel.lock(); } DEATH_TEST_F(LatchAnalyzerTest, AddInvalidWasPresent, "Fatal assertion 31360") { - Mutex m1 = MONGO_MAKE_LATCH( - Level(1), (SourceLocationHolder)MONGO_SOURCE_LOCATION(), "AddInvalidWasPresent::m1"); - Mutex m2 = MONGO_MAKE_LATCH( - Level(1), (SourceLocationHolder)MONGO_SOURCE_LOCATION(), "AddInvalidWasPresent::m2"); + Mutex m1 = MONGO_MAKE_LATCH(Level(1), "AddInvalidWasPresent::m1"); + Mutex m2 = MONGO_MAKE_LATCH(Level(1), "AddInvalidWasPresent::m2"); m1.lock(); m2.lock(); } DEATH_TEST_F(LatchAnalyzerTest, RemoveInvalidWasAbsent, "Fatal assertion 31361") { - Mutex m = MONGO_MAKE_LATCH( - Level(1), (SourceLocationHolder)MONGO_SOURCE_LOCATION(), "RemoveInvalidWasAbsent::m"); + Mutex m = MONGO_MAKE_LATCH(Level(1), "RemoveInvalidWasAbsent::m"); m.unlock(); } DEATH_TEST_F(LatchAnalyzerTest, RemoveInvalidWasPresent, "Fatal assertion 31361") { - Mutex higherLevel = MONGO_MAKE_LATCH(Level(2), - (SourceLocationHolder)MONGO_SOURCE_LOCATION(), - "RemoveInvalidWasPresent::higherLevel"); - Mutex lowerLevel = MONGO_MAKE_LATCH(Level(1), - (SourceLocationHolder)MONGO_SOURCE_LOCATION(), - "RemoveInvalidWasPresent::lowerLevel"); + Mutex higherLevel = MONGO_MAKE_LATCH(Level(2), "RemoveInvalidWasPresent::higherLevel"); + Mutex lowerLevel = MONGO_MAKE_LATCH(Level(1), "RemoveInvalidWasPresent::lowerLevel"); higherLevel.lock(); lowerLevel.lock(); @@ -85,10 +86,8 @@ DEATH_TEST_F(LatchAnalyzerTest, RemoveInvalidWasPresent, "Fatal assertion 31361" } TEST_F(LatchAnalyzerTest, AddValidWasAbsent) { - Mutex higherLevel = MONGO_MAKE_LATCH( - Level(2), (SourceLocationHolder)MONGO_SOURCE_LOCATION(), "AddValidWasAbsent::higherLevel"); - Mutex lowerLevel = MONGO_MAKE_LATCH( - Level(1), (SourceLocationHolder)MONGO_SOURCE_LOCATION(), "AddValidWasAbsent::lowerLevel"); + Mutex higherLevel = MONGO_MAKE_LATCH(Level(2), "AddValidWasAbsent::higherLevel"); + Mutex lowerLevel = MONGO_MAKE_LATCH(Level(1), "AddValidWasAbsent::lowerLevel"); higherLevel.lock(); lowerLevel.lock(); @@ -102,12 +101,8 @@ TEST_F(LatchAnalyzerTest, AddValidWasAbsent) { TEST_F(LatchAnalyzerTest, RemoveValidWasPresent) { - Mutex higherLevel = MONGO_MAKE_LATCH(Level(2), - (SourceLocationHolder)MONGO_SOURCE_LOCATION(), - "RemoveValidWasPresent::higherLevel"); - Mutex lowerLevel = MONGO_MAKE_LATCH(Level(1), - (SourceLocationHolder)MONGO_SOURCE_LOCATION(), - "RemoveValidWasPresent::lowerLevel"); + Mutex higherLevel = MONGO_MAKE_LATCH(Level(2), "RemoveValidWasPresent::higherLevel"); + Mutex lowerLevel = MONGO_MAKE_LATCH(Level(1), "RemoveValidWasPresent::lowerLevel"); { LatchAnalyzerDisabledBlock block; |