diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/mongo/db/concurrency/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/concurrency/deadlock_detection_test.cpp | 139 | ||||
-rw-r--r-- | src/mongo/db/concurrency/lock_manager.cpp | 145 | ||||
-rw-r--r-- | src/mongo/db/concurrency/lock_manager.h | 99 | ||||
-rw-r--r-- | src/mongo/db/concurrency/lock_state.cpp | 29 | ||||
-rw-r--r-- | src/mongo/db/concurrency/lock_state.h | 17 | ||||
-rw-r--r-- | src/mongo/db/concurrency/locker.h | 11 | ||||
-rw-r--r-- | src/mongo/db/concurrency/locker_noop.h | 5 |
8 files changed, 20 insertions, 426 deletions
diff --git a/src/mongo/db/concurrency/SConscript b/src/mongo/db/concurrency/SConscript index 0ca5123d60b..1048ec8ef27 100644 --- a/src/mongo/db/concurrency/SConscript +++ b/src/mongo/db/concurrency/SConscript @@ -62,7 +62,6 @@ env.Benchmark( env.CppUnitTest( target='lock_manager_test', source=['d_concurrency_test.cpp', - 'deadlock_detection_test.cpp', 'fast_map_noalloc_test.cpp', 'lock_manager_test.cpp', 'lock_state_test.cpp', diff --git a/src/mongo/db/concurrency/deadlock_detection_test.cpp b/src/mongo/db/concurrency/deadlock_detection_test.cpp deleted file mode 100644 index 503f004fc09..00000000000 --- a/src/mongo/db/concurrency/deadlock_detection_test.cpp +++ /dev/null @@ -1,139 +0,0 @@ - -/** - * Copyright (C) 2018-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * <http://www.mongodb.com/licensing/server-side-public-license>. - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include "mongo/db/concurrency/lock_manager_test_help.h" -#include "mongo/unittest/unittest.h" - -namespace mongo { - -TEST(Deadlock, NoDeadlock) { - const ResourceId resId(RESOURCE_DATABASE, std::string("A")); - - LockerForTests locker1(MODE_IS); - LockerForTests locker2(MODE_IS); - - ASSERT_EQUALS(LOCK_OK, locker1.lockBegin(nullptr, resId, MODE_S)); - ASSERT_EQUALS(LOCK_OK, locker2.lockBegin(nullptr, resId, MODE_S)); - - DeadlockDetector wfg1(*getGlobalLockManager(), &locker1); - ASSERT(!wfg1.check().hasCycle()); - - DeadlockDetector wfg2(*getGlobalLockManager(), &locker2); - ASSERT(!wfg2.check().hasCycle()); -} - -TEST(Deadlock, Simple) { - const ResourceId resIdA(RESOURCE_DATABASE, std::string("A")); - const ResourceId resIdB(RESOURCE_DATABASE, std::string("B")); - - LockerForTests locker1(MODE_IX); - LockerForTests locker2(MODE_IX); - - ASSERT_EQUALS(LOCK_OK, locker1.lockBegin(nullptr, resIdA, MODE_X)); - ASSERT_EQUALS(LOCK_OK, locker2.lockBegin(nullptr, resIdB, MODE_X)); - - // 1 -> 2 - ASSERT_EQUALS(LOCK_WAITING, locker1.lockBegin(nullptr, resIdB, MODE_X)); - - // 2 -> 1 - ASSERT_EQUALS(LOCK_WAITING, locker2.lockBegin(nullptr, resIdA, MODE_X)); - - DeadlockDetector wfg1(*getGlobalLockManager(), &locker1); - ASSERT(wfg1.check().hasCycle()); - - DeadlockDetector wfg2(*getGlobalLockManager(), &locker2); - ASSERT(wfg2.check().hasCycle()); - - // Cleanup, so that LockerImpl doesn't complain about leaked locks - locker1.unlock(resIdB); - locker2.unlock(resIdA); -} - -TEST(Deadlock, SimpleUpgrade) { - const ResourceId resId(RESOURCE_DATABASE, std::string("A")); - - LockerForTests locker1(MODE_IX); - LockerForTests locker2(MODE_IX); - - // Both acquire lock in intent mode - ASSERT_EQUALS(LOCK_OK, locker1.lockBegin(nullptr, resId, MODE_IX)); - ASSERT_EQUALS(LOCK_OK, locker2.lockBegin(nullptr, resId, MODE_IX)); - - // Both try to upgrade - ASSERT_EQUALS(LOCK_WAITING, locker1.lockBegin(nullptr, resId, MODE_X)); - ASSERT_EQUALS(LOCK_WAITING, locker2.lockBegin(nullptr, resId, MODE_X)); - - DeadlockDetector wfg1(*getGlobalLockManager(), &locker1); - ASSERT(wfg1.check().hasCycle()); - - DeadlockDetector wfg2(*getGlobalLockManager(), &locker2); - ASSERT(wfg2.check().hasCycle()); - - // Cleanup, so that LockerImpl doesn't complain about leaked locks - locker1.unlock(resId); - locker2.unlock(resId); -} - -TEST(Deadlock, Indirect) { - const ResourceId resIdA(RESOURCE_DATABASE, std::string("A")); - const ResourceId resIdB(RESOURCE_DATABASE, std::string("B")); - - LockerForTests locker1(MODE_IX); - LockerForTests locker2(MODE_IX); - LockerForTests lockerIndirect(MODE_IX); - - ASSERT_EQUALS(LOCK_OK, locker1.lockBegin(nullptr, resIdA, MODE_X)); - ASSERT_EQUALS(LOCK_OK, locker2.lockBegin(nullptr, resIdB, MODE_X)); - - // 1 -> 2 - ASSERT_EQUALS(LOCK_WAITING, locker1.lockBegin(nullptr, resIdB, MODE_X)); - - // 2 -> 1 - ASSERT_EQUALS(LOCK_WAITING, locker2.lockBegin(nullptr, resIdA, MODE_X)); - - // 3 -> 2 - ASSERT_EQUALS(LOCK_WAITING, lockerIndirect.lockBegin(nullptr, resIdA, MODE_X)); - - DeadlockDetector wfg1(*getGlobalLockManager(), &locker1); - ASSERT(wfg1.check().hasCycle()); - - DeadlockDetector wfg2(*getGlobalLockManager(), &locker2); - ASSERT(wfg2.check().hasCycle()); - - // Indirect locker should not report the cycle since it does not participate in it - DeadlockDetector wfgIndirect(*getGlobalLockManager(), &lockerIndirect); - ASSERT(!wfgIndirect.check().hasCycle()); - - // Cleanup, so that LockerImpl doesn't complain about leaked locks - locker1.unlock(resIdB); - locker2.unlock(resIdA); -} - -} // namespace mongo diff --git a/src/mongo/db/concurrency/lock_manager.cpp b/src/mongo/db/concurrency/lock_manager.cpp index 11f2eac150c..1b8f5f644a5 100644 --- a/src/mongo/db/concurrency/lock_manager.cpp +++ b/src/mongo/db/concurrency/lock_manager.cpp @@ -972,151 +972,6 @@ LockHead* LockManager::LockBucket::findOrInsert(ResourceId resId) { } // -// DeadlockDetector -// - -DeadlockDetector::DeadlockDetector(const LockManager& lockMgr, const Locker* initialLocker) - : _lockMgr(lockMgr), _initialLockerId(initialLocker->getId()), _foundCycle(false) { - const ResourceId resId = initialLocker->getWaitingResource(); - - // If there is no resource waiting there is nothing to do - if (resId.isValid()) { - _queue.push_front(UnprocessedNode(_initialLockerId, resId)); - } -} - -bool DeadlockDetector::next() { - if (_queue.empty()) - return false; - - UnprocessedNode front = _queue.front(); - _queue.pop_front(); - - _processNextNode(front); - - return !_queue.empty(); -} - -bool DeadlockDetector::hasCycle() const { - invariant(_queue.empty()); - - return _foundCycle; -} - -std::string DeadlockDetector::toString() const { - StringBuilder sb; - - for (WaitForGraph::const_iterator it = _graph.begin(); it != _graph.end(); it++) { - sb << "Locker " << it->first << " waits for resource " << it->second.resId.toString() - << " held by ["; - - const ConflictingOwnersList owners = it->second.owners; - for (ConflictingOwnersList::const_iterator itW = owners.begin(); itW != owners.end(); - itW++) { - sb << *itW << ", "; - } - - sb << "]\n"; - } - - return sb.str(); -} - -void DeadlockDetector::_processNextNode(const UnprocessedNode& node) { - // Locate the request - LockManager::LockBucket* bucket = _lockMgr._getBucket(node.resId); - stdx::lock_guard<SimpleMutex> scopedLock(bucket->mutex); - - LockManager::LockBucket::Map::const_iterator iter = bucket->data.find(node.resId); - if (iter == bucket->data.end()) { - return; - } - - const LockHead* lock = iter->second; - - LockRequest* request = lock->findRequest(node.lockerId); - - // It is possible that a request which was thought to be waiting suddenly became - // granted, so check that before proceeding - if (!request || (request->status == LockRequest::STATUS_GRANTED)) { - return; - } - - std::pair<WaitForGraph::iterator, bool> val = - _graph.insert(WaitForGraphPair(node.lockerId, Edges(node.resId))); - if (!val.second) { - // We already saw this locker id, which means we have a cycle. - if (!_foundCycle) { - _foundCycle = (node.lockerId == _initialLockerId); - } - - return; - } - - Edges& edges = val.first->second; - - bool seen = false; - for (LockRequest* it = lock->grantedList._back; it != nullptr; it = it->prev) { - // We can't conflict with ourselves - if (it == request) { - seen = true; - continue; - } - - // If we are a regular conflicting request, both granted and conversion modes need to - // be checked for conflict, since conversions will be granted first. - if (request->status == LockRequest::STATUS_WAITING) { - if (conflicts(request->mode, modeMask(it->mode)) || - conflicts(request->mode, modeMask(it->convertMode))) { - const LockerId lockerId = it->locker->getId(); - const ResourceId waitResId = it->locker->getWaitingResource(); - - if (waitResId.isValid()) { - _queue.push_front(UnprocessedNode(lockerId, waitResId)); - edges.owners.push_back(lockerId); - } - } - - continue; - } - - // If we are a conversion request, only requests, which are before us need to be - // accounted for. - invariant(request->status == LockRequest::STATUS_CONVERTING); - - if (conflicts(request->convertMode, modeMask(it->mode)) || - (seen && conflicts(request->convertMode, modeMask(it->convertMode)))) { - const LockerId lockerId = it->locker->getId(); - const ResourceId waitResId = it->locker->getWaitingResource(); - - if (waitResId.isValid()) { - _queue.push_front(UnprocessedNode(lockerId, waitResId)); - edges.owners.push_back(lockerId); - } - } - } - - // All conflicting waits, which would be granted before us - for (LockRequest* it = request->prev; - (request->status == LockRequest::STATUS_WAITING) && (it != nullptr); - it = it->prev) { - // We started from the previous element, so we should never see ourselves - invariant(it != request); - - if (conflicts(request->mode, modeMask(it->mode))) { - const LockerId lockerId = it->locker->getId(); - const ResourceId waitResId = it->locker->getWaitingResource(); - - if (waitResId.isValid()) { - _queue.push_front(UnprocessedNode(lockerId, waitResId)); - edges.owners.push_back(lockerId); - } - } - } -} - - -// // ResourceId // diff --git a/src/mongo/db/concurrency/lock_manager.h b/src/mongo/db/concurrency/lock_manager.h index 7b137715412..75922ac7c18 100644 --- a/src/mongo/db/concurrency/lock_manager.h +++ b/src/mongo/db/concurrency/lock_manager.h @@ -134,9 +134,6 @@ public: BSONObjBuilder* result); private: - // The deadlock detector needs to access the buckets and locks directly - friend class DeadlockDetector; - // The lockheads need access to the partitions friend struct LockHead; @@ -219,100 +216,4 @@ private: static const unsigned _numPartitions; Partition* _partitions; }; - - -/** - * Iteratively builds the wait-for graph, starting from a given blocked Locker and stops either - * when all reachable nodes have been checked or if a cycle is detected. This class is - * thread-safe. Because locks may come and go in parallel with deadlock detection, it may - * report false positives, but if there is a stable cycle it will be discovered. - * - * Implemented as a separate class in order to facilitate diagnostics and also unit-testing for - * cases where locks come and go in parallel with deadlock detection. - */ -class DeadlockDetector { -public: - /** - * Initializes the wait-for graph builder with the LM to operate on and a locker object - * from which to start the search. Deadlock will only be reported if there is a wait cycle - * in which the initial locker participates. - */ - DeadlockDetector(const LockManager& lockMgr, const Locker* initialLocker); - - DeadlockDetector& check() { - while (next()) { - } - - return *this; - } - - /** - * Processes the next wait for node and queues up its set of owners to the unprocessed - * queue. - * - * @return true if there are more unprocessed nodes and no cycle has been discovered yet; - * false if either all reachable nodes have been processed or - */ - bool next(); - - /** - * Checks whether a cycle exists in the wait-for graph, which has been built so far. It's - * only useful to call this after next() has returned false. - */ - bool hasCycle() const; - - /** - * Produces a string containing the wait-for graph that has been built so far. - */ - std::string toString() const; - -private: - // An entry in the owners list below means that some locker L is blocked on some resource - // resId, which is currently held by the given set of owners. The reason to store it in - // such form is in order to avoid storing pointers to the lockers or to have to look them - // up by id, both of which require some form of synchronization other than locking the - // bucket for the resource. Instead, given the resId, we can lock the bucket for the lock - // and find the respective LockRequests and continue our scan forward. - typedef std::vector<LockerId> ConflictingOwnersList; - - struct Edges { - explicit Edges(ResourceId resId) : resId(resId) {} - - // Resource id indicating the lock node - ResourceId resId; - - // List of lock owners/pariticipants with which the initial locker conflicts for - // obtaining the lock - ConflictingOwnersList owners; - }; - - typedef std::map<LockerId, Edges> WaitForGraph; - typedef WaitForGraph::value_type WaitForGraphPair; - - - // We don't want to hold locks between iteration cycles, so just store the resourceId and - // the lockerId so we can directly find them from the lock manager. - struct UnprocessedNode { - UnprocessedNode(LockerId lockerId, ResourceId resId) : lockerId(lockerId), resId(resId) {} - - LockerId lockerId; - ResourceId resId; - }; - - typedef std::deque<UnprocessedNode> UnprocessedNodesQueue; - - - void _processNextNode(const UnprocessedNode& node); - - - // Not owned. Lifetime must be longer than that of the graph builder. - const LockManager& _lockMgr; - const LockerId _initialLockerId; - - UnprocessedNodesQueue _queue; - WaitForGraph _graph; - - bool _foundCycle; -}; - } // namespace mongo diff --git a/src/mongo/db/concurrency/lock_state.cpp b/src/mongo/db/concurrency/lock_state.cpp index e4df0cb2528..902afbda01b 100644 --- a/src/mongo/db/concurrency/lock_state.cpp +++ b/src/mongo/db/concurrency/lock_state.cpp @@ -369,7 +369,7 @@ LockResult LockerImpl::_lockGlobalBegin(OperationContext* opCtx, LockMode mode, } LockResult LockerImpl::lockGlobalComplete(OperationContext* opCtx, Date_t deadline) { - return lockComplete(opCtx, resourceIdGlobal, getLockMode(resourceIdGlobal), deadline, false); + return lockComplete(opCtx, resourceIdGlobal, getLockMode(resourceIdGlobal), deadline); } bool LockerImpl::unlockGlobal() { @@ -422,8 +422,10 @@ void LockerImpl::endWriteUnitOfWork() { } } -LockResult LockerImpl::lock( - OperationContext* opCtx, ResourceId resId, LockMode mode, Date_t deadline, bool checkDeadlock) { +LockResult LockerImpl::lock(OperationContext* opCtx, + ResourceId resId, + LockMode mode, + Date_t deadline) { const LockResult result = lockBegin(opCtx, resId, mode); @@ -435,7 +437,7 @@ LockResult LockerImpl::lock( // unsuccessful result that the lock manager would return is LOCK_WAITING. invariant(result == LOCK_WAITING); - return lockComplete(opCtx, resId, mode, deadline, checkDeadlock); + return lockComplete(opCtx, resId, mode, deadline); } void LockerImpl::downgrade(ResourceId resId, LockMode newMode) { @@ -734,8 +736,10 @@ LockResult LockerImpl::lockBegin(OperationContext* opCtx, ResourceId resId, Lock return result; } -LockResult LockerImpl::lockComplete( - OperationContext* opCtx, ResourceId resId, LockMode mode, Date_t deadline, bool checkDeadlock) { +LockResult LockerImpl::lockComplete(OperationContext* opCtx, + ResourceId resId, + LockMode mode, + Date_t deadline) { LockResult result; Milliseconds timeout; @@ -789,19 +793,6 @@ LockResult LockerImpl::lockComplete( if (result == LOCK_OK) break; - if (checkDeadlock) { - DeadlockDetector wfg(globalLockManager, this); - if (wfg.check().hasCycle()) { - warning() << "Deadlock found: " << wfg.toString(); - - globalStats.recordDeadlock(resId, mode); - _stats.recordDeadlock(resId, mode); - - result = LOCK_DEADLOCK; - break; - } - } - // If infinite timeout was requested, just keep waiting if (timeout == Milliseconds::max()) { continue; diff --git a/src/mongo/db/concurrency/lock_state.h b/src/mongo/db/concurrency/lock_state.h index 4532e0e9830..6d7278153ac 100644 --- a/src/mongo/db/concurrency/lock_state.h +++ b/src/mongo/db/concurrency/lock_state.h @@ -162,14 +162,10 @@ public: virtual LockResult lock(OperationContext* opCtx, ResourceId resId, LockMode mode, - Date_t deadline = Date_t::max(), - bool checkDeadlock = false); + Date_t deadline = Date_t::max()); - virtual LockResult lock(ResourceId resId, - LockMode mode, - Date_t deadline = Date_t::max(), - bool checkDeadlock = false) { - return lock(nullptr, resId, mode, deadline, checkDeadlock); + virtual LockResult lock(ResourceId resId, LockMode mode, Date_t deadline = Date_t::max()) { + return lock(nullptr, resId, mode, deadline); } virtual void downgrade(ResourceId resId, LockMode newMode); @@ -240,11 +236,10 @@ public: LockResult lockComplete(OperationContext* opCtx, ResourceId resId, LockMode mode, - Date_t deadline, - bool checkDeadlock); + Date_t deadline); - LockResult lockComplete(ResourceId resId, LockMode mode, Date_t deadline, bool checkDeadlock) { - return lockComplete(nullptr, resId, mode, deadline, checkDeadlock); + LockResult lockComplete(ResourceId resId, LockMode mode, Date_t deadline) { + return lockComplete(nullptr, resId, mode, deadline); } /** diff --git a/src/mongo/db/concurrency/locker.h b/src/mongo/db/concurrency/locker.h index 03b4d871c5c..d347e3d4cc2 100644 --- a/src/mongo/db/concurrency/locker.h +++ b/src/mongo/db/concurrency/locker.h @@ -222,26 +222,19 @@ public: * returning LOCK_TIMEOUT. This parameter defaults to an infinite deadline. * If Milliseconds(0) is passed, the request will return immediately, if * the request could not be granted right away. - * @param checkDeadlock Whether to enable deadlock detection for this acquisition. This - * parameter is put in place until we can handle deadlocks at all places, - * which acquire locks. * * @return All LockResults except for LOCK_WAITING, because it blocks. */ virtual LockResult lock(OperationContext* opCtx, ResourceId resId, LockMode mode, - Date_t deadline = Date_t::max(), - bool checkDeadlock = false) = 0; + Date_t deadline = Date_t::max()) = 0; /** * Calling lock without an OperationContext does not allow LOCK_WAITING states to be * interrupted. */ - virtual LockResult lock(ResourceId resId, - LockMode mode, - Date_t deadline = Date_t::max(), - bool checkDeadlock = false) = 0; + virtual LockResult lock(ResourceId resId, LockMode mode, Date_t deadline = Date_t::max()) = 0; /** * Downgrades the specified resource's lock mode without changing the reference count. diff --git a/src/mongo/db/concurrency/locker_noop.h b/src/mongo/db/concurrency/locker_noop.h index 95f426b2e15..188163b2af3 100644 --- a/src/mongo/db/concurrency/locker_noop.h +++ b/src/mongo/db/concurrency/locker_noop.h @@ -122,12 +122,11 @@ public: virtual LockResult lock(OperationContext* opCtx, ResourceId resId, LockMode mode, - Date_t deadline, - bool checkDeadlock) { + Date_t deadline) { return LockResult::LOCK_OK; } - virtual LockResult lock(ResourceId resId, LockMode mode, Date_t deadline, bool checkDeadlock) { + virtual LockResult lock(ResourceId resId, LockMode mode, Date_t deadline) { return LockResult::LOCK_OK; } |