diff options
author | Spencer T Brody <spencer@mongodb.com> | 2018-08-22 14:37:16 -0400 |
---|---|---|
committer | Spencer T Brody <spencer@mongodb.com> | 2018-09-18 13:20:18 -0400 |
commit | e65ff57e108ed69c46cc0b0ccbdd675663de2469 (patch) | |
tree | 010051703fd944884d99571e31f1dd45b28da8a1 /src/mongo/db/concurrency | |
parent | 157691ef0babb24cd1566446f5f88206f9607564 (diff) | |
download | mongo-e65ff57e108ed69c46cc0b0ccbdd675663de2469.tar.gz |
SERVER-36913 Add functionality to LockManager for repl state transitions with prepared transactions.
Diffstat (limited to 'src/mongo/db/concurrency')
-rw-r--r-- | src/mongo/db/concurrency/SConscript | 1 | ||||
-rw-r--r-- | src/mongo/db/concurrency/lock_manager.cpp | 317 | ||||
-rw-r--r-- | src/mongo/db/concurrency/lock_manager.h | 150 | ||||
-rw-r--r-- | src/mongo/db/concurrency/lock_state.cpp | 67 | ||||
-rw-r--r-- | src/mongo/db/concurrency/lock_state.h | 8 | ||||
-rw-r--r-- | src/mongo/db/concurrency/lock_state_test.cpp | 99 | ||||
-rw-r--r-- | src/mongo/db/concurrency/locker.h | 20 | ||||
-rw-r--r-- | src/mongo/db/concurrency/locker_noop.h | 12 | ||||
-rw-r--r-- | src/mongo/db/concurrency/replication_lock_manager_manipulator.cpp | 105 | ||||
-rw-r--r-- | src/mongo/db/concurrency/replication_lock_manager_manipulator.h | 71 |
10 files changed, 642 insertions, 208 deletions
diff --git a/src/mongo/db/concurrency/SConscript b/src/mongo/db/concurrency/SConscript index 0ca5123d60b..9ddfa508f36 100644 --- a/src/mongo/db/concurrency/SConscript +++ b/src/mongo/db/concurrency/SConscript @@ -38,6 +38,7 @@ env.Library( 'lock_manager.cpp', 'lock_state.cpp', 'lock_stats.cpp', + 'replication_lock_manager_manipulator.cpp', ], LIBDEPS=[ '$BUILD_DIR/mongo/base', diff --git a/src/mongo/db/concurrency/lock_manager.cpp b/src/mongo/db/concurrency/lock_manager.cpp index b18007877f8..0b93ae967fb 100644 --- a/src/mongo/db/concurrency/lock_manager.cpp +++ b/src/mongo/db/concurrency/lock_manager.cpp @@ -129,208 +129,6 @@ MONGO_STATIC_ASSERT((sizeof(LockRequestStatusNames) / sizeof(LockRequestStatusNa } // namespace /** - * There is one of these objects for each resource that has a lock request. Empty objects (i.e. - * LockHead with no requests) are allowed to exist on the lock manager's hash table. - * - * The memory and lifetime is controlled entirely by the LockManager class. - * - * Not thread-safe and should only be accessed under the LockManager's bucket lock. Must be locked - * before locking a partition, not after. - */ -struct LockHead { - - /** - * Used for initialization of a LockHead, which might have been retrieved from cache and also in - * order to keep the LockHead structure a POD. - */ - void initNew(ResourceId resId) { - resourceId = resId; - - grantedList.reset(); - memset(grantedCounts, 0, sizeof(grantedCounts)); - grantedModes = 0; - - conflictList.reset(); - memset(conflictCounts, 0, sizeof(conflictCounts)); - conflictModes = 0; - - conversionsCount = 0; - compatibleFirstCount = 0; - } - - /** - * True iff there may be partitions with granted requests for this resource. - */ - bool partitioned() const { - return !partitions.empty(); - } - - /** - * Locates the request corresponding to the particular locker or returns nullptr. Must be called - * with the bucket holding this lock head locked. - */ - LockRequest* findRequest(LockerId lockerId) const { - // Check the granted queue first - for (LockRequest* it = grantedList._front; it != nullptr; it = it->next) { - if (it->locker->getId() == lockerId) { - return it; - } - } - - // Check the conflict queue second - for (LockRequest* it = conflictList._front; it != nullptr; it = it->next) { - if (it->locker->getId() == lockerId) { - return it; - } - } - - return nullptr; - } - - /** - * Finish creation of request and put it on the LockHead's conflict or granted queues. Returns - * LOCK_WAITING for conflict case and LOCK_OK otherwise. - */ - LockResult newRequest(LockRequest* request) { - invariant(!request->partitionedLock); - request->lock = this; - - // We cannot set request->partitioned to false, as this might be a migration, in which case - // access to that field is not protected. The 'partitioned' member instead indicates if a - // request was initially partitioned. - - // New lock request. Queue after all granted modes and after any already requested - // conflicting modes - if (conflicts(request->mode, grantedModes) || - (!compatibleFirstCount && conflicts(request->mode, conflictModes))) { - request->status = LockRequest::STATUS_WAITING; - - // Put it on the conflict queue. Conflicts are granted front to back. - if (request->enqueueAtFront) { - conflictList.push_front(request); - } else { - conflictList.push_back(request); - } - - incConflictModeCount(request->mode); - - return LOCK_WAITING; - } - - // No conflict, new request - request->status = LockRequest::STATUS_GRANTED; - - grantedList.push_back(request); - incGrantedModeCount(request->mode); - - if (request->compatibleFirst) { - compatibleFirstCount++; - } - - return LOCK_OK; - } - - /** - * Lock each partitioned LockHead in turn, and move any (granted) intent mode requests for - * lock->resourceId to lock, which must itself already be locked. - */ - void migratePartitionedLockHeads(); - - // Methods to maintain the granted queue - void incGrantedModeCount(LockMode mode) { - invariant(grantedCounts[mode] >= 0); - if (++grantedCounts[mode] == 1) { - invariant((grantedModes & modeMask(mode)) == 0); - grantedModes |= modeMask(mode); - } - } - - void decGrantedModeCount(LockMode mode) { - invariant(grantedCounts[mode] >= 1); - if (--grantedCounts[mode] == 0) { - invariant((grantedModes & modeMask(mode)) == modeMask(mode)); - grantedModes &= ~modeMask(mode); - } - } - - // Methods to maintain the conflict queue - void incConflictModeCount(LockMode mode) { - invariant(conflictCounts[mode] >= 0); - if (++conflictCounts[mode] == 1) { - invariant((conflictModes & modeMask(mode)) == 0); - conflictModes |= modeMask(mode); - } - } - - void decConflictModeCount(LockMode mode) { - invariant(conflictCounts[mode] >= 1); - if (--conflictCounts[mode] == 0) { - invariant((conflictModes & modeMask(mode)) == modeMask(mode)); - conflictModes &= ~modeMask(mode); - } - } - - // Id of the resource which is protected by this lock. Initialized at construction time and does - // not change. - ResourceId resourceId; - - // - // Granted queue - // - - // Doubly-linked list of requests, which have been granted. Newly granted requests go to - // the end of the queue. Conversion requests are granted from the beginning forward. - LockRequestList grantedList; - - // Counts the grants and conversion counts for each of the supported lock modes. These - // counts should exactly match the aggregated modes on the granted list. - uint32_t grantedCounts[LockModesCount]; - - // Bit-mask of the granted + converting modes on the granted queue. Maintained in lock-step - // with the grantedCounts array. - uint32_t grantedModes; - - // - // Conflict queue - // - - // Doubly-linked list of requests, which have not been granted yet because they conflict - // with the set of granted modes. Requests are queued at the end of the queue and are - // granted from the beginning forward, which gives these locks FIFO ordering. Exceptions - // are high-priority locks, such as the MMAP V1 flush lock. - LockRequestList conflictList; - - // Counts the conflicting requests for each of the lock modes. These counts should exactly - // match the aggregated modes on the conflicts list. - uint32_t conflictCounts[LockModesCount]; - - // Bit-mask of the conflict modes on the conflict queue. Maintained in lock-step with the - // conflictCounts array. - uint32_t conflictModes; - - // References partitions that may have PartitionedLockHeads for this LockHead. - // Non-empty implies the lock has no conflicts and only has intent modes as grantedModes. - // TODO: Remove this vector and make LockHead a POD - std::vector<LockManager::Partition*> partitions; - - // - // Conversion - // - - // Counts the number of requests on the granted queue, which have requested any kind of - // conflicting conversion and are blocked (i.e. all requests which are currently - // STATUS_CONVERTING). This is an optimization for unlocking in that we do not need to - // check the granted queue for requests in STATUS_CONVERTING if this count is zero. This - // saves cycles in the regular case and only burdens the less-frequent lock upgrade case. - uint32_t conversionsCount; - - // Counts the number of requests on the granted queue, which have requested that the policy - // be switched to compatible-first. As long as this value is > 0, the policy will stay - // compatible-first. - uint32_t compatibleFirstCount; -}; - -/** * The PartitionedLockHead allows optimizing the case where requests overwhelmingly use * the intent lock modes MODE_IS and MODE_IX, which are compatible with each other. * Having to use a single LockHead causes contention where none would be needed. @@ -370,6 +168,85 @@ struct PartitionedLockHead { LockRequestList grantedList; }; +// +// LockHead +// +void LockHead::initNew(ResourceId resId) { + resourceId = resId; + + grantedList.reset(); + memset(grantedCounts, 0, sizeof(grantedCounts)); + grantedModes = 0; + + conflictList.reset(); + memset(conflictCounts, 0, sizeof(conflictCounts)); + conflictModes = 0; + + conversionsCount = 0; + compatibleFirstCount = 0; +} + +bool LockHead::partitioned() const { + return !partitions.empty(); +} + +LockRequest* LockHead::findRequest(LockerId lockerId) const { + // Check the granted queue first + for (LockRequest* it = grantedList._front; it != nullptr; it = it->next) { + if (it->locker->getId() == lockerId) { + return it; + } + } + + // Check the conflict queue second + for (LockRequest* it = conflictList._front; it != nullptr; it = it->next) { + if (it->locker->getId() == lockerId) { + return it; + } + } + + return nullptr; +} + +LockResult LockHead::newRequest(LockRequest* request) { + invariant(!request->partitionedLock); + request->lock = this; + + // We cannot set request->partitioned to false, as this might be a migration, in which case + // access to that field is not protected. The 'partitioned' member instead indicates if a + // request was initially partitioned. + + // New lock request. Queue after all granted modes and after any already requested + // conflicting modes + if (conflicts(request->mode, grantedModes) || + (!compatibleFirstCount && conflicts(request->mode, conflictModes))) { + request->status = LockRequest::STATUS_WAITING; + + // Put it on the conflict queue. Conflicts are granted front to back. + if (request->enqueueAtFront) { + conflictList.push_front(request); + } else { + conflictList.push_back(request); + } + + incConflictModeCount(request->mode); + + return LOCK_WAITING; + } + + // No conflict, new request + request->status = LockRequest::STATUS_GRANTED; + + grantedList.push_back(request); + incGrantedModeCount(request->mode); + + if (request->compatibleFirst) { + compatibleFirstCount++; + } + + return LOCK_OK; +} + void LockHead::migratePartitionedLockHeads() { invariant(partitioned()); @@ -403,6 +280,38 @@ void LockHead::migratePartitionedLockHeads() { } } +void LockHead::incGrantedModeCount(LockMode mode) { + invariant(grantedCounts[mode] >= 0); + if (++grantedCounts[mode] == 1) { + invariant((grantedModes & modeMask(mode)) == 0); + grantedModes |= modeMask(mode); + } +} + +void LockHead::decGrantedModeCount(LockMode mode) { + invariant(grantedCounts[mode] >= 1); + if (--grantedCounts[mode] == 0) { + invariant((grantedModes & modeMask(mode)) == modeMask(mode)); + grantedModes &= ~modeMask(mode); + } +} + +void LockHead::incConflictModeCount(LockMode mode) { + invariant(conflictCounts[mode] >= 0); + if (++conflictCounts[mode] == 1) { + invariant((conflictModes & modeMask(mode)) == 0); + conflictModes |= modeMask(mode); + } +} + +void LockHead::decConflictModeCount(LockMode mode) { + invariant(conflictCounts[mode] >= 1); + if (--conflictCounts[mode] == 0) { + invariant((conflictModes & modeMask(mode)) == modeMask(mode)); + conflictModes &= ~modeMask(mode); + } +} + // // LockManager // @@ -1204,4 +1113,8 @@ const char* lockRequestStatusName(LockRequest::Status status) { return LockRequestStatusNames[status]; } +LockManager::TemporaryResourceQueue::TemporaryResourceQueue(ResourceId resourceId) { + _lockHead.initNew(resourceId); +} + } // namespace mongo diff --git a/src/mongo/db/concurrency/lock_manager.h b/src/mongo/db/concurrency/lock_manager.h index 2956232f996..5f481b76911 100644 --- a/src/mongo/db/concurrency/lock_manager.h +++ b/src/mongo/db/concurrency/lock_manager.h @@ -57,6 +57,8 @@ public: LockManager(); ~LockManager(); + class TemporaryResourceQueue; + /** * Acquires lock on the specified resource in the specified mode and returns the outcome * of the operation. See the details for LockResult for more information on what the @@ -138,6 +140,10 @@ private: // The lockheads need access to the partitions friend struct LockHead; + // ReplicationLockManagerManipulator manipulates LockManager state directly in order to + // encapsulate specific logic for replication state transitions. + friend class ReplicationLockManagerManipulator; + // These types describe the locks hash table struct LockBucket { @@ -200,7 +206,7 @@ private: * * @param lock Lock whose grant state should be recalculated. * @param checkConflictQueue Whether to go through the conflict queue. This is an - * optimisation in that we only need to check the conflict queue if one of the + * optimization in that we only need to check the conflict queue if one of the * granted modes, which was conflicting before became zero. */ void _onLockModeChanged(LockHead* lock, bool checkConflictQueue); @@ -313,4 +319,146 @@ private: bool _foundCycle; }; +/** + * There is one of these objects for each resource that has a lock request. Empty objects (i.e. + * LockHead with no requests) are allowed to exist on the lock manager's hash table. + * + * The memory and lifetime is controlled entirely by the LockManager class. + * + * Not thread-safe and should only be accessed under the LockManager's bucket lock. Must be locked + * before locking a partition, not after. + * + * This struct *should* just be a private sub-class of LockManager since it is effectively an + * implementation detail of LockManager. The reason it isn't is because it is also used by + * ReplicationLockManagerManipulator and because it is forward-declared in lock_manager_defs.h. + * Nevertheless this struct should be thought of as an implementation detail of LockManager and + * should not be used directly except by LockManager and friend classes of LockManager that serve to + * extend LockManager functionality. + */ +struct LockHead { +private: + friend class DeadlockDetector; + friend class LockManager; + friend class ReplicationLockManagerManipulator; + + /** + * Used for initialization of a LockHead, which might have been retrieved from cache and also in + * order to keep the LockHead structure a POD. + */ + void initNew(ResourceId resId); + + /** + * True iff there may be partitions with granted requests for this resource. + */ + bool partitioned() const; + + /** + * Locates the request corresponding to the particular locker or returns nullptr. Must be called + * with the bucket holding this lock head locked. + */ + LockRequest* findRequest(LockerId lockerId) const; + + /** + * Finish creation of request and put it on the LockHead's conflict or granted queues. Returns + * LOCK_WAITING for conflict case and LOCK_OK otherwise. + */ + LockResult newRequest(LockRequest* request); + + /** + * Lock each partitioned LockHead in turn, and move any (granted) intent mode requests for + * lock->resourceId to lock, which must itself already be locked. + */ + void migratePartitionedLockHeads(); + + // Methods to maintain the granted queue + void incGrantedModeCount(LockMode mode); + + void decGrantedModeCount(LockMode mode); + + // Methods to maintain the conflict queue + void incConflictModeCount(LockMode mode); + + void decConflictModeCount(LockMode mode); + + // Id of the resource which is protected by this lock. Initialized at construction time and does + // not change. + ResourceId resourceId; + + // + // Granted queue + // + + // Doubly-linked list of requests, which have been granted. Newly granted requests go to + // the end of the queue. Conversion requests are granted from the beginning forward. + LockRequestList grantedList; + + // Counts the grants and conversion counts for each of the supported lock modes. These + // counts should exactly match the aggregated modes on the granted list. + uint32_t grantedCounts[LockModesCount]; + + // Bit-mask of the granted + converting modes on the granted queue. Maintained in lock-step + // with the grantedCounts array. + uint32_t grantedModes; + + // + // Conflict queue + // + + // Doubly-linked list of requests, which have not been granted yet because they conflict + // with the set of granted modes. Requests are queued at the end of the queue and are + // granted from the beginning forward, which gives these locks FIFO ordering. Exceptions + // are high-priority locks, such as the MMAP V1 flush lock. + LockRequestList conflictList; + + // Counts the conflicting requests for each of the lock modes. These counts should exactly + // match the aggregated modes on the conflicts list. + uint32_t conflictCounts[LockModesCount]; + + // Bit-mask of the conflict modes on the conflict queue. Maintained in lock-step with the + // conflictCounts array. + uint32_t conflictModes; + + // References partitions that may have PartitionedLockHeads for this LockHead. + // Non-empty implies the lock has no conflicts and only has intent modes as grantedModes. + // TODO: Remove this vector and make LockHead a POD + std::vector<LockManager::Partition*> partitions; + + // + // Conversion + // + + // Counts the number of requests on the granted queue, which have requested any kind of + // conflicting conversion and are blocked (i.e. all requests which are currently + // STATUS_CONVERTING). This is an optimization for unlocking in that we do not need to + // check the granted queue for requests in STATUS_CONVERTING if this count is zero. This + // saves cycles in the regular case and only burdens the less-frequent lock upgrade case. + uint32_t conversionsCount; + + // Counts the number of requests on the granted queue, which have requested that the policy + // be switched to compatible-first. As long as this value is > 0, the policy will stay + // compatible-first. + uint32_t compatibleFirstCount; +}; + +/** + * This class wraps a LockHead and is used to represent a temporary state for a resource managed by + * the LockManager. This allows us to prepare lock requests against a resource without those + * requests actually being present in the "true" version of the resource which is actually being + * managed by the LockManager. We use this to avoid exposing LockHead, which is an implementation + * detail of the LockManager, while still providing a handle to state for a LockManager resource. + */ +class LockManager::TemporaryResourceQueue { +public: + explicit TemporaryResourceQueue(ResourceId resourceId); + + ResourceId getResourceId() const { + return _lockHead.resourceId; + } + +private: + friend class ReplicationLockManagerManipulator; + + LockHead _lockHead; +}; + } // namespace mongo diff --git a/src/mongo/db/concurrency/lock_state.cpp b/src/mongo/db/concurrency/lock_state.cpp index e526931d785..576d476abae 100644 --- a/src/mongo/db/concurrency/lock_state.cpp +++ b/src/mongo/db/concurrency/lock_state.cpp @@ -34,6 +34,7 @@ #include <vector> +#include "mongo/db/concurrency/replication_lock_manager_manipulator.h" #include "mongo/db/namespace_string.h" #include "mongo/db/service_context.h" #include "mongo/platform/compiler.h" @@ -120,6 +121,12 @@ AtomicUInt64 idCounter(0); // Partitioned global lock statistics, so we don't hit the same bucket PartitionedInstanceWideLockStats globalStats; +// When this is set, lock acquisitions for the global resource go into the TemporaryRequestQueue +// stored in this decoration instead of letting the LockManager look up and put the requests into +// the true global resource state. +OperationContext::Decoration<LockManager::TemporaryResourceQueue*> globalResourceShadow = + OperationContext::declareDecoration<LockManager::TemporaryResourceQueue*>(); + } // namespace bool LockerImpl::_shouldDelayUnlock(ResourceId resId, LockMode mode) const { @@ -263,7 +270,8 @@ LockerImpl::~LockerImpl() { invariant(!inAWriteUnitOfWork()); invariant(_numResourcesToUnlockAtEndUnitOfWork == 0); invariant(_requests.empty()); - invariant(_modeForTicket == MODE_NONE); + invariant(_modeForTicket == MODE_NONE, + str::stream() << "_modeForTicket found: " << _modeForTicket); // Reset the locking statistics so the object can be reused _stats.reset(); @@ -349,7 +357,7 @@ LockResult LockerImpl::_lockGlobalBegin(OperationContext* opCtx, LockMode mode, } LockMode actualLockMode = mode; - if (opCtx) { + if (opCtx && opCtx->getServiceContext()) { auto storageEngine = opCtx->getServiceContext()->getStorageEngine(); if (storageEngine && !storageEngine->supportsDBLocking()) { actualLockMode = isSharedLockMode(mode) ? MODE_S : MODE_X; @@ -361,7 +369,7 @@ LockResult LockerImpl::_lockGlobalBegin(OperationContext* opCtx, LockMode mode, // Currently, deadlock detection does not happen inline with lock acquisition so the only // unsuccessful result that the lock manager would return is LOCK_WAITING. - invariant(result == LOCK_WAITING); + invariant(result == LOCK_WAITING, str::stream() << "Unexpected lock result: " << result); return result; } @@ -656,6 +664,42 @@ void LockerImpl::restoreLockState(OperationContext* opCtx, const Locker::LockSna invariant(_modeForTicket != MODE_NONE); } +void LockerImpl::restoreLockStateWithTemporaryGlobalResource( + OperationContext* opCtx, + const LockSnapshot& state, + LockManager::TemporaryResourceQueue* tempGlobalResource) { + invariant(tempGlobalResource->getResourceId().getType() == ResourceType::RESOURCE_GLOBAL); + invariant(globalResourceShadow(opCtx) == nullptr); + + globalResourceShadow(opCtx) = tempGlobalResource; + ON_BLOCK_EXIT([&] { globalResourceShadow(opCtx) = nullptr; }); + + restoreLockState(opCtx, state); +} + +void LockerImpl::replaceGlobalLockStateWithTemporaryGlobalResource( + LockManager::TemporaryResourceQueue* tempGlobalResource) { + invariant(tempGlobalResource->getResourceId().getType() == ResourceType::RESOURCE_GLOBAL); + + // Transfer the LockRequests from tempGlobalResource into the true resource for the global lock + // that is managed by the LockManager. This also removes the existing MODE_X LockRequest from + // the granted list for the true global resource, but we still need to delete that LockRequest + // and remove it from this Locker's _requests list. + ReplicationLockManagerManipulator(&globalLockManager) + .replaceGlobalLocksWithLocksFromTemporaryGlobalResource(resourceIdGlobal, + tempGlobalResource); + + // Release the ticket that this Locker was holding for the global X lock. + invariant(_modeForTicket == MODE_X); + _releaseTicket(); + _modeForTicket = MODE_NONE; + + // Now fully delete the LockRequest. + auto it = _requests.find(resourceIdGlobal); + scoped_spinlock scopedLock(_lock); + it.remove(); +} + LockResult LockerImpl::lockBegin(OperationContext* opCtx, ResourceId resId, LockMode mode) { dassert(!getWaitingResource().isValid()); @@ -711,8 +755,21 @@ LockResult LockerImpl::lockBegin(OperationContext* opCtx, ResourceId resId, Lock // otherwise we might reset state if the lock becomes granted very fast. _notify.clear(); - LockResult result = isNew ? globalLockManager.lock(resId, request, mode) - : globalLockManager.convert(resId, request, mode); + LockResult result{LockResult::LOCK_INVALID}; + if (resType == RESOURCE_GLOBAL && opCtx && globalResourceShadow(opCtx)) { + // If we're trying to lock the global resource and we have a temporary global resource + // installed, use the temporary resource instead of letting the LockManager look up the + // true resource for the global lock. + invariant(isNew); + ReplicationLockManagerManipulator(&globalLockManager) + .lockUncontestedTemporaryGlobalResource(globalResourceShadow(opCtx), request, mode); + // lockUncontestedTemporaryGlobalResource can't fail. + result = LockResult::LOCK_OK; + } else { + // Normal case using the true global lock head. + result = isNew ? globalLockManager.lock(resId, request, mode) + : globalLockManager.convert(resId, request, mode); + } if (result == LOCK_WAITING) { globalStats.recordWait(_id, resId, mode); diff --git a/src/mongo/db/concurrency/lock_state.h b/src/mongo/db/concurrency/lock_state.h index daf86c3eb5e..8eb9ec4053c 100644 --- a/src/mongo/db/concurrency/lock_state.h +++ b/src/mongo/db/concurrency/lock_state.h @@ -193,6 +193,14 @@ public: restoreLockState(nullptr, stateToRestore); } + void restoreLockStateWithTemporaryGlobalResource( + OperationContext* opCtx, + const LockSnapshot& stateToRestore, + LockManager::TemporaryResourceQueue* tempGlobalResource) override; + + void replaceGlobalLockStateWithTemporaryGlobalResource( + LockManager::TemporaryResourceQueue* tempGlobalResource) override; + virtual void releaseTicket(); virtual void reacquireTicket(OperationContext* opCtx); diff --git a/src/mongo/db/concurrency/lock_state_test.cpp b/src/mongo/db/concurrency/lock_state_test.cpp index 4d2da27bd94..3ee780e364b 100644 --- a/src/mongo/db/concurrency/lock_state_test.cpp +++ b/src/mongo/db/concurrency/lock_state_test.cpp @@ -37,6 +37,7 @@ #include "mongo/config.h" #include "mongo/db/concurrency/lock_manager_test_help.h" #include "mongo/db/concurrency/locker.h" +#include "mongo/db/operation_context_noop.h" #include "mongo/unittest/unittest.h" #include "mongo/util/log.h" #include "mongo/util/timer.h" @@ -657,4 +658,102 @@ TEST(LockerImpl, ConvertLockPendingUnlockAndUnlock) { locker.unlockGlobal(); } +/** + * This test mimics the process that replica set state transitions go through. + * First a request for the global X lock is enqueued, then the existing locks held by various + * ongoing prepared transactions are yielded so that the global X lock request can succeed. + * When the state transition is complete we then need to atomically restore the locks that were + * yielded while dropping the global X lock that was held for the transition. + */ +TEST(LockerImpl, AtomicLockRestoreAndGlobalLockReleaseForReplStateTransitions) { + const ResourceId globalResId(RESOURCE_GLOBAL, ResourceId::SINGLETON_GLOBAL); + const ResourceId resIdDatabase(RESOURCE_DATABASE, "TestDB"_sd); + const ResourceId resIdCollection1(RESOURCE_COLLECTION, "TestDB.collection1"_sd); + const ResourceId resIdCollection2(RESOURCE_COLLECTION, "TestDB.collection2"_sd); + const ResourceId resIdCollection3(RESOURCE_COLLECTION, "TestDB.collection2"_sd); + + Locker::LockSnapshot lockInfo1, lockInfo2, lockInfo3; + LockerImpl stepUpLocker, txnLocker1, txnLocker2, txnLocker3, randomOpLocker1, randomOpLocker2; + OperationContextNoop opCtx1, opCtx2, opCtx3; + ON_BLOCK_EXIT([&] { + // clean up locks on test completion. + stepUpLocker.unlockGlobal(); + txnLocker1.unlockGlobal(); + txnLocker2.unlockGlobal(); + txnLocker3.unlockGlobal(); + randomOpLocker1.unlockGlobal(); + randomOpLocker2.unlockGlobal(); + }); + + // Take some locks, mimicking the locks that would be held by ongoing prepared transactions. + txnLocker1.lockGlobal(&opCtx1, MODE_IX); + ASSERT_EQUALS(LOCK_OK, txnLocker1.lock(&opCtx1, resIdDatabase, MODE_IX)); + ASSERT_EQUALS(LOCK_OK, txnLocker1.lock(&opCtx1, resIdCollection1, MODE_IX)); + ASSERT_EQUALS(LOCK_OK, txnLocker1.lock(&opCtx1, resIdCollection2, MODE_IX)); + + txnLocker2.lockGlobal(&opCtx2, MODE_IX); + ASSERT_EQUALS(LOCK_OK, txnLocker2.lock(&opCtx2, resIdDatabase, MODE_IX)); + ASSERT_EQUALS(LOCK_OK, txnLocker2.lock(&opCtx2, resIdCollection2, MODE_IX)); + + txnLocker3.lockGlobal(&opCtx3, MODE_IX); + ASSERT_EQUALS(LOCK_OK, txnLocker3.lock(&opCtx3, resIdDatabase, MODE_IX)); + ASSERT_EQUALS(LOCK_OK, txnLocker3.lock(&opCtx3, resIdCollection3, MODE_IX)); + + // Enqueue request for global X lock in stepUpLocker, mimicking the lock held by the thread + // performing the state transition. + ASSERT_EQUALS(LockResult::LOCK_WAITING, stepUpLocker.lockGlobalBegin(MODE_X, Date_t::max())); + + // Enqueue a lock request behind the pending global X request to ensure that it gets granted + // later when the global X lock is released. + ASSERT_EQUALS(LockResult::LOCK_WAITING, + randomOpLocker1.lockGlobalBegin(MODE_IS, Date_t::max())); + + // Yield locks on all txn threads. + ASSERT_TRUE(txnLocker1.saveLockStateAndUnlock(&lockInfo1)); + ASSERT_TRUE(txnLocker2.saveLockStateAndUnlock(&lockInfo2)); + ASSERT_TRUE(txnLocker3.saveLockStateAndUnlock(&lockInfo3)); + + // Ensure that stepUpLocker is now able to acquire the global X lock. + ASSERT_EQUALS(LockResult::LOCK_OK, stepUpLocker.lockGlobalComplete(Date_t::max())); + + // Enqueue a lock request behind the global X lock to ensure that it gets granted + // later when the global X lock is released. + ASSERT_EQUALS(LockResult::LOCK_WAITING, + randomOpLocker2.lockGlobalBegin(MODE_IX, Date_t::max())); + + // Now we need to atomically release the global X lock from stepUpLocker and restore the lock + // state for the txn threads. We start by restoring the lock state from the txn threads with + // the global locks placed into a temporary LockHead. + LockManager::TemporaryResourceQueue tempGlobalResource(globalResId); + + txnLocker1.restoreLockStateWithTemporaryGlobalResource(&opCtx1, lockInfo1, &tempGlobalResource); + txnLocker2.restoreLockStateWithTemporaryGlobalResource(&opCtx2, lockInfo2, &tempGlobalResource); + txnLocker3.restoreLockStateWithTemporaryGlobalResource(&opCtx3, lockInfo3, &tempGlobalResource); + + // Atomically release the global X lock from stepUpLocker and move the global locks for the + // txn threads from the temporary LockHead into the true LockHead for the global lock. + stepUpLocker.replaceGlobalLockStateWithTemporaryGlobalResource(&tempGlobalResource); + + // Make sure all the locks were acquired and released appropriately. + ASSERT_EQUALS(MODE_NONE, stepUpLocker.getLockMode(globalResId)); + + ASSERT_EQUALS(MODE_IX, txnLocker1.getLockMode(globalResId)); + ASSERT_EQUALS(MODE_IX, txnLocker1.getLockMode(resIdDatabase)); + ASSERT_EQUALS(MODE_IX, txnLocker1.getLockMode(resIdCollection1)); + ASSERT_EQUALS(MODE_IX, txnLocker1.getLockMode(resIdCollection2)); + + ASSERT_EQUALS(MODE_IX, txnLocker2.getLockMode(globalResId)); + ASSERT_EQUALS(MODE_IX, txnLocker2.getLockMode(resIdDatabase)); + ASSERT_EQUALS(MODE_IX, txnLocker2.getLockMode(resIdCollection2)); + + ASSERT_EQUALS(MODE_IX, txnLocker3.getLockMode(globalResId)); + ASSERT_EQUALS(MODE_IX, txnLocker3.getLockMode(resIdDatabase)); + ASSERT_EQUALS(MODE_IX, txnLocker3.getLockMode(resIdCollection3)); + + // Make sure the pending global lock requests got granted when the global X lock was released. + ASSERT_EQUALS(LockResult::LOCK_OK, randomOpLocker1.lockGlobalComplete(Date_t::now())); + ASSERT_EQUALS(LockResult::LOCK_OK, randomOpLocker2.lockGlobalComplete(Date_t::now())); + ASSERT_EQUALS(MODE_IS, randomOpLocker1.getLockMode(globalResId)); + ASSERT_EQUALS(MODE_IX, randomOpLocker2.getLockMode(globalResId)); +} } // namespace mongo diff --git a/src/mongo/db/concurrency/locker.h b/src/mongo/db/concurrency/locker.h index 29b0ca9d364..07e45d052c5 100644 --- a/src/mongo/db/concurrency/locker.h +++ b/src/mongo/db/concurrency/locker.h @@ -366,6 +366,26 @@ public: virtual void restoreLockState(const LockSnapshot& stateToRestore) = 0; /** + * Works like restoreLockState but for any global locks in the state to restore, rather than + * restoring them into the true global lock resource owned by the LockManager, + * restores the global locks into the TemporaryResourceQueue for the global resource that is + * provided. Locks on resources other than the global lock are restored to their true + * LockManager-owned resource objects. + */ + virtual void restoreLockStateWithTemporaryGlobalResource( + OperationContext* opCtx, + const LockSnapshot& stateToRestore, + LockManager::TemporaryResourceQueue* tempGlobalResource) = 0; + + /** + * Atomically releases the global X lock from the true global resource managed by the + * LockManager and transfers the locks from the 'tempGlobalResource' into the true global + * resource. + */ + virtual void replaceGlobalLockStateWithTemporaryGlobalResource( + LockManager::TemporaryResourceQueue* tempGlobalResource) = 0; + + /** * Releases the ticket associated with the Locker. This allows locks to be held without * contributing to reader/writer throttling. */ diff --git a/src/mongo/db/concurrency/locker_noop.h b/src/mongo/db/concurrency/locker_noop.h index 232b0b5f685..957627c8124 100644 --- a/src/mongo/db/concurrency/locker_noop.h +++ b/src/mongo/db/concurrency/locker_noop.h @@ -178,6 +178,18 @@ public: MONGO_UNREACHABLE; } + void restoreLockStateWithTemporaryGlobalResource( + OperationContext* opCtx, + const LockSnapshot& stateToRestore, + LockManager::TemporaryResourceQueue* tempGlobalResource) override { + MONGO_UNREACHABLE; + } + + void replaceGlobalLockStateWithTemporaryGlobalResource( + LockManager::TemporaryResourceQueue* tempGlobalResource) override { + MONGO_UNREACHABLE; + } + virtual void releaseTicket() { MONGO_UNREACHABLE; } diff --git a/src/mongo/db/concurrency/replication_lock_manager_manipulator.cpp b/src/mongo/db/concurrency/replication_lock_manager_manipulator.cpp new file mode 100644 index 00000000000..009bbbe3c20 --- /dev/null +++ b/src/mongo/db/concurrency/replication_lock_manager_manipulator.cpp @@ -0,0 +1,105 @@ +/** + * Copyright (C) 2018 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kDefault + +#include "mongo/platform/basic.h" + +#include "mongo/db/concurrency/replication_lock_manager_manipulator.h" + +namespace mongo { + +ReplicationLockManagerManipulator::ReplicationLockManagerManipulator(LockManager* lockManager) + : _lockManager(lockManager) {} + +void ReplicationLockManagerManipulator::lockUncontestedTemporaryGlobalResource( + LockManager::TemporaryResourceQueue* tempGlobalResource, LockRequest* request, LockMode mode) { + // Sanity check that requests are not being reused without proper cleanup + invariant(request->status == LockRequest::STATUS_NEW); + invariant(request->recursiveCount == 1); + invariant(!request->partitioned); + invariant(tempGlobalResource->_lockHead.resourceId.getType() == ResourceType::RESOURCE_GLOBAL); + + request->mode = mode; + const auto lockResult = tempGlobalResource->_lockHead.newRequest(request); + invariant(lockResult == LockResult::LOCK_OK); +} + +void ReplicationLockManagerManipulator::replaceGlobalLocksWithLocksFromTemporaryGlobalResource( + ResourceId resId, LockManager::TemporaryResourceQueue* tempGlobalResource) { + invariant(resId.getType() == ResourceType::RESOURCE_GLOBAL); + invariant(tempGlobalResource->_lockHead.resourceId == resId); + + LockManager::LockBucket* bucket = _lockManager->_getBucket(resId); + stdx::lock_guard<SimpleMutex> scopedLock(bucket->mutex); + LockHead* trueGlobalLockHead = bucket->findOrInsert(resId); + + LockHead* tempGlobalLockHead = &tempGlobalResource->_lockHead; + + invariant(trueGlobalLockHead->grantedCounts[MODE_X] == 1); + invariant(trueGlobalLockHead->compatibleFirstCount == 1); + invariant(tempGlobalLockHead->conflictList.empty()); + + LockRequest* existingGlobalLockRequest = trueGlobalLockHead->grantedList._front; + invariant(!existingGlobalLockRequest->next); + invariant(existingGlobalLockRequest->mode == MODE_X); + invariant(existingGlobalLockRequest->status == LockRequest::Status::STATUS_GRANTED); + + // Remove the existing granted MODE_X lock from the trueGlobalLockHead so it can be replaced + // by the locks from tempGlobalLockHead. + trueGlobalLockHead->grantedList.remove(existingGlobalLockRequest); + trueGlobalLockHead->decGrantedModeCount(existingGlobalLockRequest->mode); + trueGlobalLockHead->compatibleFirstCount--; + + // Now iterate over the granted LockRequests in the tempGlobalLockHead and transfer them over + // to the trueGlobalLockHead. + for (LockRequest* it = tempGlobalLockHead->grantedList._front; it != nullptr;) { + LockRequest* next = it->next; + + invariant(it->mode == MODE_IX); + invariant(it->status == LockRequest::Status::STATUS_GRANTED); + invariant(it->lock == tempGlobalLockHead); + + it->lock = trueGlobalLockHead; + tempGlobalLockHead->grantedList.remove(it); + tempGlobalLockHead->decGrantedModeCount(it->mode); + trueGlobalLockHead->grantedList.push_back(it); + trueGlobalLockHead->incGrantedModeCount(it->mode); + + it = next; + } + invariant(tempGlobalLockHead->grantedList.empty()); + invariant(tempGlobalLockHead->grantedCounts[MODE_X] == 0); + invariant(tempGlobalLockHead->grantedModes == 0); + + // Grant any pending requests against the true global lock head that can proceed now that the + // global X lock has been released. + _lockManager->_onLockModeChanged(trueGlobalLockHead, true); +} + +} // namespace mongo diff --git a/src/mongo/db/concurrency/replication_lock_manager_manipulator.h b/src/mongo/db/concurrency/replication_lock_manager_manipulator.h new file mode 100644 index 00000000000..44048de2eb2 --- /dev/null +++ b/src/mongo/db/concurrency/replication_lock_manager_manipulator.h @@ -0,0 +1,71 @@ +/** + * Copyright (C) 2018 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/db/concurrency/lock_manager.h" + +namespace mongo { + +/** + * This friend class to the LockManager extends LockManager functionality to enable behaviors + * required for replication state transitions, which must atomically release the global X lock and + * restore locks for prepared transactions into their individual Lockers. + */ +class ReplicationLockManagerManipulator { + MONGO_DISALLOW_COPYING(ReplicationLockManagerManipulator); + +public: + explicit ReplicationLockManagerManipulator(LockManager* lockManager); + ~ReplicationLockManagerManipulator() = default; + + /** + * Works like LockManager::lock() except that it only works for the Global lock and rather than + * looking up the true LockHead for the global resource ID, it puts the LockRequest into the + * given TemporaryResourceQueue, which is guaranteed not to have any conflicting locks for the + * given request. + */ + void lockUncontestedTemporaryGlobalResource( + LockManager::TemporaryResourceQueue* tempGlobalResource, + LockRequest* request, + LockMode mode); + + /** + * Takes the locks from a given TemporaryResourceQueue for the Global resource and moves them + * into the true LockHead for the global resource, atomically releasing the global X lock from + * the true LockHead for the global resource in the process. + */ + void replaceGlobalLocksWithLocksFromTemporaryGlobalResource( + ResourceId resId, LockManager::TemporaryResourceQueue* tempGlobalResource); + + +private: + LockManager* _lockManager; +}; + +} // namespace mongo |