author    Spencer T Brody <spencer@mongodb.com>  2018-08-22 14:37:16 -0400
committer Spencer T Brody <spencer@mongodb.com>  2018-09-18 13:20:18 -0400
commit    e65ff57e108ed69c46cc0b0ccbdd675663de2469 (patch)
tree      010051703fd944884d99571e31f1dd45b28da8a1
parent    157691ef0babb24cd1566446f5f88206f9607564 (diff)
download  mongo-e65ff57e108ed69c46cc0b0ccbdd675663de2469.tar.gz
SERVER-36913 Add functionality to LockManager for repl state transitions with prepared transactions.
-rw-r--r--  src/mongo/db/concurrency/SConscript                                |   1
-rw-r--r--  src/mongo/db/concurrency/lock_manager.cpp                          | 317
-rw-r--r--  src/mongo/db/concurrency/lock_manager.h                            | 150
-rw-r--r--  src/mongo/db/concurrency/lock_state.cpp                            |  67
-rw-r--r--  src/mongo/db/concurrency/lock_state.h                              |   8
-rw-r--r--  src/mongo/db/concurrency/lock_state_test.cpp                       |  99
-rw-r--r--  src/mongo/db/concurrency/locker.h                                  |  20
-rw-r--r--  src/mongo/db/concurrency/locker_noop.h                             |  12
-rw-r--r--  src/mongo/db/concurrency/replication_lock_manager_manipulator.cpp | 105
-rw-r--r--  src/mongo/db/concurrency/replication_lock_manager_manipulator.h   |  71
10 files changed, 642 insertions, 208 deletions
diff --git a/src/mongo/db/concurrency/SConscript b/src/mongo/db/concurrency/SConscript
index 0ca5123d60b..9ddfa508f36 100644
--- a/src/mongo/db/concurrency/SConscript
+++ b/src/mongo/db/concurrency/SConscript
@@ -38,6 +38,7 @@ env.Library(
'lock_manager.cpp',
'lock_state.cpp',
'lock_stats.cpp',
+ 'replication_lock_manager_manipulator.cpp',
],
LIBDEPS=[
'$BUILD_DIR/mongo/base',
diff --git a/src/mongo/db/concurrency/lock_manager.cpp b/src/mongo/db/concurrency/lock_manager.cpp
index b18007877f8..0b93ae967fb 100644
--- a/src/mongo/db/concurrency/lock_manager.cpp
+++ b/src/mongo/db/concurrency/lock_manager.cpp
@@ -129,208 +129,6 @@ MONGO_STATIC_ASSERT((sizeof(LockRequestStatusNames) / sizeof(LockRequestStatusNa
} // namespace
/**
- * There is one of these objects for each resource that has a lock request. Empty objects (i.e.
- * LockHead with no requests) are allowed to exist on the lock manager's hash table.
- *
- * The memory and lifetime is controlled entirely by the LockManager class.
- *
- * Not thread-safe and should only be accessed under the LockManager's bucket lock. Must be locked
- * before locking a partition, not after.
- */
-struct LockHead {
-
- /**
- * Used for initialization of a LockHead, which might have been retrieved from cache and also in
- * order to keep the LockHead structure a POD.
- */
- void initNew(ResourceId resId) {
- resourceId = resId;
-
- grantedList.reset();
- memset(grantedCounts, 0, sizeof(grantedCounts));
- grantedModes = 0;
-
- conflictList.reset();
- memset(conflictCounts, 0, sizeof(conflictCounts));
- conflictModes = 0;
-
- conversionsCount = 0;
- compatibleFirstCount = 0;
- }
-
- /**
- * True iff there may be partitions with granted requests for this resource.
- */
- bool partitioned() const {
- return !partitions.empty();
- }
-
- /**
- * Locates the request corresponding to the particular locker or returns nullptr. Must be called
- * with the bucket holding this lock head locked.
- */
- LockRequest* findRequest(LockerId lockerId) const {
- // Check the granted queue first
- for (LockRequest* it = grantedList._front; it != nullptr; it = it->next) {
- if (it->locker->getId() == lockerId) {
- return it;
- }
- }
-
- // Check the conflict queue second
- for (LockRequest* it = conflictList._front; it != nullptr; it = it->next) {
- if (it->locker->getId() == lockerId) {
- return it;
- }
- }
-
- return nullptr;
- }
-
- /**
- * Finish creation of request and put it on the LockHead's conflict or granted queues. Returns
- * LOCK_WAITING for conflict case and LOCK_OK otherwise.
- */
- LockResult newRequest(LockRequest* request) {
- invariant(!request->partitionedLock);
- request->lock = this;
-
- // We cannot set request->partitioned to false, as this might be a migration, in which case
- // access to that field is not protected. The 'partitioned' member instead indicates if a
- // request was initially partitioned.
-
- // New lock request. Queue after all granted modes and after any already requested
- // conflicting modes
- if (conflicts(request->mode, grantedModes) ||
- (!compatibleFirstCount && conflicts(request->mode, conflictModes))) {
- request->status = LockRequest::STATUS_WAITING;
-
- // Put it on the conflict queue. Conflicts are granted front to back.
- if (request->enqueueAtFront) {
- conflictList.push_front(request);
- } else {
- conflictList.push_back(request);
- }
-
- incConflictModeCount(request->mode);
-
- return LOCK_WAITING;
- }
-
- // No conflict, new request
- request->status = LockRequest::STATUS_GRANTED;
-
- grantedList.push_back(request);
- incGrantedModeCount(request->mode);
-
- if (request->compatibleFirst) {
- compatibleFirstCount++;
- }
-
- return LOCK_OK;
- }
-
- /**
- * Lock each partitioned LockHead in turn, and move any (granted) intent mode requests for
- * lock->resourceId to lock, which must itself already be locked.
- */
- void migratePartitionedLockHeads();
-
- // Methods to maintain the granted queue
- void incGrantedModeCount(LockMode mode) {
- invariant(grantedCounts[mode] >= 0);
- if (++grantedCounts[mode] == 1) {
- invariant((grantedModes & modeMask(mode)) == 0);
- grantedModes |= modeMask(mode);
- }
- }
-
- void decGrantedModeCount(LockMode mode) {
- invariant(grantedCounts[mode] >= 1);
- if (--grantedCounts[mode] == 0) {
- invariant((grantedModes & modeMask(mode)) == modeMask(mode));
- grantedModes &= ~modeMask(mode);
- }
- }
-
- // Methods to maintain the conflict queue
- void incConflictModeCount(LockMode mode) {
- invariant(conflictCounts[mode] >= 0);
- if (++conflictCounts[mode] == 1) {
- invariant((conflictModes & modeMask(mode)) == 0);
- conflictModes |= modeMask(mode);
- }
- }
-
- void decConflictModeCount(LockMode mode) {
- invariant(conflictCounts[mode] >= 1);
- if (--conflictCounts[mode] == 0) {
- invariant((conflictModes & modeMask(mode)) == modeMask(mode));
- conflictModes &= ~modeMask(mode);
- }
- }
-
- // Id of the resource which is protected by this lock. Initialized at construction time and does
- // not change.
- ResourceId resourceId;
-
- //
- // Granted queue
- //
-
- // Doubly-linked list of requests, which have been granted. Newly granted requests go to
- // the end of the queue. Conversion requests are granted from the beginning forward.
- LockRequestList grantedList;
-
- // Counts the grants and conversion counts for each of the supported lock modes. These
- // counts should exactly match the aggregated modes on the granted list.
- uint32_t grantedCounts[LockModesCount];
-
- // Bit-mask of the granted + converting modes on the granted queue. Maintained in lock-step
- // with the grantedCounts array.
- uint32_t grantedModes;
-
- //
- // Conflict queue
- //
-
- // Doubly-linked list of requests, which have not been granted yet because they conflict
- // with the set of granted modes. Requests are queued at the end of the queue and are
- // granted from the beginning forward, which gives these locks FIFO ordering. Exceptions
- // are high-priority locks, such as the MMAP V1 flush lock.
- LockRequestList conflictList;
-
- // Counts the conflicting requests for each of the lock modes. These counts should exactly
- // match the aggregated modes on the conflicts list.
- uint32_t conflictCounts[LockModesCount];
-
- // Bit-mask of the conflict modes on the conflict queue. Maintained in lock-step with the
- // conflictCounts array.
- uint32_t conflictModes;
-
- // References partitions that may have PartitionedLockHeads for this LockHead.
- // Non-empty implies the lock has no conflicts and only has intent modes as grantedModes.
- // TODO: Remove this vector and make LockHead a POD
- std::vector<LockManager::Partition*> partitions;
-
- //
- // Conversion
- //
-
- // Counts the number of requests on the granted queue, which have requested any kind of
- // conflicting conversion and are blocked (i.e. all requests which are currently
- // STATUS_CONVERTING). This is an optimization for unlocking in that we do not need to
- // check the granted queue for requests in STATUS_CONVERTING if this count is zero. This
- // saves cycles in the regular case and only burdens the less-frequent lock upgrade case.
- uint32_t conversionsCount;
-
- // Counts the number of requests on the granted queue, which have requested that the policy
- // be switched to compatible-first. As long as this value is > 0, the policy will stay
- // compatible-first.
- uint32_t compatibleFirstCount;
-};
-
-/**
* The PartitionedLockHead allows optimizing the case where requests overwhelmingly use
* the intent lock modes MODE_IS and MODE_IX, which are compatible with each other.
* Having to use a single LockHead causes contention where none would be needed.
@@ -370,6 +168,85 @@ struct PartitionedLockHead {
LockRequestList grantedList;
};
+//
+// LockHead
+//
+void LockHead::initNew(ResourceId resId) {
+ resourceId = resId;
+
+ grantedList.reset();
+ memset(grantedCounts, 0, sizeof(grantedCounts));
+ grantedModes = 0;
+
+ conflictList.reset();
+ memset(conflictCounts, 0, sizeof(conflictCounts));
+ conflictModes = 0;
+
+ conversionsCount = 0;
+ compatibleFirstCount = 0;
+}
+
+bool LockHead::partitioned() const {
+ return !partitions.empty();
+}
+
+LockRequest* LockHead::findRequest(LockerId lockerId) const {
+ // Check the granted queue first
+ for (LockRequest* it = grantedList._front; it != nullptr; it = it->next) {
+ if (it->locker->getId() == lockerId) {
+ return it;
+ }
+ }
+
+ // Check the conflict queue second
+ for (LockRequest* it = conflictList._front; it != nullptr; it = it->next) {
+ if (it->locker->getId() == lockerId) {
+ return it;
+ }
+ }
+
+ return nullptr;
+}
+
+LockResult LockHead::newRequest(LockRequest* request) {
+ invariant(!request->partitionedLock);
+ request->lock = this;
+
+ // We cannot set request->partitioned to false, as this might be a migration, in which case
+ // access to that field is not protected. The 'partitioned' member instead indicates if a
+ // request was initially partitioned.
+
+ // New lock request. Queue after all granted modes and after any already requested
+ // conflicting modes
+ if (conflicts(request->mode, grantedModes) ||
+ (!compatibleFirstCount && conflicts(request->mode, conflictModes))) {
+ request->status = LockRequest::STATUS_WAITING;
+
+ // Put it on the conflict queue. Conflicts are granted front to back.
+ if (request->enqueueAtFront) {
+ conflictList.push_front(request);
+ } else {
+ conflictList.push_back(request);
+ }
+
+ incConflictModeCount(request->mode);
+
+ return LOCK_WAITING;
+ }
+
+ // No conflict, new request
+ request->status = LockRequest::STATUS_GRANTED;
+
+ grantedList.push_back(request);
+ incGrantedModeCount(request->mode);
+
+ if (request->compatibleFirst) {
+ compatibleFirstCount++;
+ }
+
+ return LOCK_OK;
+}
+
void LockHead::migratePartitionedLockHeads() {
invariant(partitioned());
@@ -403,6 +280,38 @@ void LockHead::migratePartitionedLockHeads() {
}
}
+void LockHead::incGrantedModeCount(LockMode mode) {
+ invariant(grantedCounts[mode] >= 0);
+ if (++grantedCounts[mode] == 1) {
+ invariant((grantedModes & modeMask(mode)) == 0);
+ grantedModes |= modeMask(mode);
+ }
+}
+
+void LockHead::decGrantedModeCount(LockMode mode) {
+ invariant(grantedCounts[mode] >= 1);
+ if (--grantedCounts[mode] == 0) {
+ invariant((grantedModes & modeMask(mode)) == modeMask(mode));
+ grantedModes &= ~modeMask(mode);
+ }
+}
+
+void LockHead::incConflictModeCount(LockMode mode) {
+ invariant(conflictCounts[mode] >= 0);
+ if (++conflictCounts[mode] == 1) {
+ invariant((conflictModes & modeMask(mode)) == 0);
+ conflictModes |= modeMask(mode);
+ }
+}
+
+void LockHead::decConflictModeCount(LockMode mode) {
+ invariant(conflictCounts[mode] >= 1);
+ if (--conflictCounts[mode] == 0) {
+ invariant((conflictModes & modeMask(mode)) == modeMask(mode));
+ conflictModes &= ~modeMask(mode);
+ }
+}
+
//
// LockManager
//
@@ -1204,4 +1113,8 @@ const char* lockRequestStatusName(LockRequest::Status status) {
return LockRequestStatusNames[status];
}
+LockManager::TemporaryResourceQueue::TemporaryResourceQueue(ResourceId resourceId) {
+ _lockHead.initNew(resourceId);
+}
+
} // namespace mongo
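
The inc/dec helpers above follow the bookkeeping convention used throughout this file: a per-mode count array and a summary bitmask maintained in lock-step, so conflict detection is a single mask test rather than a scan over the counts. A minimal standalone sketch of that invariant, with hypothetical names rather than the MongoDB types:

    #include <cassert>
    #include <cstdint>

    // Hypothetical stand-ins for LockMode / LockModesCount.
    enum Mode { kModeIS, kModeIX, kModeS, kModeX, kModeCount };

    struct ModeSet {
        uint32_t counts[kModeCount] = {};
        uint32_t mask = 0;  // Bit m is set iff counts[m] > 0, kept in lock-step.

        void inc(Mode m) {
            if (++counts[m] == 1) mask |= (1u << m);  // First holder sets the bit.
        }
        void dec(Mode m) {
            assert(counts[m] >= 1);
            if (--counts[m] == 0) mask &= ~(1u << m);  // Last holder clears it.
        }
        // With the mask maintained, checking an incoming mode against all held
        // modes is one AND against a per-mode conflict table entry.
        bool conflictsWith(uint32_t conflictTableEntryForNewMode) const {
            return (mask & conflictTableEntryForNewMode) != 0;
        }
    };

This is why a call like conflicts(request->mode, grantedModes) can stay O(1) no matter how many requests are currently granted.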
diff --git a/src/mongo/db/concurrency/lock_manager.h b/src/mongo/db/concurrency/lock_manager.h
index 2956232f996..5f481b76911 100644
--- a/src/mongo/db/concurrency/lock_manager.h
+++ b/src/mongo/db/concurrency/lock_manager.h
@@ -57,6 +57,8 @@ public:
LockManager();
~LockManager();
+ class TemporaryResourceQueue;
+
/**
* Acquires lock on the specified resource in the specified mode and returns the outcome
* of the operation. See the details for LockResult for more information on what the
@@ -138,6 +140,10 @@ private:
// The lockheads need access to the partitions
friend struct LockHead;
+ // ReplicationLockManagerManipulator manipulates LockManager state directly in order to
+ // encapsulate specific logic for replication state transitions.
+ friend class ReplicationLockManagerManipulator;
+
// These types describe the locks hash table
struct LockBucket {
@@ -200,7 +206,7 @@ private:
*
* @param lock Lock whose grant state should be recalculated.
* @param checkConflictQueue Whether to go through the conflict queue. This is an
- * optimisation in that we only need to check the conflict queue if one of the
+ * optimization in that we only need to check the conflict queue if one of the
 *        granted modes, which was conflicting before, became zero.
*/
void _onLockModeChanged(LockHead* lock, bool checkConflictQueue);
@@ -313,4 +319,146 @@ private:
bool _foundCycle;
};
+/**
+ * There is one of these objects for each resource that has a lock request. Empty objects (i.e.
+ * LockHead with no requests) are allowed to exist on the lock manager's hash table.
+ *
+ * The memory and lifetime is controlled entirely by the LockManager class.
+ *
+ * Not thread-safe and should only be accessed under the LockManager's bucket lock. Must be locked
+ * before locking a partition, not after.
+ *
+ * This struct *should* just be a private nested class of LockManager, since it is effectively an
+ * implementation detail of LockManager. The reason it isn't is that it is also used by
+ * ReplicationLockManagerManipulator and is forward-declared in lock_manager_defs.h.
+ * Nevertheless, this struct should be thought of as an implementation detail of LockManager and
+ * should not be used directly, except by LockManager and those friend classes of LockManager that
+ * serve to extend LockManager functionality.
+ */
+struct LockHead {
+private:
+ friend class DeadlockDetector;
+ friend class LockManager;
+ friend class ReplicationLockManagerManipulator;
+
+ /**
+     * Initializes a LockHead, which might have been retrieved from cache. Initialization happens
+     * through this method, rather than a constructor, in order to keep the LockHead structure a POD.
+ */
+ void initNew(ResourceId resId);
+
+ /**
+ * True iff there may be partitions with granted requests for this resource.
+ */
+ bool partitioned() const;
+
+ /**
+ * Locates the request corresponding to the particular locker or returns nullptr. Must be called
+ * with the bucket holding this lock head locked.
+ */
+ LockRequest* findRequest(LockerId lockerId) const;
+
+ /**
+     * Finishes creation of a request and puts it on the LockHead's conflict or granted queue.
+     * Returns LOCK_WAITING in the conflict case and LOCK_OK otherwise.
+ */
+ LockResult newRequest(LockRequest* request);
+
+ /**
+ * Lock each partitioned LockHead in turn, and move any (granted) intent mode requests for
+ * lock->resourceId to lock, which must itself already be locked.
+ */
+ void migratePartitionedLockHeads();
+
+ // Methods to maintain the granted queue
+ void incGrantedModeCount(LockMode mode);
+
+ void decGrantedModeCount(LockMode mode);
+
+ // Methods to maintain the conflict queue
+ void incConflictModeCount(LockMode mode);
+
+ void decConflictModeCount(LockMode mode);
+
+ // Id of the resource which is protected by this lock. Initialized at construction time and does
+ // not change.
+ ResourceId resourceId;
+
+ //
+ // Granted queue
+ //
+
+ // Doubly-linked list of requests, which have been granted. Newly granted requests go to
+ // the end of the queue. Conversion requests are granted from the beginning forward.
+ LockRequestList grantedList;
+
+ // Counts the grants and conversion counts for each of the supported lock modes. These
+ // counts should exactly match the aggregated modes on the granted list.
+ uint32_t grantedCounts[LockModesCount];
+
+ // Bit-mask of the granted + converting modes on the granted queue. Maintained in lock-step
+ // with the grantedCounts array.
+ uint32_t grantedModes;
+
+ //
+ // Conflict queue
+ //
+
+ // Doubly-linked list of requests, which have not been granted yet because they conflict
+ // with the set of granted modes. Requests are queued at the end of the queue and are
+ // granted from the beginning forward, which gives these locks FIFO ordering. Exceptions
+ // are high-priority locks, such as the MMAP V1 flush lock.
+ LockRequestList conflictList;
+
+ // Counts the conflicting requests for each of the lock modes. These counts should exactly
+ // match the aggregated modes on the conflicts list.
+ uint32_t conflictCounts[LockModesCount];
+
+ // Bit-mask of the conflict modes on the conflict queue. Maintained in lock-step with the
+ // conflictCounts array.
+ uint32_t conflictModes;
+
+ // References partitions that may have PartitionedLockHeads for this LockHead.
+ // Non-empty implies the lock has no conflicts and only has intent modes as grantedModes.
+ // TODO: Remove this vector and make LockHead a POD
+ std::vector<LockManager::Partition*> partitions;
+
+ //
+ // Conversion
+ //
+
+ // Counts the number of requests on the granted queue, which have requested any kind of
+ // conflicting conversion and are blocked (i.e. all requests which are currently
+ // STATUS_CONVERTING). This is an optimization for unlocking in that we do not need to
+ // check the granted queue for requests in STATUS_CONVERTING if this count is zero. This
+ // saves cycles in the regular case and only burdens the less-frequent lock upgrade case.
+ uint32_t conversionsCount;
+
+ // Counts the number of requests on the granted queue, which have requested that the policy
+ // be switched to compatible-first. As long as this value is > 0, the policy will stay
+ // compatible-first.
+ uint32_t compatibleFirstCount;
+};
+
+/**
+ * This class wraps a LockHead and is used to represent a temporary state for a resource managed by
+ * the LockManager. This allows us to prepare lock requests against a resource without those
+ * requests being present in the "true" version of the resource that the LockManager manages. We
+ * use this to avoid exposing LockHead, which is an implementation detail of the LockManager, while
+ * still providing a handle to state for a LockManager resource.
+ */
+class LockManager::TemporaryResourceQueue {
+public:
+ explicit TemporaryResourceQueue(ResourceId resourceId);
+
+ ResourceId getResourceId() const {
+ return _lockHead.resourceId;
+ }
+
+private:
+ friend class ReplicationLockManagerManipulator;
+
+ LockHead _lockHead;
+};
+
} // namespace mongo
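
The header comments call out that LockHead is kept a POD and re-initialized via initNew() because instances might have been retrieved from cache. A sketch of that recycling pattern under those assumptions (hypothetical names, not the actual LockManager pool; cleanup of pooled entries omitted for brevity):

    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Hypothetical POD entry recycled from a free list, standing in for LockHead.
    struct Entry {
        int resourceId;
        uint32_t counts[4];

        void initNew(int resId) {       // Re-initialization via a method, not a
            resourceId = resId;         // constructor, keeps the type a POD.
            std::memset(counts, 0, sizeof(counts));
        }
    };

    class Pool {
    public:
        Entry* acquire(int resId) {
            Entry* e;
            if (!_free.empty()) {       // Prefer a cached entry...
                e = _free.back();
                _free.pop_back();
            } else {
                e = new Entry();        // ...otherwise allocate a fresh one.
            }
            e->initNew(resId);          // Either way, initNew() resets all state.
            return e;
        }
        void release(Entry* e) {
            _free.push_back(e);         // Released entries keep stale state until
        }                               // the next initNew() call.

    private:
        std::vector<Entry*> _free;
    };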
diff --git a/src/mongo/db/concurrency/lock_state.cpp b/src/mongo/db/concurrency/lock_state.cpp
index e526931d785..576d476abae 100644
--- a/src/mongo/db/concurrency/lock_state.cpp
+++ b/src/mongo/db/concurrency/lock_state.cpp
@@ -34,6 +34,7 @@
#include <vector>
+#include "mongo/db/concurrency/replication_lock_manager_manipulator.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/service_context.h"
#include "mongo/platform/compiler.h"
@@ -120,6 +121,12 @@ AtomicUInt64 idCounter(0);
// Partitioned global lock statistics, so we don't hit the same bucket
PartitionedInstanceWideLockStats globalStats;
+// When this is set, lock acquisitions for the global resource go into the TemporaryResourceQueue
+// stored in this decoration instead of letting the LockManager look up and put the requests into
+// the true global resource state.
+OperationContext::Decoration<LockManager::TemporaryResourceQueue*> globalResourceShadow =
+ OperationContext::declareDecoration<LockManager::TemporaryResourceQueue*>();
+
} // namespace
bool LockerImpl::_shouldDelayUnlock(ResourceId resId, LockMode mode) const {
@@ -263,7 +270,8 @@ LockerImpl::~LockerImpl() {
invariant(!inAWriteUnitOfWork());
invariant(_numResourcesToUnlockAtEndUnitOfWork == 0);
invariant(_requests.empty());
- invariant(_modeForTicket == MODE_NONE);
+ invariant(_modeForTicket == MODE_NONE,
+ str::stream() << "_modeForTicket found: " << _modeForTicket);
// Reset the locking statistics so the object can be reused
_stats.reset();
@@ -349,7 +357,7 @@ LockResult LockerImpl::_lockGlobalBegin(OperationContext* opCtx, LockMode mode,
}
LockMode actualLockMode = mode;
- if (opCtx) {
+ if (opCtx && opCtx->getServiceContext()) {
auto storageEngine = opCtx->getServiceContext()->getStorageEngine();
if (storageEngine && !storageEngine->supportsDBLocking()) {
actualLockMode = isSharedLockMode(mode) ? MODE_S : MODE_X;
@@ -361,7 +369,7 @@ LockResult LockerImpl::_lockGlobalBegin(OperationContext* opCtx, LockMode mode,
// Currently, deadlock detection does not happen inline with lock acquisition so the only
// unsuccessful result that the lock manager would return is LOCK_WAITING.
- invariant(result == LOCK_WAITING);
+ invariant(result == LOCK_WAITING, str::stream() << "Unexpected lock result: " << result);
return result;
}
@@ -656,6 +664,42 @@ void LockerImpl::restoreLockState(OperationContext* opCtx, const Locker::LockSna
invariant(_modeForTicket != MODE_NONE);
}
+void LockerImpl::restoreLockStateWithTemporaryGlobalResource(
+ OperationContext* opCtx,
+ const LockSnapshot& state,
+ LockManager::TemporaryResourceQueue* tempGlobalResource) {
+ invariant(tempGlobalResource->getResourceId().getType() == ResourceType::RESOURCE_GLOBAL);
+ invariant(globalResourceShadow(opCtx) == nullptr);
+
+ globalResourceShadow(opCtx) = tempGlobalResource;
+ ON_BLOCK_EXIT([&] { globalResourceShadow(opCtx) = nullptr; });
+
+ restoreLockState(opCtx, state);
+}
+
+void LockerImpl::replaceGlobalLockStateWithTemporaryGlobalResource(
+ LockManager::TemporaryResourceQueue* tempGlobalResource) {
+ invariant(tempGlobalResource->getResourceId().getType() == ResourceType::RESOURCE_GLOBAL);
+
+ // Transfer the LockRequests from tempGlobalResource into the true resource for the global lock
+ // that is managed by the LockManager. This also removes the existing MODE_X LockRequest from
+ // the granted list for the true global resource, but we still need to delete that LockRequest
+ // and remove it from this Locker's _requests list.
+ ReplicationLockManagerManipulator(&globalLockManager)
+ .replaceGlobalLocksWithLocksFromTemporaryGlobalResource(resourceIdGlobal,
+ tempGlobalResource);
+
+ // Release the ticket that this Locker was holding for the global X lock.
+ invariant(_modeForTicket == MODE_X);
+ _releaseTicket();
+ _modeForTicket = MODE_NONE;
+
+ // Now fully delete the LockRequest.
+ auto it = _requests.find(resourceIdGlobal);
+ scoped_spinlock scopedLock(_lock);
+ it.remove();
+}
+
LockResult LockerImpl::lockBegin(OperationContext* opCtx, ResourceId resId, LockMode mode) {
dassert(!getWaitingResource().isValid());
@@ -711,8 +755,21 @@ LockResult LockerImpl::lockBegin(OperationContext* opCtx, ResourceId resId, Lock
// otherwise we might reset state if the lock becomes granted very fast.
_notify.clear();
- LockResult result = isNew ? globalLockManager.lock(resId, request, mode)
- : globalLockManager.convert(resId, request, mode);
+ LockResult result{LockResult::LOCK_INVALID};
+ if (resType == RESOURCE_GLOBAL && opCtx && globalResourceShadow(opCtx)) {
+ // If we're trying to lock the global resource and we have a temporary global resource
+ // installed, use the temporary resource instead of letting the LockManager look up the
+ // true resource for the global lock.
+ invariant(isNew);
+ ReplicationLockManagerManipulator(&globalLockManager)
+ .lockUncontestedTemporaryGlobalResource(globalResourceShadow(opCtx), request, mode);
+ // lockUncontestedTemporaryGlobalResource can't fail.
+ result = LockResult::LOCK_OK;
+ } else {
+ // Normal case using the true global lock head.
+ result = isNew ? globalLockManager.lock(resId, request, mode)
+ : globalLockManager.convert(resId, request, mode);
+ }
if (result == LOCK_WAITING) {
globalStats.recordWait(_id, resId, mode);
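
The decoration added above acts as an operation-scoped redirect: restoreLockStateWithTemporaryGlobalResource() installs the temporary queue, a scope guard clears it, and lockBegin() checks the slot before consulting the true global LockHead. A simplified sketch of that shape, using a plain struct member instead of OperationContext::Decoration and hypothetical names:

    #include <functional>

    struct TempQueue {};  // Stand-in for LockManager::TemporaryResourceQueue.

    // Stand-in for the per-operation decoration: one shadow slot per operation.
    struct OpCtx {
        TempQueue* globalResourceShadow = nullptr;
    };

    // Minimal ON_BLOCK_EXIT-style scope guard.
    struct OnBlockExit {
        std::function<void()> f;
        ~OnBlockExit() { f(); }
    };

    void restoreWithShadow(OpCtx* opCtx, TempQueue* temp,
                           const std::function<void(OpCtx*)>& restoreLockState) {
        opCtx->globalResourceShadow = temp;  // Divert global-lock acquisitions.
        OnBlockExit guard{[&] { opCtx->globalResourceShadow = nullptr; }};
        restoreLockState(opCtx);             // Re-acquisition runs with the shadow
    }                                        // installed; the guard always clears it.

    // Mirrors the branch in lockBegin(): divert only global-resource requests,
    // and only while a shadow queue is installed for this operation.
    bool shouldDivert(const OpCtx* opCtx, bool isGlobalResource) {
        return isGlobalResource && opCtx && opCtx->globalResourceShadow != nullptr;
    }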
diff --git a/src/mongo/db/concurrency/lock_state.h b/src/mongo/db/concurrency/lock_state.h
index daf86c3eb5e..8eb9ec4053c 100644
--- a/src/mongo/db/concurrency/lock_state.h
+++ b/src/mongo/db/concurrency/lock_state.h
@@ -193,6 +193,14 @@ public:
restoreLockState(nullptr, stateToRestore);
}
+ void restoreLockStateWithTemporaryGlobalResource(
+ OperationContext* opCtx,
+ const LockSnapshot& stateToRestore,
+ LockManager::TemporaryResourceQueue* tempGlobalResource) override;
+
+ void replaceGlobalLockStateWithTemporaryGlobalResource(
+ LockManager::TemporaryResourceQueue* tempGlobalResource) override;
+
virtual void releaseTicket();
virtual void reacquireTicket(OperationContext* opCtx);
diff --git a/src/mongo/db/concurrency/lock_state_test.cpp b/src/mongo/db/concurrency/lock_state_test.cpp
index 4d2da27bd94..3ee780e364b 100644
--- a/src/mongo/db/concurrency/lock_state_test.cpp
+++ b/src/mongo/db/concurrency/lock_state_test.cpp
@@ -37,6 +37,7 @@
#include "mongo/config.h"
#include "mongo/db/concurrency/lock_manager_test_help.h"
#include "mongo/db/concurrency/locker.h"
+#include "mongo/db/operation_context_noop.h"
#include "mongo/unittest/unittest.h"
#include "mongo/util/log.h"
#include "mongo/util/timer.h"
@@ -657,4 +658,102 @@ TEST(LockerImpl, ConvertLockPendingUnlockAndUnlock) {
locker.unlockGlobal();
}
+/**
+ * This test mimics the process that replica set state transitions go through.
+ * First a request for the global X lock is enqueued; then the existing locks held by various
+ * ongoing prepared transactions are yielded so that the global X lock request can succeed.
+ * When the state transition is complete, we need to atomically restore the locks that were
+ * yielded while dropping the global X lock that was held for the transition.
+ */
+TEST(LockerImpl, AtomicLockRestoreAndGlobalLockReleaseForReplStateTransitions) {
+ const ResourceId globalResId(RESOURCE_GLOBAL, ResourceId::SINGLETON_GLOBAL);
+ const ResourceId resIdDatabase(RESOURCE_DATABASE, "TestDB"_sd);
+ const ResourceId resIdCollection1(RESOURCE_COLLECTION, "TestDB.collection1"_sd);
+ const ResourceId resIdCollection2(RESOURCE_COLLECTION, "TestDB.collection2"_sd);
+    const ResourceId resIdCollection3(RESOURCE_COLLECTION, "TestDB.collection3"_sd);
+
+ Locker::LockSnapshot lockInfo1, lockInfo2, lockInfo3;
+ LockerImpl stepUpLocker, txnLocker1, txnLocker2, txnLocker3, randomOpLocker1, randomOpLocker2;
+ OperationContextNoop opCtx1, opCtx2, opCtx3;
+ ON_BLOCK_EXIT([&] {
+ // clean up locks on test completion.
+ stepUpLocker.unlockGlobal();
+ txnLocker1.unlockGlobal();
+ txnLocker2.unlockGlobal();
+ txnLocker3.unlockGlobal();
+ randomOpLocker1.unlockGlobal();
+ randomOpLocker2.unlockGlobal();
+ });
+
+ // Take some locks, mimicking the locks that would be held by ongoing prepared transactions.
+ txnLocker1.lockGlobal(&opCtx1, MODE_IX);
+ ASSERT_EQUALS(LOCK_OK, txnLocker1.lock(&opCtx1, resIdDatabase, MODE_IX));
+ ASSERT_EQUALS(LOCK_OK, txnLocker1.lock(&opCtx1, resIdCollection1, MODE_IX));
+ ASSERT_EQUALS(LOCK_OK, txnLocker1.lock(&opCtx1, resIdCollection2, MODE_IX));
+
+ txnLocker2.lockGlobal(&opCtx2, MODE_IX);
+ ASSERT_EQUALS(LOCK_OK, txnLocker2.lock(&opCtx2, resIdDatabase, MODE_IX));
+ ASSERT_EQUALS(LOCK_OK, txnLocker2.lock(&opCtx2, resIdCollection2, MODE_IX));
+
+ txnLocker3.lockGlobal(&opCtx3, MODE_IX);
+ ASSERT_EQUALS(LOCK_OK, txnLocker3.lock(&opCtx3, resIdDatabase, MODE_IX));
+ ASSERT_EQUALS(LOCK_OK, txnLocker3.lock(&opCtx3, resIdCollection3, MODE_IX));
+
+ // Enqueue request for global X lock in stepUpLocker, mimicking the lock held by the thread
+ // performing the state transition.
+ ASSERT_EQUALS(LockResult::LOCK_WAITING, stepUpLocker.lockGlobalBegin(MODE_X, Date_t::max()));
+
+ // Enqueue a lock request behind the pending global X request to ensure that it gets granted
+ // later when the global X lock is released.
+ ASSERT_EQUALS(LockResult::LOCK_WAITING,
+ randomOpLocker1.lockGlobalBegin(MODE_IS, Date_t::max()));
+
+ // Yield locks on all txn threads.
+ ASSERT_TRUE(txnLocker1.saveLockStateAndUnlock(&lockInfo1));
+ ASSERT_TRUE(txnLocker2.saveLockStateAndUnlock(&lockInfo2));
+ ASSERT_TRUE(txnLocker3.saveLockStateAndUnlock(&lockInfo3));
+
+ // Ensure that stepUpLocker is now able to acquire the global X lock.
+ ASSERT_EQUALS(LockResult::LOCK_OK, stepUpLocker.lockGlobalComplete(Date_t::max()));
+
+ // Enqueue a lock request behind the global X lock to ensure that it gets granted
+ // later when the global X lock is released.
+ ASSERT_EQUALS(LockResult::LOCK_WAITING,
+ randomOpLocker2.lockGlobalBegin(MODE_IX, Date_t::max()));
+
+ // Now we need to atomically release the global X lock from stepUpLocker and restore the lock
+ // state for the txn threads. We start by restoring the lock state from the txn threads with
+ // the global locks placed into a temporary LockHead.
+ LockManager::TemporaryResourceQueue tempGlobalResource(globalResId);
+
+ txnLocker1.restoreLockStateWithTemporaryGlobalResource(&opCtx1, lockInfo1, &tempGlobalResource);
+ txnLocker2.restoreLockStateWithTemporaryGlobalResource(&opCtx2, lockInfo2, &tempGlobalResource);
+ txnLocker3.restoreLockStateWithTemporaryGlobalResource(&opCtx3, lockInfo3, &tempGlobalResource);
+
+ // Atomically release the global X lock from stepUpLocker and move the global locks for the
+ // txn threads from the temporary LockHead into the true LockHead for the global lock.
+ stepUpLocker.replaceGlobalLockStateWithTemporaryGlobalResource(&tempGlobalResource);
+
+ // Make sure all the locks were acquired and released appropriately.
+ ASSERT_EQUALS(MODE_NONE, stepUpLocker.getLockMode(globalResId));
+
+ ASSERT_EQUALS(MODE_IX, txnLocker1.getLockMode(globalResId));
+ ASSERT_EQUALS(MODE_IX, txnLocker1.getLockMode(resIdDatabase));
+ ASSERT_EQUALS(MODE_IX, txnLocker1.getLockMode(resIdCollection1));
+ ASSERT_EQUALS(MODE_IX, txnLocker1.getLockMode(resIdCollection2));
+
+ ASSERT_EQUALS(MODE_IX, txnLocker2.getLockMode(globalResId));
+ ASSERT_EQUALS(MODE_IX, txnLocker2.getLockMode(resIdDatabase));
+ ASSERT_EQUALS(MODE_IX, txnLocker2.getLockMode(resIdCollection2));
+
+ ASSERT_EQUALS(MODE_IX, txnLocker3.getLockMode(globalResId));
+ ASSERT_EQUALS(MODE_IX, txnLocker3.getLockMode(resIdDatabase));
+ ASSERT_EQUALS(MODE_IX, txnLocker3.getLockMode(resIdCollection3));
+
+ // Make sure the pending global lock requests got granted when the global X lock was released.
+ ASSERT_EQUALS(LockResult::LOCK_OK, randomOpLocker1.lockGlobalComplete(Date_t::now()));
+ ASSERT_EQUALS(LockResult::LOCK_OK, randomOpLocker2.lockGlobalComplete(Date_t::now()));
+ ASSERT_EQUALS(MODE_IS, randomOpLocker1.getLockMode(globalResId));
+ ASSERT_EQUALS(MODE_IX, randomOpLocker2.getLockMode(globalResId));
+}
} // namespace mongo
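
Note the two-phase acquisition the test relies on: lockGlobalBegin() enqueues the request and returns LOCK_WAITING when it conflicts, while lockGlobalComplete() blocks until the request is granted or the deadline passes. That is why the final two lockGlobalComplete() calls can use Date_t::now() rather than Date_t::max(): by that point replaceGlobalLockStateWithTemporaryGlobalResource() has already released the global X lock and granted the pending requests, so completion succeeds without waiting.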
diff --git a/src/mongo/db/concurrency/locker.h b/src/mongo/db/concurrency/locker.h
index 29b0ca9d364..07e45d052c5 100644
--- a/src/mongo/db/concurrency/locker.h
+++ b/src/mongo/db/concurrency/locker.h
@@ -366,6 +366,26 @@ public:
virtual void restoreLockState(const LockSnapshot& stateToRestore) = 0;
/**
+ * Works like restoreLockState, except that any global locks in the state to restore are
+ * restored into the provided TemporaryResourceQueue for the global resource, rather than into
+ * the true global lock resource owned by the LockManager. Locks on resources other than the
+ * global lock are restored to their true LockManager-owned resource objects.
+ */
+ virtual void restoreLockStateWithTemporaryGlobalResource(
+ OperationContext* opCtx,
+ const LockSnapshot& stateToRestore,
+ LockManager::TemporaryResourceQueue* tempGlobalResource) = 0;
+
+ /**
+ * Atomically releases the global X lock from the true global resource managed by the
+ * LockManager and transfers the locks from the 'tempGlobalResource' into the true global
+ * resource.
+ */
+ virtual void replaceGlobalLockStateWithTemporaryGlobalResource(
+ LockManager::TemporaryResourceQueue* tempGlobalResource) = 0;
+
+ /**
* Releases the ticket associated with the Locker. This allows locks to be held without
* contributing to reader/writer throttling.
*/
diff --git a/src/mongo/db/concurrency/locker_noop.h b/src/mongo/db/concurrency/locker_noop.h
index 232b0b5f685..957627c8124 100644
--- a/src/mongo/db/concurrency/locker_noop.h
+++ b/src/mongo/db/concurrency/locker_noop.h
@@ -178,6 +178,18 @@ public:
MONGO_UNREACHABLE;
}
+ void restoreLockStateWithTemporaryGlobalResource(
+ OperationContext* opCtx,
+ const LockSnapshot& stateToRestore,
+ LockManager::TemporaryResourceQueue* tempGlobalResource) override {
+ MONGO_UNREACHABLE;
+ }
+
+ void replaceGlobalLockStateWithTemporaryGlobalResource(
+ LockManager::TemporaryResourceQueue* tempGlobalResource) override {
+ MONGO_UNREACHABLE;
+ }
+
virtual void releaseTicket() {
MONGO_UNREACHABLE;
}
diff --git a/src/mongo/db/concurrency/replication_lock_manager_manipulator.cpp b/src/mongo/db/concurrency/replication_lock_manager_manipulator.cpp
new file mode 100644
index 00000000000..009bbbe3c20
--- /dev/null
+++ b/src/mongo/db/concurrency/replication_lock_manager_manipulator.cpp
@@ -0,0 +1,105 @@
+/**
+ * Copyright (C) 2018 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kDefault
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/concurrency/replication_lock_manager_manipulator.h"
+
+namespace mongo {
+
+ReplicationLockManagerManipulator::ReplicationLockManagerManipulator(LockManager* lockManager)
+ : _lockManager(lockManager) {}
+
+void ReplicationLockManagerManipulator::lockUncontestedTemporaryGlobalResource(
+ LockManager::TemporaryResourceQueue* tempGlobalResource, LockRequest* request, LockMode mode) {
+ // Sanity check that requests are not being reused without proper cleanup
+ invariant(request->status == LockRequest::STATUS_NEW);
+ invariant(request->recursiveCount == 1);
+ invariant(!request->partitioned);
+ invariant(tempGlobalResource->_lockHead.resourceId.getType() == ResourceType::RESOURCE_GLOBAL);
+
+ request->mode = mode;
+ const auto lockResult = tempGlobalResource->_lockHead.newRequest(request);
+ invariant(lockResult == LockResult::LOCK_OK);
+}
+
+void ReplicationLockManagerManipulator::replaceGlobalLocksWithLocksFromTemporaryGlobalResource(
+ ResourceId resId, LockManager::TemporaryResourceQueue* tempGlobalResource) {
+ invariant(resId.getType() == ResourceType::RESOURCE_GLOBAL);
+ invariant(tempGlobalResource->_lockHead.resourceId == resId);
+
+ LockManager::LockBucket* bucket = _lockManager->_getBucket(resId);
+ stdx::lock_guard<SimpleMutex> scopedLock(bucket->mutex);
+ LockHead* trueGlobalLockHead = bucket->findOrInsert(resId);
+
+ LockHead* tempGlobalLockHead = &tempGlobalResource->_lockHead;
+
+ invariant(trueGlobalLockHead->grantedCounts[MODE_X] == 1);
+ invariant(trueGlobalLockHead->compatibleFirstCount == 1);
+ invariant(tempGlobalLockHead->conflictList.empty());
+
+ LockRequest* existingGlobalLockRequest = trueGlobalLockHead->grantedList._front;
+ invariant(!existingGlobalLockRequest->next);
+ invariant(existingGlobalLockRequest->mode == MODE_X);
+ invariant(existingGlobalLockRequest->status == LockRequest::Status::STATUS_GRANTED);
+
+ // Remove the existing granted MODE_X lock from the trueGlobalLockHead so it can be replaced
+ // by the locks from tempGlobalLockHead.
+ trueGlobalLockHead->grantedList.remove(existingGlobalLockRequest);
+ trueGlobalLockHead->decGrantedModeCount(existingGlobalLockRequest->mode);
+ trueGlobalLockHead->compatibleFirstCount--;
+
+ // Now iterate over the granted LockRequests in the tempGlobalLockHead and transfer them over
+ // to the trueGlobalLockHead.
+ for (LockRequest* it = tempGlobalLockHead->grantedList._front; it != nullptr;) {
+ LockRequest* next = it->next;
+
+ invariant(it->mode == MODE_IX);
+ invariant(it->status == LockRequest::Status::STATUS_GRANTED);
+ invariant(it->lock == tempGlobalLockHead);
+
+ it->lock = trueGlobalLockHead;
+ tempGlobalLockHead->grantedList.remove(it);
+ tempGlobalLockHead->decGrantedModeCount(it->mode);
+ trueGlobalLockHead->grantedList.push_back(it);
+ trueGlobalLockHead->incGrantedModeCount(it->mode);
+
+ it = next;
+ }
+ invariant(tempGlobalLockHead->grantedList.empty());
+ invariant(tempGlobalLockHead->grantedCounts[MODE_X] == 0);
+ invariant(tempGlobalLockHead->grantedModes == 0);
+
+ // Grant any pending requests against the true global lock head that can proceed now that the
+ // global X lock has been released.
+ _lockManager->_onLockModeChanged(trueGlobalLockHead, true);
+}
+
+} // namespace mongo
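
The transfer loop above is a classic intrusive-list splice: each request is detached from the temporary head and appended to the true head, with the per-mode counters adjusted on both sides so the count/bitmask invariants hold at every step. A reduced sketch of the same loop shape, with hypothetical singly-linked types (the real LockRequestList is doubly linked):

    #include <cassert>

    struct Node {          // Stand-in for LockRequest.
        Node* next = nullptr;
    };

    struct List {          // Stand-in for LockRequestList plus its counts.
        Node* front = nullptr;
        Node* back = nullptr;
        int count = 0;     // Kept in lock-step, like grantedCounts/grantedModes.

        void pushBack(Node* n) {
            n->next = nullptr;
            (back ? back->next : front) = n;
            back = n;
            ++count;
        }
        void remove(Node* n) {  // Front-only removal is all the splice needs.
            assert(n == front);
            front = front->next;
            if (!front) back = nullptr;
            --count;
        }
    };

    void spliceAll(List& from, List& to) {
        for (Node* it = from.front; it != nullptr;) {
            Node* next = it->next;  // Save the link before unlinking the node.
            from.remove(it);        // Source bookkeeping is decremented first...
            to.pushBack(it);        // ...then the destination is incremented.
            it = next;
        }
        assert(from.count == 0);
    }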
diff --git a/src/mongo/db/concurrency/replication_lock_manager_manipulator.h b/src/mongo/db/concurrency/replication_lock_manager_manipulator.h
new file mode 100644
index 00000000000..44048de2eb2
--- /dev/null
+++ b/src/mongo/db/concurrency/replication_lock_manager_manipulator.h
@@ -0,0 +1,71 @@
+/**
+ * Copyright (C) 2018 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/concurrency/lock_manager.h"
+
+namespace mongo {
+
+/**
+ * This friend class of LockManager extends LockManager functionality to enable behaviors
+ * required for replication state transitions, which must atomically release the global X lock and
+ * restore locks for prepared transactions into their individual Lockers.
+ */
+class ReplicationLockManagerManipulator {
+ MONGO_DISALLOW_COPYING(ReplicationLockManagerManipulator);
+
+public:
+ explicit ReplicationLockManagerManipulator(LockManager* lockManager);
+ ~ReplicationLockManagerManipulator() = default;
+
+ /**
+     * Works like LockManager::lock(), except that it applies only to the global lock and, rather
+     * than looking up the true LockHead for the global resource ID, puts the LockRequest into the
+     * given TemporaryResourceQueue, which is guaranteed not to have any locks that conflict with
+     * the given request.
+ */
+ void lockUncontestedTemporaryGlobalResource(
+ LockManager::TemporaryResourceQueue* tempGlobalResource,
+ LockRequest* request,
+ LockMode mode);
+
+ /**
+     * Takes the locks from a given TemporaryResourceQueue for the global resource and moves them
+     * into the true LockHead for the global resource, atomically releasing the global X lock from
+     * that LockHead in the process.
+ */
+ void replaceGlobalLocksWithLocksFromTemporaryGlobalResource(
+ ResourceId resId, LockManager::TemporaryResourceQueue* tempGlobalResource);
+
+private:
+ LockManager* _lockManager;
+};
+
+} // namespace mongo
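
A final note on the design: rather than widening LockManager's public API with replication-only operations, the invasive logic is collected in a separate friend class, which keeps the backdoor explicit and easy to audit. A minimal sketch of that friend-manipulator pattern, with hypothetical names:

    // Hypothetical minimal version of the friend-manipulator pattern.
    class Engine {
    public:
        int publicQuery() const { return _state; }

    private:
        friend class EngineManipulator;  // The one sanctioned backdoor.
        int _state = 0;
    };

    // Rarely-needed, invasive operations live here instead of bloating Engine's
    // public interface; the friend declaration makes the coupling explicit.
    class EngineManipulator {
    public:
        explicit EngineManipulator(Engine* engine) : _engine(engine) {}

        void resetStateForSpecialTransition() { _engine->_state = 0; }

    private:
        Engine* _engine;
    };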