diff options
author | Kaloian Manassiev <kaloian.manassiev@mongodb.com> | 2014-11-18 11:35:22 -0500 |
---|---|---|
committer | Kaloian Manassiev <kaloian.manassiev@mongodb.com> | 2014-11-19 15:28:20 -0500 |
commit | 2304904687c5d29e228d86da95244682dc62caa1 (patch) | |
tree | e4e999bf58fe0204f287e157b7bea4ed8b8afa77 | |
parent | 489690cfbccc24cf69b6ae7848581303ec8f4b0e (diff) | |
download | mongo-2304904687c5d29e228d86da95244682dc62caa1.tar.gz |
SERVER-16194 Add a lockGlobalBegin capability for the global lock
This allows replication step-down to first enqueue its request for the global
lock, then kill all current lock owners, and only afterwards wait for the lock to be granted.
-rw-r--r-- | src/mongo/db/concurrency/d_concurrency_test.cpp | 18 | ||||
-rw-r--r-- | src/mongo/db/concurrency/lock_state.cpp | 260 | ||||
-rw-r--r-- | src/mongo/db/concurrency/lock_state.h | 21 | ||||
-rw-r--r-- | src/mongo/db/concurrency/locker.h | 16 |
4 files changed, 174 insertions, 141 deletions
diff --git a/src/mongo/db/concurrency/d_concurrency_test.cpp b/src/mongo/db/concurrency/d_concurrency_test.cpp index 141baf48d8f..e0b6be8ebd7 100644 --- a/src/mongo/db/concurrency/d_concurrency_test.cpp +++ b/src/mongo/db/concurrency/d_concurrency_test.cpp @@ -86,9 +86,9 @@ namespace mongo { } } - TEST(DConcurrency, readlocktryNoTimeoutDueToFlushLockS) { + TEST(DConcurrency, readlocktryNoTimeoutDueToGlobalLockS) { MMAPV1LockerImpl ls(1); - AutoAcquireFlushLockForMMAPV1Commit autoFlushLock(&ls); + Lock::GlobalRead globalRead(&ls); MMAPV1LockerImpl lsTry(2); readlocktry lockTry(&lsTry, 1); @@ -96,9 +96,9 @@ namespace mongo { ASSERT(lockTry.got()); } - TEST(DConcurrency, writelocktryTimeoutDueToFlushLockS) { + TEST(DConcurrency, writelocktryTimeoutDueToGlobalLockS) { MMAPV1LockerImpl ls(1); - AutoAcquireFlushLockForMMAPV1Commit autoFlushLock(&ls); + Lock::GlobalRead globalRead(&ls); MMAPV1LockerImpl lsTry(2); writelocktry lockTry(&lsTry, 1); @@ -106,10 +106,9 @@ namespace mongo { ASSERT(!lockTry.got()); } - TEST(DConcurrency, readlocktryTimeoutDueToFlushLockX) { + TEST(DConcurrency, readlocktryTimeoutDueToGlobalLockX) { MMAPV1LockerImpl ls(1); - AutoAcquireFlushLockForMMAPV1Commit autoFlushLock(&ls); - autoFlushLock.upgradeFlushLockToExclusive(); + Lock::GlobalWrite globalWrite(&ls); MMAPV1LockerImpl lsTry(2); readlocktry lockTry(&lsTry, 1); @@ -117,10 +116,9 @@ namespace mongo { ASSERT(!lockTry.got()); } - TEST(DConcurrency, writelocktryTimeoutDueToFlushLockX) { + TEST(DConcurrency, writelocktryTimeoutDueToGlobalLockX) { MMAPV1LockerImpl ls(1); - AutoAcquireFlushLockForMMAPV1Commit autoFlushLock(&ls); - autoFlushLock.upgradeFlushLockToExclusive(); + Lock::GlobalWrite globalWrite(&ls); MMAPV1LockerImpl lsTry(2); writelocktry lockTry(&lsTry, 1); diff --git a/src/mongo/db/concurrency/lock_state.cpp b/src/mongo/db/concurrency/lock_state.cpp index 59bf9e5955b..450f20e84ef 100644 --- a/src/mongo/db/concurrency/lock_state.cpp +++ b/src/mongo/db/concurrency/lock_state.cpp @@ 
-40,113 +40,115 @@ #include "mongo/util/stacktrace.h" namespace mongo { +namespace { + + // Global lock manager instance. + LockManager globalLockManager; + + // Global lock. Every server operation, which uses the Locker must acquire this lock at least + // once. See comments in the header file (begin/endTransaction) for more information. + const ResourceId resourceIdGlobal = ResourceId(RESOURCE_GLOBAL, 1ULL); + + // Flush lock. This is only used for the MMAP V1 storage engine and synchronizes journal writes + // to the shared view and remaps. See the comments in the header for information on how MMAP V1 + // concurrency control works. + const ResourceId resourceIdMMAPV1Flush = ResourceId(RESOURCE_MMAPV1_FLUSH, 2ULL); + + // How often (in millis) to check for deadlock if a lock has not been granted for some time + const unsigned DeadlockTimeoutMs = 100; + + /** + * Used to sort locks by granularity when snapshotting lock state. We must report and reacquire + * locks in the same granularity in which they are acquired (i.e. global, flush, database, + * collection, etc). + */ + struct SortByGranularity { + inline bool operator()(const Locker::OneLock& lhs, const Locker::OneLock& rhs) const { + return lhs.resourceId.getType() < rhs.resourceId.getType(); + } + }; + + /** + * Returns whether the passed in mode is S or IS. Used for validation checks. + */ + bool isSharedMode(LockMode mode) { + return (mode == MODE_IS || mode == MODE_S); + } + + /** + * Whether the particular lock's release should be held until the end of the operation. We + * delay release of exclusive locks (locks that are for write operations) in order to ensure + * that the data they protect is committed successfully. + */ + bool shouldDelayUnlock(ResourceId resId, LockMode mode) { + // Global and flush lock are not used to protect transactional resources and as such, they + // need to be acquired and released when requested. 
+ if (resId == resourceIdGlobal) { + return false; + } - namespace { - - // Global lock manager instance. - LockManager globalLockManager; - - // Global lock. Every server operation, which uses the Locker must acquire this lock at - // least once. See comments in the header file (begin/endTransaction) for more information - // on its use. - const ResourceId resourceIdGlobal = ResourceId(RESOURCE_GLOBAL, 1ULL); - - // Flush lock. This is only used for the MMAP V1 storage engine and synchronizes the - // application of journal writes to the shared view and remaps. See the comments in the - // header for _acquireFlushLockForMMAPV1/_releaseFlushLockForMMAPV1 for more information - // on its use. - const ResourceId resourceIdMMAPV1Flush = ResourceId(RESOURCE_MMAPV1_FLUSH, 2ULL); - - // How often (in millis) to check for deadlock if a lock has not been granted for some time - const unsigned DeadlockTimeoutMs = 100; - - /** - * Used to sort locks by granularity when snapshotting lock state. We must report and - * reacquire locks in the same granularity in which they are acquired (i.e. global, flush, - * database, collection, etc). - */ - struct SortByGranularity { - inline bool operator()(const Locker::OneLock& lhs, const Locker::OneLock& rhs) { - return lhs.resourceId.getType() < rhs.resourceId.getType(); - } - }; - - /** - * Returns whether the passed in mode is S or IS. Used for validation checks. - */ - bool isSharedMode(LockMode mode) { - return (mode == MODE_IS || mode == MODE_S); - } - - /** - * Whether the particular lock's release should be held until the end of the operation. We - * delay releases for exclusive locks (locks that are for write operations) in order to - * ensure that the data they protect is committed successfully. - */ - bool shouldDelayUnlock(ResourceId resId, LockMode mode) { - // Global and flush lock are not used to protect transactional resources and as - // such, they need to be acquired and released when requested. 
- if (resId == resourceIdGlobal) { - return false; - } - - if (resId == resourceIdMMAPV1Flush) { - return false; - } + if (resId == resourceIdMMAPV1Flush) { + return false; + } - switch (mode) { - case MODE_X: - case MODE_IX: - return true; + switch (mode) { + case MODE_X: + case MODE_IX: + return true; - case MODE_IS: - case MODE_S: - return false; + case MODE_IS: + case MODE_S: + return false; - default: - invariant(false); - } + default: + invariant(false); } + } - /** - * Dumps the contents of the global lock manager to the server log for diagnostics. - */ - const uint64_t LockMgrDumpThrottleMicros = 30 * Timer::microsPerSecond; - AtomicUInt64 lastDumpTimestampMicros(0); + /** + * Dumps the contents of the global lock manager to the server log for diagnostics. + */ + enum { + LockMgrDumpThrottleMillis = 60000, + LockMgrDumpThrottleMicros = LockMgrDumpThrottleMillis * 1000 + }; - void dumpGlobalLockManagerAndCallstackThrottled(const Locker* locker) { - const uint64_t lastDumpMicros = lastDumpTimestampMicros.load(); + AtomicUInt64 lastDumpTimestampMicros(0); - // Don't print too frequently - if (curTimeMicros64() - lastDumpMicros < LockMgrDumpThrottleMicros) return; + void dumpGlobalLockManagerAndCallstackThrottled(const Locker* locker) { + const uint64_t lastDumpMicros = lastDumpTimestampMicros.load(); - // Only one thread should dump the lock manager in order to not pollute the log - if (lastDumpTimestampMicros.compareAndSwap(lastDumpMicros, - curTimeMicros64()) == lastDumpMicros) { + // Don't print too frequently + if (curTimeMicros64() - lastDumpMicros < LockMgrDumpThrottleMicros) return; - log() << "LockerId " << locker->getId() - << " has been waiting to acquire lock for more than 30 seconds. MongoDB will" - << " print the lock manager state and the stack of the thread that has been" - << " waiting, for diagnostic purposes. 
This message does not necessary imply" - << " that the server is experiencing an outage, but might be an indication" - << " of an overloaded server."; + // Only one thread should dump the lock manager in order to not pollute the log + if (lastDumpTimestampMicros.compareAndSwap(lastDumpMicros, + curTimeMicros64()) == lastDumpMicros) { - // Dump the lock manager state and the stack trace so we can investigate - globalLockManager.dump(); + log() << "LockerId " << locker->getId() + << " has been waiting to acquire lock for more than 30 seconds. MongoDB will" + << " print the lock manager state and the stack of the thread that has been" + << " waiting, for diagnostic purposes. This message does not necessary imply" + << " that the server is experiencing an outage, but might be an indication of" + << " an overload."; - log() << '\n'; - printStackTrace(); + // Dump the lock manager state and the stack trace so we can investigate + globalLockManager.dump(); - // If a deadlock was discovered, the server will never recover from it, so shutdown - DeadlockDetector wfg(globalLockManager, locker); - if (wfg.check().hasCycle()) { - severe() << "Deadlock found during lock acquisition: " << wfg.toString(); - fassertFailed(28557); - } + log() << '\n'; + printStackTrace(); + + // If a deadlock was discovered, the server will never recover from it, so shutdown + DeadlockDetector wfg(globalLockManager, locker); + if (wfg.check().hasCycle()) { + severe() << "Deadlock found during lock acquisition: " << wfg.toString(); + fassertFailed(28557); } } } +} // namespace + template<bool IsForMMAPV1> bool LockerImpl<IsForMMAPV1>::isW() const { @@ -311,47 +313,63 @@ namespace mongo { template<bool IsForMMAPV1> LockResult LockerImpl<IsForMMAPV1>::lockGlobal(LockMode mode, unsigned timeoutMs) { - LockRequestsMap::Iterator it = _requests.find(resourceIdGlobal); - if (!it) { - // Global lock should be the first lock on any operation - invariant(_requests.empty()); + LockResult globalLockResult = 
lockGlobalBegin(mode); + if (globalLockResult != LOCK_OK) { + // Could only be LOCK_WAITING (checked by lockGlobalComplete) + globalLockResult = lockGlobalComplete(timeoutMs); - // Start counting time since first global lock acquisition (that's when effectively - // any timing for the locker counts from). - _timer.reset(); + // If waiting for the lock failed, no point in asking for the flush lock + if (globalLockResult != LOCK_OK) { + return globalLockResult; + } } - else { - // No upgrades on the GlobalLock are currently necessary. Should not be used until we - // are handling deadlocks on anything other than the flush thread. - invariant(it->mode >= mode); + + // We would have returned above if global lock acquisition failed for any reason + invariant(globalLockResult == LOCK_OK); + + // We are done if this is not MMAP V1 + if (!IsForMMAPV1) { + return LOCK_OK; } - LockResult globalLockResult = lock(resourceIdGlobal, mode, timeoutMs); - if (globalLockResult != LOCK_OK) { - invariant(globalLockResult == LOCK_TIMEOUT); + // Special-handling for MMAP V1 commit concurrency control. We will not obey the timeout + // request to simplify the logic here, since the only places which acquire global lock with + // a timeout is the shutdown code. - return globalLockResult; + // The flush lock always has a reference count of 1, because it is dropped at the end of + // each write unit of work in order to allow the flush thread to run. See the comments in + // the header for information on how the MMAP V1 journaling system works. + const LockRequest* globalLockRequest = _requests.find(resourceIdGlobal).objAddr(); + if (globalLockRequest->recursiveCount > 1){ + return LOCK_OK; } - // Special-handling for MMAP V1 concurrency control - if (IsForMMAPV1 && !it) { - // Obey the requested timeout - const unsigned elapsedTimeMs = _timer.millis(); - const unsigned remainingTimeMs = - elapsedTimeMs < timeoutMs ? 
(timeoutMs - elapsedTimeMs) : 0; + const LockResult flushLockResult = lock(resourceIdMMAPV1Flush, + _getModeForMMAPV1FlushLock()); + if (flushLockResult != LOCK_OK) { + invariant(flushLockResult == LOCK_TIMEOUT); + invariant(unlock(resourceIdGlobal)); + } + + return flushLockResult; + } + + template<bool IsForMMAPV1> + LockResult LockerImpl<IsForMMAPV1>::lockGlobalComplete(unsigned timeoutMs) { + return lockComplete(resourceIdGlobal, timeoutMs, false); + } - LockResult flushLockResult = - lock(resourceIdMMAPV1Flush, _getModeForMMAPV1FlushLock(), remainingTimeMs); + template<bool IsForMMAPV1> + LockResult LockerImpl<IsForMMAPV1>::lockGlobalBegin(LockMode mode) { + const LockResult result = lockBegin(resourceIdGlobal, mode); - if (flushLockResult != LOCK_OK) { - invariant(flushLockResult == LOCK_TIMEOUT); - invariant(unlock(resourceIdGlobal)); + if (result == LOCK_OK) return LOCK_OK; - return flushLockResult; - } - } + // Currently, deadlock detection does not happen inline with lock acquisition so the only + // unsuccessful result that the lock manager would return is LOCK_WAITING. + invariant(result == LOCK_WAITING); - return LOCK_OK; + return result; } template<bool IsForMMAPV1> @@ -704,7 +722,7 @@ namespace mongo { // This will occasionally dump the global lock manager in case lock acquisition is // taking too long. 
- if (elapsedTimeMs > 30000U) { + if (elapsedTimeMs > LockMgrDumpThrottleMillis) { dumpGlobalLockManagerAndCallstackThrottled(this); } } diff --git a/src/mongo/db/concurrency/lock_state.h b/src/mongo/db/concurrency/lock_state.h index 8dd1125cc14..9aa540dee6e 100644 --- a/src/mongo/db/concurrency/lock_state.h +++ b/src/mongo/db/concurrency/lock_state.h @@ -96,6 +96,9 @@ namespace mongo { virtual LockerId getId() const { return _id; } virtual LockResult lockGlobal(LockMode mode, unsigned timeoutMs = UINT_MAX); + virtual LockResult lockGlobalBegin(LockMode mode); + virtual LockResult lockGlobalComplete(unsigned timeoutMs); + virtual void downgradeGlobalXtoSForMMAPV1(); virtual bool unlockAll(); @@ -127,12 +130,11 @@ namespace mongo { virtual void restoreLockState(const LockSnapshot& stateToRestore); /** - * These two methods allow for lock requests to be acquired in a non-blocking way. There - * can be only one outstanding pending lock request per locker object. I.e., for each call - * to lockBegin, which does not return LOCK_OK, there needs to be a corresponding call to - * lockComplete or unlock. + * Allows for lock requests to be requested in a non-blocking way. There can be only one + * outstanding pending lock request per locker object. * - * lockBegin posts a request to the lock manager for the specified lock to be acquired + * lockBegin posts a request to the lock manager for the specified lock to be acquired, + * which either immediately grants the lock, or puts the requestor on the conflict queue * and returns immediately with the result of the acquisition. The result can be one of: * * LOCK_OK - Nothing more needs to be done. The lock is granted. @@ -141,13 +143,16 @@ namespace mongo { * order to wait for the actual grant to occur. If the caller no longer needs to wait * for the grant to happen, unlock needs to be called with the same resource passed * to lockBegin. 
+ * + * In other words for each call to lockBegin, which does not return LOCK_OK, there needs to + * be a corresponding call to either lockComplete or unlock. */ LockResult lockBegin(ResourceId resId, LockMode mode); /** - * Waits for the completion of a lock, previously requested through lockBegin. Must only be - * called, if lockBegin returned LOCK_WAITING. The resId argument must match what was - * previously passed to lockBegin. + * Waits for the completion of a lock, previously requested through lockBegin or + * lockGlobalBegin. Must only be called, if lockBegin returned LOCK_WAITING. The resId + * argument must match what was previously passed to lockBegin. */ LockResult lockComplete(ResourceId resId, unsigned timeoutMs, bool checkDeadlock); diff --git a/src/mongo/db/concurrency/locker.h b/src/mongo/db/concurrency/locker.h index db684c97f9c..157c380214c 100644 --- a/src/mongo/db/concurrency/locker.h +++ b/src/mongo/db/concurrency/locker.h @@ -62,8 +62,8 @@ namespace mongo { * X - Stops all activity. Used for administrative operations (repl state changes, * shutdown, etc). * - * This method can be called recursively, but each call to beginTransaction must be - * accompanied by a call to endTransaction. + * This method can be called recursively, but each call to lockGlobal must be accompanied + * by a call to unlockAll. * * @param mode Mode in which the global lock should be acquired. Also indicates the intent * of the operation. @@ -77,6 +77,18 @@ namespace mongo { virtual LockResult lockGlobal(LockMode mode, unsigned timeoutMs = UINT_MAX) = 0; /** + * Requests *only* the global lock to be acquired in the specified mode. Does not do the + * full MMAP V1 concurrency control functionality, which acquires the flush lock as well. + * + * Should only be used for cases, where no data reads or writes will be performed, such as + * replication step-down. + * + * See the comments for lockBegin/Complete for more information on the semantics. 
+ */ + virtual LockResult lockGlobalBegin(LockMode mode) = 0; + virtual LockResult lockGlobalComplete(unsigned timeoutMs) = 0; + + /** * Decrements the reference count on the global lock. If the reference count on the * global lock hits zero, the transaction is over, and unlockAll unlocks all other locks. * |