author    Kaloian Manassiev <kaloian.manassiev@mongodb.com>    2014-11-18 11:35:22 -0500
committer Kaloian Manassiev <kaloian.manassiev@mongodb.com>    2014-11-19 15:28:20 -0500
commit    2304904687c5d29e228d86da95244682dc62caa1 (patch)
tree      e4e999bf58fe0204f287e157b7bea4ed8b8afa77
parent    489690cfbccc24cf69b6ae7848581303ec8f4b0e (diff)
download  mongo-2304904687c5d29e228d86da95244682dc62caa1.tar.gz
SERVER-16194 Add a lockGlobalBegin capability for the global lock
This allows replication step-down to queue itself for acquiring the global lock, then go and kill all of the lock's current owners, and only wait for the acquisition afterwards.
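
Below is a minimal usage sketch (not part of this commit) of how a step-down routine might drive the new two-phase API. The killConflictingOperations() helper is hypothetical; Locker, lockGlobalBegin, lockGlobalComplete, MODE_X, LOCK_WAITING and LOCK_OK are the names introduced or used in the diff below.

    // Hypothetical illustration only; not part of this change.
    #include "mongo/db/concurrency/locker.h"

    namespace mongo {

        // Hypothetical helper: interrupts the operations that currently hold the
        // global lock so the queued MODE_X request can eventually be granted.
        void killConflictingOperations();

        // Returns true if the global X lock was acquired within timeoutMs.
        bool stepDownAcquireGlobalLock(Locker* locker, unsigned timeoutMs) {
            // Enqueue the global lock request without blocking.
            LockResult result = locker->lockGlobalBegin(MODE_X);

            if (result == LOCK_WAITING) {
                // The request is queued behind the current owners. Kill them first,
                // then block waiting for the grant (or the timeout).
                killConflictingOperations();
                result = locker->lockGlobalComplete(timeoutMs);
            }

            // Per the lockBegin/lockComplete contract, a request that was queued but
            // never granted still has to be abandoned with a matching unlock.
            return result == LOCK_OK;
        }

    } // namespace mongo
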
-rw-r--r--  src/mongo/db/concurrency/d_concurrency_test.cpp  |  18
-rw-r--r--  src/mongo/db/concurrency/lock_state.cpp          | 260
-rw-r--r--  src/mongo/db/concurrency/lock_state.h            |  21
-rw-r--r--  src/mongo/db/concurrency/locker.h                |  16
4 files changed, 174 insertions(+), 141 deletions(-)
diff --git a/src/mongo/db/concurrency/d_concurrency_test.cpp b/src/mongo/db/concurrency/d_concurrency_test.cpp
index 141baf48d8f..e0b6be8ebd7 100644
--- a/src/mongo/db/concurrency/d_concurrency_test.cpp
+++ b/src/mongo/db/concurrency/d_concurrency_test.cpp
@@ -86,9 +86,9 @@ namespace mongo {
}
}
- TEST(DConcurrency, readlocktryNoTimeoutDueToFlushLockS) {
+ TEST(DConcurrency, readlocktryNoTimeoutDueToGlobalLockS) {
MMAPV1LockerImpl ls(1);
- AutoAcquireFlushLockForMMAPV1Commit autoFlushLock(&ls);
+ Lock::GlobalRead globalRead(&ls);
MMAPV1LockerImpl lsTry(2);
readlocktry lockTry(&lsTry, 1);
@@ -96,9 +96,9 @@ namespace mongo {
ASSERT(lockTry.got());
}
- TEST(DConcurrency, writelocktryTimeoutDueToFlushLockS) {
+ TEST(DConcurrency, writelocktryTimeoutDueToGlobalLockS) {
MMAPV1LockerImpl ls(1);
- AutoAcquireFlushLockForMMAPV1Commit autoFlushLock(&ls);
+ Lock::GlobalRead globalRead(&ls);
MMAPV1LockerImpl lsTry(2);
writelocktry lockTry(&lsTry, 1);
@@ -106,10 +106,9 @@ namespace mongo {
ASSERT(!lockTry.got());
}
- TEST(DConcurrency, readlocktryTimeoutDueToFlushLockX) {
+ TEST(DConcurrency, readlocktryTimeoutDueToGlobalLockX) {
MMAPV1LockerImpl ls(1);
- AutoAcquireFlushLockForMMAPV1Commit autoFlushLock(&ls);
- autoFlushLock.upgradeFlushLockToExclusive();
+ Lock::GlobalWrite globalWrite(&ls);
MMAPV1LockerImpl lsTry(2);
readlocktry lockTry(&lsTry, 1);
@@ -117,10 +116,9 @@ namespace mongo {
ASSERT(!lockTry.got());
}
- TEST(DConcurrency, writelocktryTimeoutDueToFlushLockX) {
+ TEST(DConcurrency, writelocktryTimeoutDueToGlobalLockX) {
MMAPV1LockerImpl ls(1);
- AutoAcquireFlushLockForMMAPV1Commit autoFlushLock(&ls);
- autoFlushLock.upgradeFlushLockToExclusive();
+ Lock::GlobalWrite globalWrite(&ls);
MMAPV1LockerImpl lsTry(2);
writelocktry lockTry(&lsTry, 1);
diff --git a/src/mongo/db/concurrency/lock_state.cpp b/src/mongo/db/concurrency/lock_state.cpp
index 59bf9e5955b..450f20e84ef 100644
--- a/src/mongo/db/concurrency/lock_state.cpp
+++ b/src/mongo/db/concurrency/lock_state.cpp
@@ -40,113 +40,115 @@
#include "mongo/util/stacktrace.h"
namespace mongo {
+namespace {
+
+ // Global lock manager instance.
+ LockManager globalLockManager;
+
+ // Global lock. Every server operation which uses the Locker must acquire this lock at least
+ // once. See comments in the header file (begin/endTransaction) for more information.
+ const ResourceId resourceIdGlobal = ResourceId(RESOURCE_GLOBAL, 1ULL);
+
+ // Flush lock. This is only used for the MMAP V1 storage engine and synchronizes journal writes
+ // to the shared view and remaps. See the comments in the header for information on how MMAP V1
+ // concurrency control works.
+ const ResourceId resourceIdMMAPV1Flush = ResourceId(RESOURCE_MMAPV1_FLUSH, 2ULL);
+
+ // How often (in millis) to check for deadlock if a lock has not been granted for some time
+ const unsigned DeadlockTimeoutMs = 100;
+
+ /**
+ * Used to sort locks by granularity when snapshotting lock state. We must report and reacquire
+ * locks in the same granularity in which they are acquired (i.e. global, flush, database,
+ * collection, etc).
+ */
+ struct SortByGranularity {
+ inline bool operator()(const Locker::OneLock& lhs, const Locker::OneLock& rhs) const {
+ return lhs.resourceId.getType() < rhs.resourceId.getType();
+ }
+ };
+
+ /**
+ * Returns whether the passed in mode is S or IS. Used for validation checks.
+ */
+ bool isSharedMode(LockMode mode) {
+ return (mode == MODE_IS || mode == MODE_S);
+ }
+
+ /**
+ * Whether the particular lock's release should be held until the end of the operation. We
+ * delay release of exclusive locks (locks that are for write operations) in order to ensure
+ * that the data they protect is committed successfully.
+ */
+ bool shouldDelayUnlock(ResourceId resId, LockMode mode) {
+ // Global and flush lock are not used to protect transactional resources and as such, they
+ // need to be acquired and released when requested.
+ if (resId == resourceIdGlobal) {
+ return false;
+ }
- namespace {
-
- // Global lock manager instance.
- LockManager globalLockManager;
-
- // Global lock. Every server operation, which uses the Locker must acquire this lock at
- // least once. See comments in the header file (begin/endTransaction) for more information
- // on its use.
- const ResourceId resourceIdGlobal = ResourceId(RESOURCE_GLOBAL, 1ULL);
-
- // Flush lock. This is only used for the MMAP V1 storage engine and synchronizes the
- // application of journal writes to the shared view and remaps. See the comments in the
- // header for _acquireFlushLockForMMAPV1/_releaseFlushLockForMMAPV1 for more information
- // on its use.
- const ResourceId resourceIdMMAPV1Flush = ResourceId(RESOURCE_MMAPV1_FLUSH, 2ULL);
-
- // How often (in millis) to check for deadlock if a lock has not been granted for some time
- const unsigned DeadlockTimeoutMs = 100;
-
- /**
- * Used to sort locks by granularity when snapshotting lock state. We must report and
- * reacquire locks in the same granularity in which they are acquired (i.e. global, flush,
- * database, collection, etc).
- */
- struct SortByGranularity {
- inline bool operator()(const Locker::OneLock& lhs, const Locker::OneLock& rhs) {
- return lhs.resourceId.getType() < rhs.resourceId.getType();
- }
- };
-
- /**
- * Returns whether the passed in mode is S or IS. Used for validation checks.
- */
- bool isSharedMode(LockMode mode) {
- return (mode == MODE_IS || mode == MODE_S);
- }
-
- /**
- * Whether the particular lock's release should be held until the end of the operation. We
- * delay releases for exclusive locks (locks that are for write operations) in order to
- * ensure that the data they protect is committed successfully.
- */
- bool shouldDelayUnlock(ResourceId resId, LockMode mode) {
- // Global and flush lock are not used to protect transactional resources and as
- // such, they need to be acquired and released when requested.
- if (resId == resourceIdGlobal) {
- return false;
- }
-
- if (resId == resourceIdMMAPV1Flush) {
- return false;
- }
+ if (resId == resourceIdMMAPV1Flush) {
+ return false;
+ }
- switch (mode) {
- case MODE_X:
- case MODE_IX:
- return true;
+ switch (mode) {
+ case MODE_X:
+ case MODE_IX:
+ return true;
- case MODE_IS:
- case MODE_S:
- return false;
+ case MODE_IS:
+ case MODE_S:
+ return false;
- default:
- invariant(false);
- }
+ default:
+ invariant(false);
}
+ }
- /**
- * Dumps the contents of the global lock manager to the server log for diagnostics.
- */
- const uint64_t LockMgrDumpThrottleMicros = 30 * Timer::microsPerSecond;
- AtomicUInt64 lastDumpTimestampMicros(0);
+ /**
+ * Dumps the contents of the global lock manager to the server log for diagnostics.
+ */
+ enum {
+ LockMgrDumpThrottleMillis = 60000,
+ LockMgrDumpThrottleMicros = LockMgrDumpThrottleMillis * 1000
+ };
- void dumpGlobalLockManagerAndCallstackThrottled(const Locker* locker) {
- const uint64_t lastDumpMicros = lastDumpTimestampMicros.load();
+ AtomicUInt64 lastDumpTimestampMicros(0);
- // Don't print too frequently
- if (curTimeMicros64() - lastDumpMicros < LockMgrDumpThrottleMicros) return;
+ void dumpGlobalLockManagerAndCallstackThrottled(const Locker* locker) {
+ const uint64_t lastDumpMicros = lastDumpTimestampMicros.load();
- // Only one thread should dump the lock manager in order to not pollute the log
- if (lastDumpTimestampMicros.compareAndSwap(lastDumpMicros,
- curTimeMicros64()) == lastDumpMicros) {
+ // Don't print too frequently
+ if (curTimeMicros64() - lastDumpMicros < LockMgrDumpThrottleMicros) return;
- log() << "LockerId " << locker->getId()
- << " has been waiting to acquire lock for more than 30 seconds. MongoDB will"
- << " print the lock manager state and the stack of the thread that has been"
- << " waiting, for diagnostic purposes. This message does not necessary imply"
- << " that the server is experiencing an outage, but might be an indication"
- << " of an overloaded server.";
+ // Only one thread should dump the lock manager in order to not pollute the log
+ if (lastDumpTimestampMicros.compareAndSwap(lastDumpMicros,
+ curTimeMicros64()) == lastDumpMicros) {
- // Dump the lock manager state and the stack trace so we can investigate
- globalLockManager.dump();
+ log() << "LockerId " << locker->getId()
+ << " has been waiting to acquire lock for more than 30 seconds. MongoDB will"
+ << " print the lock manager state and the stack of the thread that has been"
+ << " waiting, for diagnostic purposes. This message does not necessarily imply"
+ << " that the server is experiencing an outage, but might be an indication of"
+ << " an overload.";
- log() << '\n';
- printStackTrace();
+ // Dump the lock manager state and the stack trace so we can investigate
+ globalLockManager.dump();
- // If a deadlock was discovered, the server will never recover from it, so shutdown
- DeadlockDetector wfg(globalLockManager, locker);
- if (wfg.check().hasCycle()) {
- severe() << "Deadlock found during lock acquisition: " << wfg.toString();
- fassertFailed(28557);
- }
+ log() << '\n';
+ printStackTrace();
+
+ // If a deadlock was discovered, the server will never recover from it, so shutdown
+ DeadlockDetector wfg(globalLockManager, locker);
+ if (wfg.check().hasCycle()) {
+ severe() << "Deadlock found during lock acquisition: " << wfg.toString();
+ fassertFailed(28557);
}
}
}
+} // namespace
+
template<bool IsForMMAPV1>
bool LockerImpl<IsForMMAPV1>::isW() const {
@@ -311,47 +313,63 @@ namespace mongo {
template<bool IsForMMAPV1>
LockResult LockerImpl<IsForMMAPV1>::lockGlobal(LockMode mode, unsigned timeoutMs) {
- LockRequestsMap::Iterator it = _requests.find(resourceIdGlobal);
- if (!it) {
- // Global lock should be the first lock on any operation
- invariant(_requests.empty());
+ LockResult globalLockResult = lockGlobalBegin(mode);
+ if (globalLockResult != LOCK_OK) {
+ // Could only be LOCK_WAITING (checked by lockGlobalComplete)
+ globalLockResult = lockGlobalComplete(timeoutMs);
- // Start counting time since first global lock acquisition (that's when effectively
- // any timing for the locker counts from).
- _timer.reset();
+ // If waiting for the lock failed, no point in asking for the flush lock
+ if (globalLockResult != LOCK_OK) {
+ return globalLockResult;
+ }
}
- else {
- // No upgrades on the GlobalLock are currently necessary. Should not be used until we
- // are handling deadlocks on anything other than the flush thread.
- invariant(it->mode >= mode);
+
+ // We would have returned above if global lock acquisition failed for any reason
+ invariant(globalLockResult == LOCK_OK);
+
+ // We are done if this is not MMAP V1
+ if (!IsForMMAPV1) {
+ return LOCK_OK;
}
- LockResult globalLockResult = lock(resourceIdGlobal, mode, timeoutMs);
- if (globalLockResult != LOCK_OK) {
- invariant(globalLockResult == LOCK_TIMEOUT);
+ // Special-handling for MMAP V1 commit concurrency control. We will not obey the timeout
+ // request to simplify the logic here, since the only place which acquires the global lock
+ // with a timeout is the shutdown code.
- return globalLockResult;
+ // The flush lock always has a reference count of 1, because it is dropped at the end of
+ // each write unit of work in order to allow the flush thread to run. See the comments in
+ // the header for information on how the MMAP V1 journaling system works.
+ const LockRequest* globalLockRequest = _requests.find(resourceIdGlobal).objAddr();
+ if (globalLockRequest->recursiveCount > 1) {
+ return LOCK_OK;
}
- // Special-handling for MMAP V1 concurrency control
- if (IsForMMAPV1 && !it) {
- // Obey the requested timeout
- const unsigned elapsedTimeMs = _timer.millis();
- const unsigned remainingTimeMs =
- elapsedTimeMs < timeoutMs ? (timeoutMs - elapsedTimeMs) : 0;
+ const LockResult flushLockResult = lock(resourceIdMMAPV1Flush,
+ _getModeForMMAPV1FlushLock());
+ if (flushLockResult != LOCK_OK) {
+ invariant(flushLockResult == LOCK_TIMEOUT);
+ invariant(unlock(resourceIdGlobal));
+ }
+
+ return flushLockResult;
+ }
+
+ template<bool IsForMMAPV1>
+ LockResult LockerImpl<IsForMMAPV1>::lockGlobalComplete(unsigned timeoutMs) {
+ return lockComplete(resourceIdGlobal, timeoutMs, false);
+ }
- LockResult flushLockResult =
- lock(resourceIdMMAPV1Flush, _getModeForMMAPV1FlushLock(), remainingTimeMs);
+ template<bool IsForMMAPV1>
+ LockResult LockerImpl<IsForMMAPV1>::lockGlobalBegin(LockMode mode) {
+ const LockResult result = lockBegin(resourceIdGlobal, mode);
- if (flushLockResult != LOCK_OK) {
- invariant(flushLockResult == LOCK_TIMEOUT);
- invariant(unlock(resourceIdGlobal));
+ if (result == LOCK_OK) return LOCK_OK;
- return flushLockResult;
- }
- }
+ // Currently, deadlock detection does not happen inline with lock acquisition so the only
+ // unsuccessful result that the lock manager would return is LOCK_WAITING.
+ invariant(result == LOCK_WAITING);
- return LOCK_OK;
+ return result;
}
template<bool IsForMMAPV1>
@@ -704,7 +722,7 @@ namespace mongo {
// This will occasionally dump the global lock manager in case lock acquisition is
// taking too long.
- if (elapsedTimeMs > 30000U) {
+ if (elapsedTimeMs > LockMgrDumpThrottleMillis) {
dumpGlobalLockManagerAndCallstackThrottled(this);
}
}
diff --git a/src/mongo/db/concurrency/lock_state.h b/src/mongo/db/concurrency/lock_state.h
index 8dd1125cc14..9aa540dee6e 100644
--- a/src/mongo/db/concurrency/lock_state.h
+++ b/src/mongo/db/concurrency/lock_state.h
@@ -96,6 +96,9 @@ namespace mongo {
virtual LockerId getId() const { return _id; }
virtual LockResult lockGlobal(LockMode mode, unsigned timeoutMs = UINT_MAX);
+ virtual LockResult lockGlobalBegin(LockMode mode);
+ virtual LockResult lockGlobalComplete(unsigned timeoutMs);
+
virtual void downgradeGlobalXtoSForMMAPV1();
virtual bool unlockAll();
@@ -127,12 +130,11 @@ namespace mongo {
virtual void restoreLockState(const LockSnapshot& stateToRestore);
/**
- * These two methods allow for lock requests to be acquired in a non-blocking way. There
- * can be only one outstanding pending lock request per locker object. I.e., for each call
- * to lockBegin, which does not return LOCK_OK, there needs to be a corresponding call to
- * lockComplete or unlock.
+ * Allows for lock requests to be requested in a non-blocking way. There can be only one
+ * outstanding pending lock request per locker object.
*
- * lockBegin posts a request to the lock manager for the specified lock to be acquired
+ * lockBegin posts a request to the lock manager for the specified lock to be acquired,
+ * which either immediately grants the lock, or puts the requestor on the conflict queue
* and returns immediately with the result of the acquisition. The result can be one of:
*
* LOCK_OK - Nothing more needs to be done. The lock is granted.
@@ -141,13 +143,16 @@ namespace mongo {
* order to wait for the actual grant to occur. If the caller no longer needs to wait
* for the grant to happen, unlock needs to be called with the same resource passed
* to lockBegin.
+ *
+ * In other words, for each call to lockBegin that does not return LOCK_OK, there needs to
+ * be a corresponding call to either lockComplete or unlock.
*/
LockResult lockBegin(ResourceId resId, LockMode mode);
/**
- * Waits for the completion of a lock, previously requested through lockBegin. Must only be
- * called, if lockBegin returned LOCK_WAITING. The resId argument must match what was
- * previously passed to lockBegin.
+ * Waits for the completion of a lock, previously requested through lockBegin or
+ * lockGlobalBegin. Must only be called if lockBegin returned LOCK_WAITING. The resId
+ * argument must match what was previously passed to lockBegin.
*/
LockResult lockComplete(ResourceId resId, unsigned timeoutMs, bool checkDeadlock);
diff --git a/src/mongo/db/concurrency/locker.h b/src/mongo/db/concurrency/locker.h
index db684c97f9c..157c380214c 100644
--- a/src/mongo/db/concurrency/locker.h
+++ b/src/mongo/db/concurrency/locker.h
@@ -62,8 +62,8 @@ namespace mongo {
* X - Stops all activity. Used for administrative operations (repl state changes,
* shutdown, etc).
*
- * This method can be called recursively, but each call to beginTransaction must be
- * accompanied by a call to endTransaction.
+ * This method can be called recursively, but each call to lockGlobal must be accompanied
+ * by a call to unlockAll.
*
* @param mode Mode in which the global lock should be acquired. Also indicates the intent
* of the operation.
@@ -77,6 +77,18 @@ namespace mongo {
virtual LockResult lockGlobal(LockMode mode, unsigned timeoutMs = UINT_MAX) = 0;
/**
+ * Requests *only* the global lock to be acquired in the specified mode. Does not do the
+ * full MMAP V1 concurrency control functionality, which acquires the flush lock as well.
+ *
+ * Should only be used in cases where no data reads or writes will be performed, such as
+ * replication step-down.
+ *
+ * See the comments for lockBegin/Complete for more information on the semantics.
+ */
+ virtual LockResult lockGlobalBegin(LockMode mode) = 0;
+ virtual LockResult lockGlobalComplete(unsigned timeoutMs) = 0;
+
+ /**
* Decrements the reference count on the global lock. If the reference count on the
* global lock hits zero, the transaction is over, and unlockAll unlocks all other locks.
*