SERVER-16194 Add a lockGlobalBegin capability for the global lock

This allows replication step-down to first queue its request for the global lock, then kill all current owners of the lock, and only afterwards wait for the lock to be granted.
author: Kaloian Manassiev <kaloian.manassiev@mongodb.com> 2014-11-18 11:35:22 -0500
committer: Kaloian Manassiev <kaloian.manassiev@mongodb.com> 2014-11-19 15:28:20 -0500
commit: 2304904687c5d29e228d86da95244682dc62caa1 (patch)
tree: e4e999bf58fe0204f287e157b7bea4ed8b8afa77
parent: 489690cfbccc24cf69b6ae7848581303ec8f4b0e (diff)
download: mongo-2304904687c5d29e228d86da95244682dc62caa1.tar.gz
4 files changed, 174 insertions, 141 deletions
diff --git a/src/mongo/db/concurrency/d_concurrency_test.cpp b/src/mongo/db/concurrency/d_concurrency_test.cpp
index 141baf48d8f..e0b6be8ebd7 100644
--- a/src/mongo/db/concurrency/d_concurrency_test.cpp
+++ b/src/mongo/db/concurrency/d_concurrency_test.cpp
@@ -86,9 +86,9 @@ namespace mongo {
         }
     }
 
-    TEST(DConcurrency, readlocktryNoTimeoutDueToFlushLockS) {
+    TEST(DConcurrency, readlocktryNoTimeoutDueToGlobalLockS) {
         MMAPV1LockerImpl ls(1);
-        AutoAcquireFlushLockForMMAPV1Commit autoFlushLock(&ls);
+        Lock::GlobalRead globalRead(&ls);
 
         MMAPV1LockerImpl lsTry(2);
         readlocktry lockTry(&lsTry, 1);
@@ -96,9 +96,9 @@ namespace mongo {
         ASSERT(lockTry.got());
     }
 
-    TEST(DConcurrency, writelocktryTimeoutDueToFlushLockS) {
+    TEST(DConcurrency, writelocktryTimeoutDueToGlobalLockS) {
         MMAPV1LockerImpl ls(1);
-        AutoAcquireFlushLockForMMAPV1Commit autoFlushLock(&ls);
+        Lock::GlobalRead globalRead(&ls);
 
         MMAPV1LockerImpl lsTry(2);
         writelocktry lockTry(&lsTry, 1);
@@ -106,10 +106,9 @@ namespace mongo {
         ASSERT(!lockTry.got());
     }
 
-    TEST(DConcurrency, readlocktryTimeoutDueToFlushLockX) {
+    TEST(DConcurrency, readlocktryTimeoutDueToGlobalLockX) {
         MMAPV1LockerImpl ls(1);
-        AutoAcquireFlushLockForMMAPV1Commit autoFlushLock(&ls);
-        autoFlushLock.upgradeFlushLockToExclusive();
+        Lock::GlobalWrite globalWrite(&ls);
 
         MMAPV1LockerImpl lsTry(2);
         readlocktry lockTry(&lsTry, 1);
@@ -117,10 +116,9 @@ namespace mongo {
         ASSERT(!lockTry.got());
     }
 
-    TEST(DConcurrency, writelocktryTimeoutDueToFlushLockX) {
+    TEST(DConcurrency, writelocktryTimeoutDueToGlobalLockX) {
         MMAPV1LockerImpl ls(1);
-        AutoAcquireFlushLockForMMAPV1Commit autoFlushLock(&ls);
-        autoFlushLock.upgradeFlushLockToExclusive();
+        Lock::GlobalWrite globalWrite(&ls);
 
         MMAPV1LockerImpl lsTry(2);
         writelocktry lockTry(&lsTry, 1);
diff --git a/src/mongo/db/concurrency/lock_state.cpp b/src/mongo/db/concurrency/lock_state.cpp
index 59bf9e5955b..450f20e84ef 100644
--- a/src/mongo/db/concurrency/lock_state.cpp
+++ b/src/mongo/db/concurrency/lock_state.cpp
@@ -40,113 +40,115 @@
 #include "mongo/util/stacktrace.h"
 
 namespace mongo {
+namespace {
+
+    // Global lock manager instance.
+    LockManager globalLockManager;
+
+    // Global lock. Every server operation, which uses the Locker must acquire this lock at least
+    // once. See comments in the header file (begin/endTransaction) for more information.
+    const ResourceId resourceIdGlobal = ResourceId(RESOURCE_GLOBAL, 1ULL);
+
+    // Flush lock. This is only used for the MMAP V1 storage engine and synchronizes journal writes
+    // to the shared view and remaps. See the comments in the header for information on how MMAP V1
+    // concurrency control works.
+    const ResourceId resourceIdMMAPV1Flush = ResourceId(RESOURCE_MMAPV1_FLUSH, 2ULL);
+
+    // How often (in millis) to check for deadlock if a lock has not been granted for some time
+    const unsigned DeadlockTimeoutMs = 100;
+
+    /**
+     * Used to sort locks by granularity when snapshotting lock state. We must report and reacquire
+     * locks in the same granularity in which they are acquired (i.e. global, flush, database,
+     * collection, etc).
+     */
+    struct SortByGranularity {
+        inline bool operator()(const Locker::OneLock& lhs, const Locker::OneLock& rhs) const {
+            return lhs.resourceId.getType() < rhs.resourceId.getType();
+        }
+    };
+
+    /**
+     * Returns whether the passed in mode is S or IS. Used for validation checks.
+     */
+    bool isSharedMode(LockMode mode) {
+        return (mode == MODE_IS || mode == MODE_S);
+    }
+
+    /**
+     * Whether the particular lock's release should be held until the end of the operation. We
+     * delay release of exclusive locks (locks that are for write operations) in order to ensure
+     * that the data they protect is committed successfully.
+     */
+    bool shouldDelayUnlock(ResourceId resId, LockMode mode) {
+        // Global and flush lock are not used to protect transactional resources and as such, they
+        // need to be acquired and released when requested.
+        if (resId == resourceIdGlobal) {
+            return false;
+        }
 
-    namespace {
-
-        // Global lock manager instance.
-        LockManager globalLockManager;
-
-        // Global lock. Every server operation, which uses the Locker must acquire this lock at
-        // least once. See comments in the header file (begin/endTransaction) for more information
-        // on its use.
-        const ResourceId resourceIdGlobal = ResourceId(RESOURCE_GLOBAL, 1ULL);
-
-        // Flush lock. This is only used for the MMAP V1 storage engine and synchronizes the
-        // application of journal writes to the shared view and remaps. See the comments in the
-        // header for _acquireFlushLockForMMAPV1/_releaseFlushLockForMMAPV1 for more information
-        // on its use.
-        const ResourceId resourceIdMMAPV1Flush = ResourceId(RESOURCE_MMAPV1_FLUSH, 2ULL);
-
-        // How often (in millis) to check for deadlock if a lock has not been granted for some time
-        const unsigned DeadlockTimeoutMs = 100;
-
-        /**
-         * Used to sort locks by granularity when snapshotting lock state. We must report and
-         * reacquire locks in the same granularity in which they are acquired (i.e. global, flush,
-         * database, collection, etc).
-         */
-        struct SortByGranularity {
-            inline bool operator()(const Locker::OneLock& lhs, const Locker::OneLock& rhs) {
-                return lhs.resourceId.getType() < rhs.resourceId.getType();
-            }
-        };
-
-        /**
-         * Returns whether the passed in mode is S or IS. Used for validation checks.
-         */
-        bool isSharedMode(LockMode mode) {
-            return (mode == MODE_IS || mode == MODE_S);
-        }
-
-        /**
-         * Whether the particular lock's release should be held until the end of the operation. We
-         * delay releases for exclusive locks (locks that are for write operations) in order to
-         * ensure that the data they protect is committed successfully.
-         */
-        bool shouldDelayUnlock(ResourceId resId, LockMode mode) {
-            // Global and flush lock are not used to protect transactional resources and as
-            // such, they need to be acquired and released when requested.
-            if (resId == resourceIdGlobal) {
-                return false;
-            }
-
-            if (resId == resourceIdMMAPV1Flush) {
-                return false;
-            }
+        if (resId == resourceIdMMAPV1Flush) {
+            return false;
+        }
 
-            switch (mode) {
-            case MODE_X:
-            case MODE_IX:
-                return true;
+        switch (mode) {
+        case MODE_X:
+        case MODE_IX:
+            return true;
 
-            case MODE_IS:
-            case MODE_S:
-                return false;
+        case MODE_IS:
+        case MODE_S:
+            return false;
 
-            default:
-                invariant(false);
-            }
+        default:
+            invariant(false);
         }
+    }
 
-        /**
-         * Dumps the contents of the global lock manager to the server log for diagnostics.
-         */
-        const uint64_t LockMgrDumpThrottleMicros = 30 * Timer::microsPerSecond;
-        AtomicUInt64 lastDumpTimestampMicros(0);
+    /**
+     * Dumps the contents of the global lock manager to the server log for diagnostics.
+     */
+    enum {
+        LockMgrDumpThrottleMillis = 60000,
+        LockMgrDumpThrottleMicros = LockMgrDumpThrottleMillis * 1000
+    };
 
-        void dumpGlobalLockManagerAndCallstackThrottled(const Locker* locker) {
-            const uint64_t lastDumpMicros = lastDumpTimestampMicros.load();
+    AtomicUInt64 lastDumpTimestampMicros(0);
 
-            // Don't print too frequently
-            if (curTimeMicros64() - lastDumpMicros < LockMgrDumpThrottleMicros) return;
+    void dumpGlobalLockManagerAndCallstackThrottled(const Locker* locker) {
+        const uint64_t lastDumpMicros = lastDumpTimestampMicros.load();
 
-            // Only one thread should dump the lock manager in order to not pollute the log
-            if (lastDumpTimestampMicros.compareAndSwap(lastDumpMicros,
-                                                       curTimeMicros64()) == lastDumpMicros) {
+        // Don't print too frequently
+        if (curTimeMicros64() - lastDumpMicros < LockMgrDumpThrottleMicros) return;
 
-                log() << "LockerId " << locker->getId()
-                      << " has been waiting to acquire lock for more than 30 seconds. MongoDB will"
-                      << " print the lock manager state and the stack of the thread that has been"
-                      << " waiting, for diagnostic purposes. This message does not necessary imply"
-                      << " that the server is experiencing an outage, but might be an indication"
-                      << " of an overloaded server.";
+        // Only one thread should dump the lock manager in order to not pollute the log
+        if (lastDumpTimestampMicros.compareAndSwap(lastDumpMicros,
+            curTimeMicros64()) == lastDumpMicros) {
 
-                // Dump the lock manager state and the stack trace so we can investigate
-                globalLockManager.dump();
+            log() << "LockerId " << locker->getId()
+                  << " has been waiting to acquire lock for more than 30 seconds. MongoDB will"
+                  << " print the lock manager state and the stack of the thread that has been"
+                  << " waiting, for diagnostic purposes. This message does not necessary imply"
+                  << " that the server is experiencing an outage, but might be an indication of"
+                  << " an overload.";
 
-                log() << '\n';
-                printStackTrace();
+            // Dump the lock manager state and the stack trace so we can investigate
+            globalLockManager.dump();
 
-                // If a deadlock was discovered, the server will never recover from it, so shutdown
-                DeadlockDetector wfg(globalLockManager, locker);
-                if (wfg.check().hasCycle()) {
-                    severe() << "Deadlock found during lock acquisition: " << wfg.toString();
-                    fassertFailed(28557);
-                }
+            log() << '\n';
+            printStackTrace();
+
+            // If a deadlock was discovered, the server will never recover from it, so shutdown
+            DeadlockDetector wfg(globalLockManager, locker);
+            if (wfg.check().hasCycle()) {
+                severe() << "Deadlock found during lock acquisition: " << wfg.toString();
+                fassertFailed(28557);
             }
         }
     }
 
+} // namespace
+
 
     template<bool IsForMMAPV1>
     bool LockerImpl<IsForMMAPV1>::isW() const {
@@ -311,47 +313,63 @@ namespace mongo {
 
     template<bool IsForMMAPV1>
     LockResult LockerImpl<IsForMMAPV1>::lockGlobal(LockMode mode, unsigned timeoutMs) {
-        LockRequestsMap::Iterator it = _requests.find(resourceIdGlobal);
-        if (!it) {
-            // Global lock should be the first lock on any operation
-            invariant(_requests.empty());
+        LockResult globalLockResult = lockGlobalBegin(mode);
+        if (globalLockResult != LOCK_OK) {
+            // Could only be LOCK_WAITING (checked by lockGlobalBegin)
+            globalLockResult = lockGlobalComplete(timeoutMs);
 
-            // Start counting time since first global lock acquisition (that's when effectively
-            // any timing for the locker counts from).
-            _timer.reset();
+            // If waiting for the lock failed, no point in asking for the flush lock
+            if (globalLockResult != LOCK_OK) {
+                return globalLockResult;
+            }
         }
-        else {
-            // No upgrades on the GlobalLock are currently necessary. Should not be used until we
-            // are handling deadlocks on anything other than the flush thread.
-            invariant(it->mode >= mode);
+
+        // We would have returned above if global lock acquisition failed for any reason
+        invariant(globalLockResult == LOCK_OK);
+
+        // We are done if this is not MMAP V1
+        if (!IsForMMAPV1) {
+            return LOCK_OK;
         }
 
-        LockResult globalLockResult = lock(resourceIdGlobal, mode, timeoutMs);
-        if (globalLockResult != LOCK_OK) {
-            invariant(globalLockResult == LOCK_TIMEOUT);
+        // Special-handling for MMAP V1 commit concurrency control. We will not obey the timeout
+        // request to simplify the logic here, since the only places which acquire the global lock
+        // with a timeout are in the shutdown code.
 
-            return globalLockResult;
+        // The flush lock always has a reference count of 1, because it is dropped at the end of
+        // each write unit of work in order to allow the flush thread to run. See the comments in
+        // the header for information on how the MMAP V1 journaling system works.
+        const LockRequest* globalLockRequest = _requests.find(resourceIdGlobal).objAddr();
+        if (globalLockRequest->recursiveCount > 1){
+            return LOCK_OK;
         }
 
-        // Special-handling for MMAP V1 concurrency control
-        if (IsForMMAPV1 && !it) {
-            // Obey the requested timeout
-            const unsigned elapsedTimeMs = _timer.millis();
-            const unsigned remainingTimeMs =
-                elapsedTimeMs < timeoutMs ? (timeoutMs - elapsedTimeMs) : 0;
+        const LockResult flushLockResult = lock(resourceIdMMAPV1Flush,
+                                                _getModeForMMAPV1FlushLock());
+        if (flushLockResult != LOCK_OK) {
+            invariant(flushLockResult == LOCK_TIMEOUT);
+            invariant(unlock(resourceIdGlobal));
+        }
+
+        return flushLockResult;
+    }
+
+    template<bool IsForMMAPV1>
+    LockResult LockerImpl<IsForMMAPV1>::lockGlobalComplete(unsigned timeoutMs) {
+        return lockComplete(resourceIdGlobal, timeoutMs, false);
+    }
 
-            LockResult flushLockResult =
-                lock(resourceIdMMAPV1Flush, _getModeForMMAPV1FlushLock(), remainingTimeMs);
+    template<bool IsForMMAPV1>
+    LockResult LockerImpl<IsForMMAPV1>::lockGlobalBegin(LockMode mode) {
+        const LockResult result = lockBegin(resourceIdGlobal, mode);
 
-            if (flushLockResult != LOCK_OK) {
-                invariant(flushLockResult == LOCK_TIMEOUT);
-                invariant(unlock(resourceIdGlobal));
+        if (result == LOCK_OK) return LOCK_OK;
 
-                return flushLockResult;
-            }
-        }
+        // Currently, deadlock detection does not happen inline with lock acquisition so the only
+        // unsuccessful result that the lock manager would return is LOCK_WAITING.
+        invariant(result == LOCK_WAITING);
 
-        return LOCK_OK;
+        return result;
     }
 
     template<bool IsForMMAPV1>
@@ -704,7 +722,7 @@ namespace mongo {
 
             // This will occasionally dump the global lock manager in case lock acquisition is
             // taking too long.
-            if (elapsedTimeMs > 30000U) {
+            if (elapsedTimeMs > LockMgrDumpThrottleMillis) {
                 dumpGlobalLockManagerAndCallstackThrottled(this);
             }
         }
diff --git a/src/mongo/db/concurrency/lock_state.h b/src/mongo/db/concurrency/lock_state.h
index 8dd1125cc14..9aa540dee6e 100644
--- a/src/mongo/db/concurrency/lock_state.h
+++ b/src/mongo/db/concurrency/lock_state.h
@@ -96,6 +96,9 @@ namespace mongo {
         virtual LockerId getId() const { return _id; }
 
         virtual LockResult lockGlobal(LockMode mode, unsigned timeoutMs = UINT_MAX);
+        virtual LockResult lockGlobalBegin(LockMode mode);
+        virtual LockResult lockGlobalComplete(unsigned timeoutMs);
+
         virtual void downgradeGlobalXtoSForMMAPV1();
         virtual bool unlockAll();
 
@@ -127,12 +130,11 @@ namespace mongo {
         virtual void restoreLockState(const LockSnapshot& stateToRestore);
 
         /**
-         * These two methods allow for lock requests to be acquired in a non-blocking way. There
-         * can be only one outstanding pending lock request per locker object. I.e., for each call
-         * to lockBegin, which does not return LOCK_OK, there needs to be a corresponding call to
-         * lockComplete or unlock.
+         * Allows for locks to be requested in a non-blocking way. There can be only one
+         * outstanding pending lock request per locker object.
          *
-         * lockBegin posts a request to the lock manager for the specified lock to be acquired
+         * lockBegin posts a request to the lock manager for the specified lock to be acquired,
+         * which either immediately grants the lock, or puts the requestor on the conflict queue
          * and returns immediately with the result of the acquisition. The result can be one of:
          *
          * LOCK_OK - Nothing more needs to be done. The lock is granted.
@@ -141,13 +143,16 @@ namespace mongo {
          *      order to wait for the actual grant to occur. If the caller no longer needs to wait
          *      for the grant to happen, unlock needs to be called with the same resource passed
          *      to lockBegin.
+         *
+         * In other words for each call to lockBegin, which does not return LOCK_OK, there needs to
+         * be a corresponding call to either lockComplete or unlock.
          */
         LockResult lockBegin(ResourceId resId, LockMode mode);
 
         /**
-         * Waits for the completion of a lock, previously requested through lockBegin. Must only be
-         * called, if lockBegin returned LOCK_WAITING. The resId argument must match what was
-         * previously passed to lockBegin.
+         * Waits for the completion of a lock, previously requested through lockBegin or
+         * lockGlobalBegin. Must only be called if that request returned LOCK_WAITING. The resId
+         * argument must match what was previously passed to lockBegin.
          */
         LockResult lockComplete(ResourceId resId, unsigned timeoutMs, bool checkDeadlock);
 
diff --git a/src/mongo/db/concurrency/locker.h b/src/mongo/db/concurrency/locker.h
index db684c97f9c..157c380214c 100644
--- a/src/mongo/db/concurrency/locker.h
+++ b/src/mongo/db/concurrency/locker.h
@@ -62,8 +62,8 @@ namespace mongo {
          * X  - Stops all activity. Used for administrative operations (repl state changes,
          *          shutdown, etc).
          *
-         * This method can be called recursively, but each call to beginTransaction must be
-         * accompanied by a call to endTransaction.
+         * This method can be called recursively, but each call to lockGlobal must be accompanied
+         * by a call to unlockAll.
          *
          * @param mode Mode in which the global lock should be acquired. Also indicates the intent
          *              of the operation.
@@ -77,6 +77,18 @@ namespace mongo {
         virtual LockResult lockGlobal(LockMode mode, unsigned timeoutMs = UINT_MAX) = 0;
 
         /**
+         * Requests *only* the global lock to be acquired in the specified mode. Does not do the
+         * full MMAP V1 concurrency control functionality, which acquires the flush lock as well.
+         *
+         * Should only be used for cases, where no data reads or writes will be performed, such as
+         * replication step-down.
+         *
+         * See the comments for lockBegin/Complete for more information on the semantics.
+         */
+        virtual LockResult lockGlobalBegin(LockMode mode) = 0;
+        virtual LockResult lockGlobalComplete(unsigned timeoutMs) = 0;
+
+        /**
          * Decrements the reference count on the global lock.  If the reference count on the
          * global lock hits zero, the transaction is over, and unlockAll unlocks all other locks.
          *
author	Kaloian Manassiev <kaloian.manassiev@mongodb.com>	2014-11-18 11:35:22 -0500
committer	Kaloian Manassiev <kaloian.manassiev@mongodb.com>	2014-11-19 15:28:20 -0500
commit	2304904687c5d29e228d86da95244682dc62caa1 (patch)
tree	e4e999bf58fe0204f287e157b7bea4ed8b8afa77
parent	489690cfbccc24cf69b6ae7848581303ec8f4b0e (diff)
download	mongo-2304904687c5d29e228d86da95244682dc62caa1.tar.gz