author    Samy Lanka <samy.lanka@mongodb.com>    2018-11-19 14:04:53 -0500
committer Samy Lanka <samy.lanka@mongodb.com>    2018-11-29 10:40:14 -0500
commit    d963ad9dcca8258434e955891cbeccac7aaf3ad4 (patch)
tree      b3afb59c36851de3ea95e5521772b4b38c2c4c4a /src
parent    01d25f74348e8594e96a9e01dfca60538677d078 (diff)
download  mongo-d963ad9dcca8258434e955891cbeccac7aaf3ad4.tar.gz
SERVER-37990 Replace locking requirements in 'ReadWriteAbility' with new semantics around ReplicationStateTransitionLock
Diffstat (limited to 'src')
 src/mongo/db/concurrency/lock_state.cpp                           | 23
 src/mongo/db/concurrency/lock_state.h                             |  7
 src/mongo/db/concurrency/locker.h                                 | 18
 src/mongo/db/concurrency/locker_noop.h                            | 16
 src/mongo/db/repl/replication_coordinator_external_state_impl.cpp |  2
 src/mongo/db/repl/replication_coordinator_impl.cpp                | 46
 src/mongo/db/repl/replication_coordinator_impl.h                  | 10
 src/mongo/db/repl/replication_coordinator_impl_test.cpp           |  6
 src/mongo/db/repl/replication_state_transition_lock_guard.cpp     | 34
 src/mongo/db/repl/replication_state_transition_lock_guard.h       | 24
 10 files changed, 123 insertions(+), 63 deletions(-)
diff --git a/src/mongo/db/concurrency/lock_state.cpp b/src/mongo/db/concurrency/lock_state.cpp
index 2320b9421ff..e47fd47990f 100644
--- a/src/mongo/db/concurrency/lock_state.cpp
+++ b/src/mongo/db/concurrency/lock_state.cpp
@@ -168,6 +168,14 @@ bool LockerImpl::isReadLocked() const {
return isLockHeldForMode(resourceIdGlobal, MODE_IS);
}
+bool LockerImpl::isRSTLExclusive() const {
+ return getLockMode(resourceIdReplicationStateTransitionLock) == MODE_X;
+}
+
+bool LockerImpl::isRSTLLocked() const {
+ return getLockMode(resourceIdReplicationStateTransitionLock) != MODE_NONE;
+}
+
void LockerImpl::dump() const {
StringBuilder ss;
ss << "Locker id " << _id << " status: ";
@@ -593,10 +601,12 @@ bool LockerImpl::saveLockStateAndUnlock(Locker::LockSnapshot* stateOut) {
return false;
}
- // If the global lock has been acquired more than once, we're probably somewhere in a
+ // If the global lock or RSTL has been acquired more than once, we're probably somewhere in a
// DBDirectClient call. It's not safe to release and reacquire locks -- the context using
// the DBDirectClient is probably not prepared for lock release.
- if (globalRequest->recursiveCount > 1) {
+ LockRequestsMap::Iterator rstlRequest =
+ _requests.find(resourceIdReplicationStateTransitionLock);
+ if (globalRequest->recursiveCount > 1 || (rstlRequest && rstlRequest->recursiveCount > 1)) {
return false;
}
@@ -825,6 +835,15 @@ LockResult LockerImpl::lockComplete(OperationContext* opCtx,
return result;
}
+LockResult LockerImpl::lockRSTLBegin(OperationContext* opCtx) {
+ invariant(!opCtx->lockState()->isLocked());
+ return lockBegin(opCtx, resourceIdReplicationStateTransitionLock, MODE_X);
+}
+
+LockResult LockerImpl::lockRSTLComplete(OperationContext* opCtx, Date_t deadline) {
+ return lockComplete(opCtx, resourceIdReplicationStateTransitionLock, MODE_X, deadline);
+}
+
void LockerImpl::releaseTicket() {
invariant(_modeForTicket != MODE_NONE);
_releaseTicket();
diff --git a/src/mongo/db/concurrency/lock_state.h b/src/mongo/db/concurrency/lock_state.h
index dc5f73e7273..599cb95bd2d 100644
--- a/src/mongo/db/concurrency/lock_state.h
+++ b/src/mongo/db/concurrency/lock_state.h
@@ -146,6 +146,9 @@ public:
virtual bool unlockGlobal();
+ virtual LockResult lockRSTLBegin(OperationContext* opCtx);
+ virtual LockResult lockRSTLComplete(OperationContext* opCtx, Date_t deadline);
+
virtual void beginWriteUnitOfWork();
virtual void endWriteUnitOfWork();
@@ -348,6 +351,10 @@ public:
virtual bool isLocked() const;
virtual bool isWriteLocked() const;
virtual bool isReadLocked() const;
+
+ virtual bool isRSTLExclusive() const;
+ virtual bool isRSTLLocked() const;
+
bool isGlobalLockedRecursively() override;
virtual bool hasLockPending() const {
diff --git a/src/mongo/db/concurrency/locker.h b/src/mongo/db/concurrency/locker.h
index d347e3d4cc2..d1ea2830a91 100644
--- a/src/mongo/db/concurrency/locker.h
+++ b/src/mongo/db/concurrency/locker.h
@@ -191,6 +191,20 @@ public:
virtual bool unlockGlobal() = 0;
/**
+ * Requests the RSTL to be acquired in mode X. This should only be called inside
+ * ReplicationStateTransitionLockGuard.
+ *
+ * See the comments for lockBegin/Complete for more information on the semantics.
+ */
+ virtual LockResult lockRSTLBegin(OperationContext* opCtx) = 0;
+
+ /**
+ * Waits for the completion of acquiring the RSTL in mode X. This should only be called inside
+ * ReplicationStateTransitionLockGuard.
+ */
+ virtual LockResult lockRSTLComplete(OperationContext* opCtx, Date_t deadline) = 0;
+
+ /**
* beginWriteUnitOfWork/endWriteUnitOfWork are called at the start and end of WriteUnitOfWorks.
* They can be used to implement two-phase locking. Each call to begin should be matched with an
* eventual call to end.
@@ -387,6 +401,10 @@ public:
virtual bool isLocked() const = 0;
virtual bool isWriteLocked() const = 0;
virtual bool isReadLocked() const = 0;
+
+ virtual bool isRSTLExclusive() const = 0;
+ virtual bool isRSTLLocked() const = 0;
+
virtual bool isGlobalLockedRecursively() = 0;
/**
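[Editor's note] The lockRSTLBegin/lockRSTLComplete pair follows the same two-phase contract as lockBegin/lockComplete: the first call enqueues the request (and may return LOCK_OK immediately if it is granted without contention), the second blocks until the grant or the deadline. A minimal sketch of the sequence the guard drives, assuming an OperationContext* opCtx and a Date_t deadline in scope:

    // Illustrative sketch only; per the comments above, this sequence lives
    // inside ReplicationStateTransitionLockGuard in the real code.
    LockResult result = opCtx->lockState()->lockRSTLBegin(opCtx);
    if (result != LOCK_OK) {
        // The request is now enqueued; work such as killing conflicting user
        // operations can be interleaved here before blocking on the grant.
        opCtx->lockState()->lockRSTLComplete(opCtx, deadline);
    }
    invariant(opCtx->lockState()->isRSTLExclusive());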
diff --git a/src/mongo/db/concurrency/locker_noop.h b/src/mongo/db/concurrency/locker_noop.h
index 0912dd8cb31..37abca2c99e 100644
--- a/src/mongo/db/concurrency/locker_noop.h
+++ b/src/mongo/db/concurrency/locker_noop.h
@@ -119,6 +119,14 @@ public:
return false;
}
+ virtual LockResult lockRSTLBegin(OperationContext* opCtx) {
+ MONGO_UNREACHABLE;
+ }
+
+ virtual LockResult lockRSTLComplete(OperationContext* opCtx, Date_t deadline) {
+ MONGO_UNREACHABLE;
+ }
+
virtual LockResult lock(OperationContext* opCtx,
ResourceId resId,
LockMode mode,
@@ -213,6 +221,14 @@ public:
return true;
}
+ virtual bool isRSTLExclusive() const {
+ return true;
+ }
+
+ virtual bool isRSTLLocked() const {
+ return true;
+ }
+
virtual bool hasLockPending() const {
MONGO_UNREACHABLE;
}
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
index f5b1cbb55db..a7f5d41691b 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
@@ -479,7 +479,7 @@ void ReplicationCoordinatorExternalStateImpl::onDrainComplete(OperationContext*
}
OpTime ReplicationCoordinatorExternalStateImpl::onTransitionToPrimary(OperationContext* opCtx) {
- invariant(opCtx->lockState()->isW());
+ invariant(opCtx->lockState()->isRSTLExclusive());
// Clear the appliedThrough marker so on startup we'll use the top of the oplog. This must be
// done before we add anything to our oplog.
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 3f237917497..c1021b897d8 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -926,7 +926,7 @@ void ReplicationCoordinatorImpl::clearSyncSourceBlacklist() {
Status ReplicationCoordinatorImpl::setFollowerModeStrict(OperationContext* opCtx,
const MemberState& newState) {
invariant(opCtx);
- invariant(opCtx->lockState()->isW());
+ invariant(opCtx->lockState()->isRSTLExclusive());
return _setFollowerMode(opCtx, newState);
}
@@ -1736,7 +1736,7 @@ void ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx,
transitionArgs.lockDeadline = force ? stepDownUntil : waitUntil;
ReplicationStateTransitionLockGuard transitionGuard(opCtx, transitionArgs);
- invariant(opCtx->lockState()->isW());
+ invariant(opCtx->lockState()->isRSTLExclusive());
stdx::unique_lock<stdx::mutex> lk(_mutex);
@@ -1759,7 +1759,7 @@ void ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx,
// Make sure that we leave _canAcceptNonLocalWrites in the proper state.
auto updateMemberState = [&] {
invariant(lk.owns_lock());
- invariant(opCtx->lockState()->isW());
+ invariant(opCtx->lockState()->isRSTLExclusive());
auto action = _updateMemberStateFromTopologyCoordinator(lk, opCtx);
lk.unlock();
@@ -1802,28 +1802,26 @@ void ReplicationCoordinatorImpl::stepDown(OperationContext* opCtx,
while (!_topCoord->attemptStepDown(
termAtStart, _replExecutor->now(), waitUntil, stepDownUntil, force)) {
- // The stepdown attempt failed. We now release the global lock to allow secondaries
- // to read the oplog, then wait until enough secondaries are caught up for us to
- // finish stepdown.
- transitionGuard.releaseGlobalLock();
+ // The stepdown attempt failed. We now release the RSTL to allow secondaries to read the
+ // oplog, then wait until enough secondaries are caught up for us to finish stepdown.
+ transitionGuard.releaseRSTL();
invariant(!opCtx->lockState()->isLocked());
- // Make sure we re-acquire the global lock before returning so that we're always holding
- // the global lock when the onExitGuard set up earlier runs.
+ // Make sure we re-acquire the RSTL before returning so that we're always holding the
+ // RSTL when the onExitGuard set up earlier runs.
ON_BLOCK_EXIT([&] {
- // Need to release _mutex before re-acquiring the global lock to preserve lock
- // acquisition order rules.
+ // Need to release _mutex before re-acquiring the RSTL to preserve lock acquisition
+ // order rules.
lk.unlock();
- // Need to re-acquire the global lock before re-attempting stepdown.
+ // Need to re-acquire the RSTL before re-attempting stepdown.
// We use no timeout here even though that means the lock acquisition could take
- // longer than the stepdown window. If that happens, the call to _tryToStepDown
- // immediately after will error. Since we'll need the global lock no matter what to
+ // longer than the stepdown window. Since we'll need the RSTL no matter what to
// clean up a failed stepdown attempt, we might as well spend whatever time we need
// to acquire it now. For the same reason, we also disable lock acquisition
// interruption, to guarantee that we get the lock eventually.
- transitionGuard.reacquireGlobalLock();
- invariant(opCtx->lockState()->isW());
+ transitionGuard.reacquireRSTL();
+ invariant(opCtx->lockState()->isRSTLExclusive());
lk.lock();
});
@@ -3633,9 +3631,9 @@ void ReplicationCoordinatorImpl::ReadWriteAbility::setCanAcceptNonLocalWrites(
return;
}
- // We must be holding the global X lock to change _canAcceptNonLocalWrites.
+ // We must be holding the RSTL in mode X to change _canAcceptNonLocalWrites.
invariant(opCtx);
- invariant(opCtx->lockState()->isW());
+ invariant(opCtx->lockState()->isRSTLExclusive());
_canAcceptNonLocalWrites = canAcceptWrites;
}
@@ -3649,9 +3647,9 @@ bool ReplicationCoordinatorImpl::ReadWriteAbility::canAcceptNonLocalWrites_UNSAF
bool ReplicationCoordinatorImpl::ReadWriteAbility::canAcceptNonLocalWrites(
OperationContext* opCtx) const {
- // We must be holding the global lock.
+ // We must be holding the RSTL.
invariant(opCtx);
- invariant(opCtx->lockState()->isLocked());
+ invariant(opCtx->lockState()->isRSTLLocked());
return _canAcceptNonLocalWrites;
}
@@ -3661,17 +3659,17 @@ bool ReplicationCoordinatorImpl::ReadWriteAbility::canServeNonLocalReads_UNSAFE(
bool ReplicationCoordinatorImpl::ReadWriteAbility::canServeNonLocalReads(
OperationContext* opCtx) const {
- // We must be holding the global lock.
+ // We must be holding the RSTL.
invariant(opCtx);
- invariant(opCtx->lockState()->isLocked());
+ invariant(opCtx->lockState()->isRSTLLocked());
return _canServeNonLocalReads.loadRelaxed();
}
void ReplicationCoordinatorImpl::ReadWriteAbility::setCanServeNonLocalReads(OperationContext* opCtx,
unsigned int newVal) {
- // We must be holding the global X lock to change _canServeNonLocalReads.
+ // We must be holding the RSTL in mode X to change _canServeNonLocalReads.
invariant(opCtx);
- invariant(opCtx->lockState()->isW());
+ invariant(opCtx->lockState()->isRSTLExclusive());
_canServeNonLocalReads.store(newVal);
}
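[Editor's note] The reshaped stepdown loop is the clearest consumer of the new semantics: the RSTL is released so secondaries can read the oplog while the primary waits for them to catch up, and an ON_BLOCK_EXIT guard guarantees it is held again (uninterruptibly, with no deadline) before the cleanup logic for a failed attempt runs. Condensed to its skeleton, with the catch-up wait stubbed out as a hypothetical helper:

    // Skeleton of the wait loop; waitForSecondariesToCatchUp(lk) is a
    // stand-in for the real condition-variable wait under _mutex.
    while (!attemptStepDown()) {
        transitionGuard.releaseRSTL();
        invariant(!opCtx->lockState()->isLocked());
        ON_BLOCK_EXIT([&] {
            lk.unlock();                      // release _mutex before retaking
            transitionGuard.reacquireRSTL();  // the RSTL, to keep lock order
            invariant(opCtx->lockState()->isRSTLExclusive());
            lk.lock();
        });
        waitForSecondariesToCatchUp(lk);
    }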
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index 31c39f32369..7691f3564fa 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -588,17 +588,17 @@ private:
private:
// Flag that indicates whether writes to databases other than "local" are allowed. Used to
// answer canAcceptWritesForDatabase() and canAcceptWritesFor() questions. In order to read
- // it, must have either the RSTL in some intent mode or the replication coordinator mutex.
- // To set it, must have both the RSTL in mode X and the replication coordinator mutex.
+ // it, must have either the RSTL or the replication coordinator mutex. To set it, must have
+ // both the RSTL in mode X and the replication coordinator mutex.
// Always true for standalone nodes.
bool _canAcceptNonLocalWrites;
// Flag that indicates whether reads from databases other than "local" are allowed. Unlike
// _canAcceptNonLocalWrites, above, this question is about admission control on secondaries.
// Accidentally providing the prior value for a limited period of time is acceptable, except
- // during rollback. In order to read it, must have the RSTL in some intent mode. To set it
- // when transitioning into RS_ROLLBACK, must have the RSTL in mode X. Otherwise, no lock or
- // mutex is necessary to set it.
+ // during rollback. In order to read it, must have the RSTL. To set it when transitioning
+ // into RS_ROLLBACK, must have the RSTL in mode X. Otherwise, no lock or mutex is necessary
+ // to set it.
AtomicUInt32 _canServeNonLocalReads;
};
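[Editor's note] Taken together, these comments define a small access matrix: reading _canAcceptNonLocalWrites needs the RSTL (any mode) or the coordinator mutex, while writing it needs the RSTL in mode X plus the mutex. A hedged sketch of the two shapes, mirroring the accessors changed in replication_coordinator_impl.cpp above, with the surrounding method bodies assumed:

    // Reader: holding the RSTL in any mode is sufficient.
    invariant(opCtx->lockState()->isRSTLLocked());
    return _canAcceptNonLocalWrites;

    // Writer: RSTL in mode X plus the replication coordinator mutex.
    invariant(lk.owns_lock());  // stdx::unique_lock on the coordinator mutex
    invariant(opCtx->lockState()->isRSTLExclusive());
    _canAcceptNonLocalWrites = canAcceptWrites;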
diff --git a/src/mongo/db/repl/replication_coordinator_impl_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
index ccb4c6adc84..c27fb873a99 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_test.cpp
@@ -1886,7 +1886,7 @@ TEST_F(StepDownTest,
// Make sure stepDown cannot grab the RSTL in mode X. We need to use a different
// locker to test this, or otherwise stepDown will be granted the lock automatically.
ReplicationStateTransitionLockGuard transitionGuard(opCtx.get());
- ASSERT_TRUE(opCtx->lockState()->isW());
+ ASSERT_TRUE(opCtx->lockState()->isRSTLExclusive());
auto locker = opCtx.get()->swapLockState(stdx::make_unique<LockerImpl>());
ASSERT_THROWS_CODE(
@@ -1895,8 +1895,8 @@ TEST_F(StepDownTest,
ErrorCodes::ExceededTimeLimit);
ASSERT_TRUE(getReplCoord()->getMemberState().primary());
- ASSERT_TRUE(locker->isW());
- ASSERT_FALSE(opCtx->lockState()->isLocked());
+ ASSERT_TRUE(locker->isRSTLExclusive());
+ ASSERT_FALSE(opCtx->lockState()->isRSTLLocked());
opCtx.get()->swapLockState(std::move(locker));
}
diff --git a/src/mongo/db/repl/replication_state_transition_lock_guard.cpp b/src/mongo/db/repl/replication_state_transition_lock_guard.cpp
index 61ee8d68e72..f9bfb5040b9 100644
--- a/src/mongo/db/repl/replication_state_transition_lock_guard.cpp
+++ b/src/mongo/db/repl/replication_state_transition_lock_guard.cpp
@@ -45,11 +45,8 @@ ReplicationStateTransitionLockGuard::ReplicationStateTransitionLockGuard(Operati
ReplicationStateTransitionLockGuard::ReplicationStateTransitionLockGuard(OperationContext* opCtx,
const Args& args)
: _opCtx(opCtx), _args(args) {
- _globalLock.emplace(opCtx,
- MODE_X,
- args.lockDeadline,
- Lock::InterruptBehavior::kThrow,
- Lock::GlobalLock::EnqueueOnly());
+ // Enqueue a lock request for the RSTL in mode X.
+ LockResult result = _opCtx->lockState()->lockRSTLBegin(_opCtx);
if (args.killUserOperations) {
ServiceContext* environment = opCtx->getServiceContext();
@@ -62,26 +59,33 @@ ReplicationStateTransitionLockGuard::ReplicationStateTransitionLockGuard(Operati
opCtx, matcherAllSessions, ErrorCodes::InterruptedDueToStepDown);
}
- _globalLock->waitForLockUntil(args.lockDeadline);
+ // We can return early if the lock request was already satisfied.
+ if (result == LOCK_OK) {
+ return;
+ }
+
+ // Wait for the completion of the lock request for the RSTL in mode X.
+ _opCtx->lockState()->lockRSTLComplete(opCtx, args.lockDeadline);
uassert(ErrorCodes::ExceededTimeLimit,
- "Could not acquire the global lock before the deadline",
- _globalLock->isLocked());
+ "Could not acquire the RSTL before the deadline",
+ opCtx->lockState()->isRSTLExclusive());
}
ReplicationStateTransitionLockGuard::~ReplicationStateTransitionLockGuard() {
- invariant(_globalLock->isLocked());
+ invariant(_opCtx->lockState()->isRSTLExclusive());
+ _opCtx->lockState()->unlock(resourceIdReplicationStateTransitionLock);
}
-void ReplicationStateTransitionLockGuard::releaseGlobalLock() {
- invariant(_globalLock->isLocked());
- _globalLock.reset();
+void ReplicationStateTransitionLockGuard::releaseRSTL() {
+ invariant(_opCtx->lockState()->isRSTLExclusive());
+ _opCtx->lockState()->unlock(resourceIdReplicationStateTransitionLock);
}
-void ReplicationStateTransitionLockGuard::reacquireGlobalLock() {
- invariant(!_globalLock);
+void ReplicationStateTransitionLockGuard::reacquireRSTL() {
+ invariant(!_opCtx->lockState()->isRSTLLocked());
UninterruptibleLockGuard noInterrupt(_opCtx->lockState());
- _globalLock.emplace(_opCtx, MODE_X);
+ _opCtx->lockState()->lock(_opCtx, resourceIdReplicationStateTransitionLock, MODE_X);
}
} // namespace repl
diff --git a/src/mongo/db/repl/replication_state_transition_lock_guard.h b/src/mongo/db/repl/replication_state_transition_lock_guard.h
index 7f42ce00e27..d05007a93ee 100644
--- a/src/mongo/db/repl/replication_state_transition_lock_guard.h
+++ b/src/mongo/db/repl/replication_state_transition_lock_guard.h
@@ -40,30 +40,29 @@ namespace mongo {
namespace repl {
/**
- * This object handles acquiring the global exclusive lock for replication state transitions, as
- * well as any actions that need to happen in between enqueuing the global lock request and waiting
- * for it to be granted.
+ * This object handles acquiring the RSTL for replication state transitions, as well as any actions
+ * that need to happen in between enqueuing the RSTL request and waiting for it to be granted.
*/
class ReplicationStateTransitionLockGuard {
MONGO_DISALLOW_COPYING(ReplicationStateTransitionLockGuard);
public:
struct Args {
- // How long to wait for the global X lock.
+ // How long to wait for the RSTL in mode X.
Date_t lockDeadline = Date_t::max();
- // If true, will kill all user operations in between enqueuing the global lock request and
- // waiting for it to be granted.
+ // If true, will kill all user operations in between enqueuing the RSTL request and waiting
+ // for it to be granted.
bool killUserOperations = false;
};
/**
- * Acquires the global X lock.
+ * Acquires the RSTL in mode X.
*/
ReplicationStateTransitionLockGuard(OperationContext* opCtx);
/**
- * Acquires the global X lock and performs any other required actions according to the Args
+ * Acquires the RSTL in mode X and performs any other required actions according to the Args
* provided.
*/
ReplicationStateTransitionLockGuard(OperationContext* opCtx, const Args& args);
@@ -71,20 +70,19 @@ public:
~ReplicationStateTransitionLockGuard();
/**
- * Temporarily releases the global X lock. Must be followed by a call to reacquireGlobalLock().
+ * Temporarily releases the RSTL in mode X. Must be followed by a call to reacquireRSTL().
*/
- void releaseGlobalLock();
+ void releaseRSTL();
/**
- * Requires the global X lock after it was released via a call to releaseGlobalLock. Ignores
+ * Re-acquires the RSTL in mode X after it was released via a call to releaseRSTL. Ignores
* the configured 'lockDeadline' and instead waits forever for the lock to be acquired.
*/
- void reacquireGlobalLock();
+ void reacquireRSTL();
private:
OperationContext* const _opCtx;
Args _args;
- boost::optional<Lock::GlobalLock> _globalLock = boost::none;
};
} // namespace repl
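
[Editor's note] As a usage sketch, a state transition that wants a bounded wait and wants conflicting user operations killed might drive the guard as follows; the ten-second deadline is an assumption for illustration:

    // Hypothetical caller, per the Args contract declared above.
    ReplicationStateTransitionLockGuard::Args args;
    args.lockDeadline = Date_t::now() + Seconds(10);
    args.killUserOperations = true;

    ReplicationStateTransitionLockGuard transitionGuard(opCtx, args);
    invariant(opCtx->lockState()->isRSTLExclusive());

    // A transition that must pause (e.g. to let secondaries catch up) can
    // temporarily drop and then retake the lock:
    transitionGuard.releaseRSTL();
    // ... wait without holding the RSTL ...
    transitionGuard.reacquireRSTL();

    // The destructor releases the RSTL when the guard goes out of scope.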