summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jason Chan <jason.chan@mongodb.com> 2021-06-15 13:51:43 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-06-15 19:58:39 +0000
commit: 4d53dbd076f5d63197c88fe9509038a4b3c90055 (patch)
tree: 80748f7cb03d4c4bb33f892da9e4a936640e2585
parent: db5ca00459eb05bea982cbf858d2cbdfb1ef221b (diff)
download: mongo-4d53dbd076f5d63197c88fe9509038a4b3c90055.tar.gz
SERVER-57602 Don't acquire FCV lock on reconfig triggered by signalDrainComplete()
-rw-r--r-- src/mongo/db/commands/feature_compatibility_version.cpp 1
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.cpp 10
2 files changed, 9 insertions, 2 deletions
diff --git a/src/mongo/db/commands/feature_compatibility_version.cpp b/src/mongo/db/commands/feature_compatibility_version.cpp
index 69046789c52..70a125c59df 100644
--- a/src/mongo/db/commands/feature_compatibility_version.cpp
+++ b/src/mongo/db/commands/feature_compatibility_version.cpp
@@ -577,6 +577,7 @@ Status FeatureCompatibilityVersionParameter::setFromString(const std::string&) {
// Constructs a FixedFCVRegion by initializing '_lk' with a Lock::SharedLock on 'fcvLock', taken
// through the lock state of 'opCtx'. The lock is produced by an immediately-invoked lambda so
// that the precondition checks below run before the lock is acquired.
FixedFCVRegion::FixedFCVRegion(OperationContext* opCtx)
: _lk([&] {
// Precondition: the operation's lock state must not report any lock as held.
invariant(!opCtx->lockState()->isLocked());
// Precondition: the RSTL must not be held when the FCV lock is about to be acquired.
+ invariant(!opCtx->lockState()->isRSTLLocked());
return Lock::SharedLock(opCtx->lockState(), fcvLock);
}()) {}
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 84224b73ad3..bb96489547b 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -3425,7 +3425,13 @@ Status ReplicationCoordinatorImpl::_doReplSetReconfig(OperationContext* opCtx,
// So, acquire FCV mutex lock in shared mode to block writers from modifying the fcv document
// to make sure fcv is not changed between getNewConfig() and storing the new config
// document locally.
- boost::optional<FixedFCVRegion> fixedFcvRegion(opCtx);
+ // Since 'skipSafetyChecks' is only true when this reconfig is invoked as part of
+ // 'signalDrainComplete', we can skip taking the FCV lock here because:
+ // 1. 'signalDrainComplete' acquires the RSTL in X mode prior to this reconfig, which will block
+ // all external writers. Skipping the FCV lock is also required in this case because we must
+ // not acquire the FCV lock while holding the RSTL, to avoid deadlocking.
+ // 2. We are not able to accept replicated writes as primary until we fully exit drain mode.
+ auto fixedFcvRegion = skipSafetyChecks ? nullptr : std::make_unique<FixedFCVRegion>(opCtx);
// Call the callback to get the new config given the old one.
auto newConfigStatus = getNewConfig(oldConfig, topCoordTerm);
@@ -3519,7 +3525,7 @@ Status ReplicationCoordinatorImpl::_doReplSetReconfig(OperationContext* opCtx,
// 1) For fcv 4.4, addition of new voter nodes.
// 2) For fcv 4.7+, only if the current config doesn't contain the 'newlyAdded' field but the
// new config got mutated to append 'newlyAdded' field.
- if (force || !needsFcvLock()) {
+ if (fixedFcvRegion && (force || !needsFcvLock())) {
fixedFcvRegion.reset();
}