summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGregory Noma <gregory.noma@gmail.com>2022-05-09 16:16:13 +0000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2022-05-10 14:47:20 +0000
commit007846ad9c22495af276034c6d855572947e2742 (patch)
tree6c34832a63776c0f79c5e77cb624587b8a1516b0
parentbc1ac35debfc926c3d12f11f5ca7230010aacd55 (diff)
downloadmongo-007846ad9c22495af276034c6d855572947e2742.tar.gz
SERVER-65821 Allow prepared transactions to write commit decision across FCV barrierr5.3.2-rc0
(cherry picked from commit 5f15e515c617fca69a4a6dc4be741c19e2d07aa8)
-rw-r--r--etc/backports_required_for_multiversion_tests.yml4
-rw-r--r--jstests/noPassthrough/set_fcv_prepared_transaction.js93
-rw-r--r--jstests/replsets/dbhash_lock_acquisition.js9
-rw-r--r--src/mongo/db/commands/feature_compatibility_version.cpp6
-rw-r--r--src/mongo/db/commands/set_feature_compatibility_version_command.cpp16
-rw-r--r--src/mongo/db/concurrency/d_concurrency.cpp14
-rw-r--r--src/mongo/db/concurrency/d_concurrency.h1
-rw-r--r--src/mongo/db/concurrency/lock_manager_defs.cpp11
-rw-r--r--src/mongo/db/concurrency/lock_manager_defs.h57
-rw-r--r--src/mongo/db/concurrency/lock_state.cpp12
-rw-r--r--src/mongo/db/concurrency/lock_stats.cpp18
-rw-r--r--src/mongo/db/concurrency/lock_stats.h27
-rw-r--r--src/mongo/db/concurrency/locker.h33
-rw-r--r--src/mongo/db/s/transaction_coordinator_util.cpp9
-rw-r--r--src/mongo/db/stats/fill_locker_info.cpp21
-rw-r--r--src/mongo/db/storage/control/journal_flusher.cpp22
16 files changed, 298 insertions, 55 deletions
diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml
index 805b4e0ae8f..fc3ad73145e 100644
--- a/etc/backports_required_for_multiversion_tests.yml
+++ b/etc/backports_required_for_multiversion_tests.yml
@@ -156,6 +156,8 @@ last-continuous:
test_file: jstests/replsets/apply_ops_dropDatabase.js
- ticket: SERVER-64485
test_file: jstests/sharding/update_with_dollar_fields.js
+ - ticket: SERVER-65821
+ test_file: jstests/replsets/dbhash_lock_acquisition.js
# Tests that should only be excluded from particular suites should be listed under that suite.
suites:
@@ -455,6 +457,8 @@ last-lts:
test_file: jstests/replsets/buildindexes_false_commit_quorum.js
- ticket: SERVER-64780
test_file: jstest/sharding/resharding_change_stream_namespace_filtering.js
+ - ticket: SERVER-65821
+ test_file: jstests/replsets/dbhash_lock_acquisition.js
# Tests that should only be excluded from particular suites should be listed under that suite.
suites:
diff --git a/jstests/noPassthrough/set_fcv_prepared_transaction.js b/jstests/noPassthrough/set_fcv_prepared_transaction.js
new file mode 100644
index 00000000000..86cd3b4837a
--- /dev/null
+++ b/jstests/noPassthrough/set_fcv_prepared_transaction.js
@@ -0,0 +1,93 @@
+/**
+ * Tests running the setFeatureCompatibilityVersion command concurrently with a prepared
+ * transaction. Specifically, runs the setFCV right before the TransactionCoordinator writes the
+ * commit decision.
+ *
+ * @tags: [
+ * requires_sharding,
+ * ]
+ */
+(function() {
+"use strict";
+
+load("jstests/libs/fail_point_util.js");
+
+const st = new ShardingTest({shards: 2});
+const shard0Primary = st.rs0.getPrimary();
+
+// Set up a sharded collection with two chunks:
+// shard0: [MinKey, 0]
+// shard1: [0, MaxKey]
+const dbName = "testDb";
+const collName = "testColl";
+const ns = dbName + "." + collName;
+assert.commandWorked(st.s.adminCommand({enableSharding: dbName}));
+assert.commandWorked(st.s.adminCommand({shardCollection: ns, key: {x: 1}}));
+assert.commandWorked(st.s.adminCommand({split: ns, middle: {x: 0}}));
+assert.commandWorked(
+ st.s.adminCommand({moveChunk: ns, find: {x: MinKey}, to: st.shard0.shardName}));
+assert.commandWorked(st.s.adminCommand({moveChunk: ns, find: {x: 1}, to: st.shard1.shardName}));
+
+function runTxn(mongosHost, dbName, collName) {
+ const mongosConn = new Mongo(mongosHost);
+ jsTest.log("Starting a cross-shard transaction with shard0 and shard1 as the participants " +
+ "and shard0 as the coordinator shard");
+ const lsid = {id: UUID()};
+ const txnNumber = NumberLong(35);
+ assert.commandWorked(mongosConn.getDB(dbName).runCommand({
+ insert: collName,
+ documents: [{x: -1}],
+ lsid,
+ txnNumber,
+ startTransaction: true,
+ autocommit: false,
+ }));
+ assert.commandWorked(mongosConn.getDB(dbName).runCommand({
+ insert: collName,
+ documents: [{x: 1}],
+ lsid,
+ txnNumber,
+ autocommit: false,
+ }));
+ assert.commandWorked(
+ mongosConn.adminCommand({commitTransaction: 1, lsid, txnNumber, autocommit: false}));
+ jsTest.log("Committed the cross-shard transaction");
+}
+
+function runSetFCV(primaryHost) {
+ const primaryConn = new Mongo(primaryHost);
+ jsTest.log("Starting a setFCV command on " + primaryHost);
+ assert.commandWorked(primaryConn.adminCommand({setFeatureCompatibilityVersion: lastLTSFCV}));
+ jsTest.log("Completed the setFCV command");
+}
+
+// Run a cross-shard transaction that has shard0 as the coordinator. Make the TransactionCoordinator
+// thread hang right before the commit decision is written (i.e. after the transaction has entered
+// the "prepared" state).
+const writeDecisionFp = configureFailPoint(shard0Primary, "hangBeforeWritingDecision");
+const txnThread = new Thread(runTxn, st.s.host, dbName, collName);
+txnThread.start();
+writeDecisionFp.wait();
+
+// Run a setFCV command against shard0 and wait for the setFCV thread to start waiting to acquire
+// the setFCV S lock (i.e. waiting for existing prepared transactions to commit or abort).
+const setFCVThread = new Thread(runSetFCV, shard0Primary.host);
+setFCVThread.start();
+assert.soon(() => {
+ return shard0Primary.getDB(dbName).currentOp().inprog.find(
+ op => op.command && op.command.setFeatureCompatibilityVersion && op.locks &&
+ op.locks.FeatureCompatibilityVersion === "R" && op.waitingForLock === true);
+});
+
+// Unpause the TransactionCoordinator. The transaction should be able to commit despite the fact
+// that the FCV S lock is enqueued.
+writeDecisionFp.off();
+
+jsTest.log("Waiting for the cross-shard transaction to commit");
+txnThread.join();
+jsTest.log("Waiting for setFCV command to complete");
+setFCVThread.join();
+jsTest.log("Done");
+
+st.stop();
+})();
diff --git a/jstests/replsets/dbhash_lock_acquisition.js b/jstests/replsets/dbhash_lock_acquisition.js
index c05a5c1d698..594df5dc8e0 100644
--- a/jstests/replsets/dbhash_lock_acquisition.js
+++ b/jstests/replsets/dbhash_lock_acquisition.js
@@ -32,8 +32,13 @@ assert.commandWorked(sessionDB.mycoll.insert({}));
const ops = db.currentOp({"lsid.id": session.getSessionId().id}).inprog;
assert.eq(
1, ops.length, () => "Failed to find session in currentOp() output: " + tojson(db.currentOp()));
-assert.eq(ops[0].locks,
- {ReplicationStateTransition: "w", Global: "w", Database: "w", Collection: "w"});
+assert.eq(ops[0].locks, {
+ FeatureCompatibilityVersion: "w",
+ ReplicationStateTransition: "w",
+ Global: "w",
+ Database: "w",
+ Collection: "w",
+});
const threadCaptruncCmd = new Thread(function(host) {
try {
diff --git a/src/mongo/db/commands/feature_compatibility_version.cpp b/src/mongo/db/commands/feature_compatibility_version.cpp
index 5adcdffef21..fc0c2ff0fde 100644
--- a/src/mongo/db/commands/feature_compatibility_version.cpp
+++ b/src/mongo/db/commands/feature_compatibility_version.cpp
@@ -194,7 +194,7 @@ private:
*
* setFCV takes this lock in exclusive mode when changing the FCV value.
*/
-Lock::ResourceMutex fcvLock("featureCompatibilityVersionLock");
+Lock::ResourceMutex fcvDocumentLock("featureCompatibilityVersionDocumentLock");
// lastFCVUpdateTimestamp contains the latest oplog entry timestamp which updated the FCV.
// It is reset on rollback.
Timestamp lastFCVUpdateTimestamp;
@@ -506,7 +506,7 @@ void FeatureCompatibilityVersion::fassertInitializedAfterStartup(OperationContex
Lock::ExclusiveLock FeatureCompatibilityVersion::enterFCVChangeRegion(OperationContext* opCtx) {
invariant(!opCtx->lockState()->isLocked());
- return Lock::ExclusiveLock(opCtx->lockState(), fcvLock);
+ return Lock::ExclusiveLock(opCtx->lockState(), fcvDocumentLock);
}
void FeatureCompatibilityVersion::advanceLastFCVUpdateTimestamp(Timestamp fcvUpdateTimestamp) {
@@ -571,7 +571,7 @@ FixedFCVRegion::FixedFCVRegion(OperationContext* opCtx)
: _lk([&] {
invariant(!opCtx->lockState()->isLocked());
invariant(!opCtx->lockState()->isRSTLLocked());
- return Lock::SharedLock(opCtx->lockState(), fcvLock);
+ return Lock::SharedLock(opCtx->lockState(), fcvDocumentLock);
}()) {}
FixedFCVRegion::~FixedFCVRegion() = default;
diff --git a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
index f07008508d8..b205e77b981 100644
--- a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
+++ b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
@@ -478,13 +478,15 @@ private:
_cancelTenantMigrations(opCtx);
{
- // Take the global lock in S mode to create a barrier for operations taking the global
- // IX or X locks. This ensures that either:
+ // Take the FCV full transition lock in S mode to create a barrier for operations taking
+ // the global IX or X locks, which implicitly take the FCV full transition lock in IX
+ // mode (aside from those which explicitly opt out). This ensures that either:
// - The global IX/X locked operation will start after the FCV change, see the
// upgrading to the latest FCV and act accordingly.
// - The global IX/X locked operation began prior to the FCV change, is acting on that
// assumption and will finish before upgrade procedures begin right after this.
- Lock::GlobalLock lk(opCtx, MODE_S);
+ Lock::ResourceLock lk(
+ opCtx, opCtx->lockState(), resourceIdFeatureCompatibilityVersion, MODE_S);
}
// (Generic FCV reference): TODO SERVER-60912: When kLastLTS is 6.0, remove this FCV-gated
@@ -593,13 +595,15 @@ private:
_cancelTenantMigrations(opCtx);
{
- // Take the global lock in S mode to create a barrier for operations taking the global
- // IX or X locks. This ensures that either
+ // Take the FCV full transition lock in S mode to create a barrier for operations taking
+ // the global IX or X locks, which implicitly take the FCV full transition lock in IX
+ // mode (aside from those which explicitly opt out). This ensures that either:
// - The global IX/X locked operation will start after the FCV change, see the
// downgrading to the last-lts or last-continuous FCV and act accordingly.
// - The global IX/X locked operation began prior to the FCV change, is acting on that
// assumption and will finish before downgrade procedures begin right after this.
- Lock::GlobalLock lk(opCtx, MODE_S);
+ Lock::ResourceLock lk(
+ opCtx, opCtx->lockState(), resourceIdFeatureCompatibilityVersion, MODE_S);
}
// (Generic FCV reference): TODO SERVER-60912: When kLastLTS is 6.0, remove this FCV-gated
diff --git a/src/mongo/db/concurrency/d_concurrency.cpp b/src/mongo/db/concurrency/d_concurrency.cpp
index db11862b65b..d63ac182dc1 100644
--- a/src/mongo/db/concurrency/d_concurrency.cpp
+++ b/src/mongo/db/concurrency/d_concurrency.cpp
@@ -133,6 +133,7 @@ Lock::GlobalLock::GlobalLock(OperationContext* opCtx,
: _opCtx(opCtx),
_result(LOCK_INVALID),
_pbwm(opCtx->lockState(), resourceIdParallelBatchWriterMode),
+ _fcvLock(opCtx->lockState(), resourceIdFeatureCompatibilityVersion),
_interruptBehavior(behavior),
_skipRSTLLock(skipRSTLLock),
_isOutermostLock(!opCtx->lockState()->isLocked()) {
@@ -148,6 +149,17 @@ Lock::GlobalLock::GlobalLock(OperationContext* opCtx,
}
});
+ if (_opCtx->lockState()->shouldConflictWithSetFeatureCompatibilityVersion() &&
+ !isSharedLockMode(lockMode)) {
+ _fcvLock.lock(_opCtx, MODE_IX, deadline);
+ }
+ ScopeGuard unlockFCVLock([this, lockMode] {
+ if (_opCtx->lockState()->shouldConflictWithSetFeatureCompatibilityVersion() &&
+ !isSharedLockMode(lockMode)) {
+ _fcvLock.unlock();
+ }
+ });
+
_result = LOCK_INVALID;
if (skipRSTLLock) {
_takeGlobalLockOnly(lockMode, deadline);
@@ -156,6 +168,7 @@ Lock::GlobalLock::GlobalLock(OperationContext* opCtx,
}
_result = LOCK_OK;
+ unlockFCVLock.dismiss();
unlockPBWM.dismiss();
} catch (const ExceptionForCat<ErrorCategory::Interruption>&) {
// The kLeaveUnlocked behavior suppresses this exception.
@@ -184,6 +197,7 @@ Lock::GlobalLock::GlobalLock(GlobalLock&& otherLock)
: _opCtx(otherLock._opCtx),
_result(otherLock._result),
_pbwm(std::move(otherLock._pbwm)),
+ _fcvLock(std::move(otherLock._fcvLock)),
_interruptBehavior(otherLock._interruptBehavior),
_skipRSTLLock(otherLock._skipRSTLLock),
_isOutermostLock(otherLock._isOutermostLock) {
diff --git a/src/mongo/db/concurrency/d_concurrency.h b/src/mongo/db/concurrency/d_concurrency.h
index 1c21e9362af..4e3433d5f6d 100644
--- a/src/mongo/db/concurrency/d_concurrency.h
+++ b/src/mongo/db/concurrency/d_concurrency.h
@@ -252,6 +252,7 @@ public:
OperationContext* const _opCtx;
LockResult _result;
ResourceLock _pbwm;
+ ResourceLock _fcvLock;
InterruptBehavior _interruptBehavior;
bool _skipRSTLLock;
const bool _isOutermostLock;
diff --git a/src/mongo/db/concurrency/lock_manager_defs.cpp b/src/mongo/db/concurrency/lock_manager_defs.cpp
index 01ae4586cad..bc9c357cc01 100644
--- a/src/mongo/db/concurrency/lock_manager_defs.cpp
+++ b/src/mongo/db/concurrency/lock_manager_defs.cpp
@@ -35,8 +35,13 @@ namespace mongo {
const ResourceId resourceIdLocalDB = ResourceId(RESOURCE_DATABASE, StringData("local"));
const ResourceId resourceIdOplog = ResourceId(RESOURCE_COLLECTION, StringData("local.oplog.rs"));
const ResourceId resourceIdAdminDB = ResourceId(RESOURCE_DATABASE, StringData("admin"));
-const ResourceId resourceIdGlobal = ResourceId(RESOURCE_GLOBAL, 1ULL);
-const ResourceId resourceIdParallelBatchWriterMode = ResourceId(RESOURCE_PBWM, 1ULL);
-const ResourceId resourceIdReplicationStateTransitionLock = ResourceId(RESOURCE_RSTL, 1ULL);
+const ResourceId resourceIdGlobal =
+ ResourceId(RESOURCE_GLOBAL, static_cast<uint8_t>(ResourceGlobalId::kGlobal));
+const ResourceId resourceIdParallelBatchWriterMode =
+ ResourceId(RESOURCE_GLOBAL, static_cast<uint8_t>(ResourceGlobalId::kParallelBatchWriterMode));
+const ResourceId resourceIdFeatureCompatibilityVersion = ResourceId(
+ RESOURCE_GLOBAL, static_cast<uint8_t>(ResourceGlobalId::kFeatureCompatibilityVersion));
+const ResourceId resourceIdReplicationStateTransitionLock = ResourceId(
+ RESOURCE_GLOBAL, static_cast<uint8_t>(ResourceGlobalId::kReplicationStateTransitionLock));
} // namespace mongo
diff --git a/src/mongo/db/concurrency/lock_manager_defs.h b/src/mongo/db/concurrency/lock_manager_defs.h
index 857599e8cb8..d2fe7a1df62 100644
--- a/src/mongo/db/concurrency/lock_manager_defs.h
+++ b/src/mongo/db/concurrency/lock_manager_defs.h
@@ -155,12 +155,6 @@ enum LockResult {
enum ResourceType {
RESOURCE_INVALID = 0,
- /** Parallel batch writer mode lock */
- RESOURCE_PBWM,
-
- /** Replication state transition lock. */
- RESOURCE_RSTL,
-
/** Used for global exclusive operations */
RESOURCE_GLOBAL,
@@ -177,21 +171,42 @@ enum ResourceType {
};
/**
+ * IDs for usages of RESOURCE_GLOBAL.
+ */
+enum class ResourceGlobalId : uint8_t {
+ kParallelBatchWriterMode,
+ kFeatureCompatibilityVersion,
+ kReplicationStateTransitionLock,
+ kGlobal,
+
+ // The number of global resource ids. Always insert new ids above this entry.
+ kNumIds
+};
+
+/**
* Maps the resource id to a human-readable string.
*/
-static const char* ResourceTypeNames[] = {"Invalid",
- "ParallelBatchWriterMode",
- "ReplicationStateTransition",
- "Global",
- "Database",
- "Collection",
- "Metadata",
- "Mutex"};
+static const char* ResourceTypeNames[] = {
+ "Invalid", "Global", "Database", "Collection", "Metadata", "Mutex"};
+
+/**
+ * Maps the global resource id to a human-readable string.
+ */
+static const char* ResourceGlobalIdNames[] = {
+ "ParallelBatchWriterMode",
+ "FeatureCompatibilityVersion",
+ "ReplicationStateTransition",
+ "Global",
+};
// Ensure we do not add new types without updating the names array.
MONGO_STATIC_ASSERT((sizeof(ResourceTypeNames) / sizeof(ResourceTypeNames[0])) ==
ResourceTypesCount);
+// Ensure we do not add new global resource ids without updating the names array.
+MONGO_STATIC_ASSERT((sizeof(ResourceGlobalIdNames) / sizeof(ResourceGlobalIdNames[0])) ==
+ static_cast<uint8_t>(ResourceGlobalId::kNumIds));
+
/**
* Returns a human-readable name for the specified resource type.
*/
@@ -200,6 +215,13 @@ static const char* resourceTypeName(ResourceType resourceType) {
}
/**
+ * Returns a human-readable name for the specified global resource.
+ */
+static const char* resourceGlobalIdName(ResourceGlobalId id) {
+ return ResourceGlobalIdNames[static_cast<uint8_t>(id)];
+}
+
+/**
* Uniquely identifies a lockable resource.
*/
class ResourceId {
@@ -291,9 +313,14 @@ extern const ResourceId resourceIdGlobal;
// this lock.
extern const ResourceId resourceIdParallelBatchWriterMode;
+// Hardcoded resource id for a full FCV transition from start -> upgrading -> upgraded (or
+// equivalent for downgrading). This lock is used as a barrier to prevent writes from spanning an
+// FCV change. This lock is acquired after the PBWM but before the RSTL and resourceIdGlobal.
+extern const ResourceId resourceIdFeatureCompatibilityVersion;
+
// Hardcoded resource id for the ReplicationStateTransitionLock (RSTL). This lock is acquired in
// mode X for any replication state transition and is acquired by all other reads and writes in mode
-// IX. This lock is acquired after the PBWM but before the resourceIdGlobal.
+// IX. This lock is acquired after the PBWM and FCV locks but before the resourceIdGlobal.
extern const ResourceId resourceIdReplicationStateTransitionLock;
/**
diff --git a/src/mongo/db/concurrency/lock_state.cpp b/src/mongo/db/concurrency/lock_state.cpp
index 33567b900b8..fa7dd845bb4 100644
--- a/src/mongo/db/concurrency/lock_state.cpp
+++ b/src/mongo/db/concurrency/lock_state.cpp
@@ -144,8 +144,6 @@ bool LockerImpl::_shouldDelayUnlock(ResourceId resId, LockMode mode) const {
return false;
case RESOURCE_GLOBAL:
- case RESOURCE_PBWM:
- case RESOURCE_RSTL:
case RESOURCE_DATABASE:
case RESOURCE_COLLECTION:
case RESOURCE_METADATA:
@@ -437,8 +435,7 @@ bool LockerImpl::unlockGlobal() {
// error for any lock used with multi-granularity locking to have more references than
// the global lock, because every scope starts by calling lockGlobal.
const auto resType = it.key().getType();
- if (resType == RESOURCE_GLOBAL || resType == RESOURCE_PBWM || resType == RESOURCE_RSTL ||
- resType == RESOURCE_MUTEX) {
+ if (resType == RESOURCE_GLOBAL || resType == RESOURCE_MUTEX) {
it.next();
} else {
invariant(_unlockImpl(&it));
@@ -789,8 +786,9 @@ bool LockerImpl::saveLockStateAndUnlock(Locker::LockSnapshot* stateOut) {
// We should never have to save and restore metadata locks.
invariant(RESOURCE_DATABASE == resType || RESOURCE_COLLECTION == resType ||
- (RESOURCE_PBWM == resType && isSharedLockMode(it->mode)) ||
- (RESOURCE_RSTL == resType && it->mode == MODE_IX));
+ (resId == resourceIdParallelBatchWriterMode && isSharedLockMode(it->mode)) ||
+ resId == resourceIdFeatureCompatibilityVersion ||
+ (resId == resourceIdReplicationStateTransitionLock && it->mode == MODE_IX));
// And, stuff the info into the out parameter.
OneLock info;
@@ -890,7 +888,7 @@ LockResult LockerImpl::_lockBegin(OperationContext* opCtx, ResourceId resId, Loc
// Give priority to the full modes for Global, PBWM, and RSTL resources so we don't stall global
// operations such as shutdown or stepdown.
const ResourceType resType = resId.getType();
- if (resType == RESOURCE_GLOBAL || resType == RESOURCE_PBWM || resType == RESOURCE_RSTL) {
+ if (resType == RESOURCE_GLOBAL) {
if (mode == MODE_S || mode == MODE_X) {
request->enqueueAtFront = true;
request->compatibleFirst = true;
diff --git a/src/mongo/db/concurrency/lock_stats.cpp b/src/mongo/db/concurrency/lock_stats.cpp
index f724c90a240..980b4fd0a5e 100644
--- a/src/mongo/db/concurrency/lock_stats.cpp
+++ b/src/mongo/db/concurrency/lock_stats.cpp
@@ -42,9 +42,15 @@ LockStats<CounterType>::LockStats() {
template <typename CounterType>
void LockStats<CounterType>::report(BSONObjBuilder* builder) const {
- // All indexing below starts from offset 1, because we do not want to report/account
- // position 0, which is a sentinel value for invalid resource/no lock.
- for (int i = 1; i < ResourceTypesCount; i++) {
+ for (uint8_t i = 0; i < static_cast<uint8_t>(ResourceGlobalId::kNumIds); ++i) {
+ _report(builder,
+ resourceGlobalIdName(static_cast<ResourceGlobalId>(i)),
+ _resourceGlobalStats[i]);
+ }
+
+ // Index starting from offset 2 because position 0 is a sentinel value for invalid resource/no
+ // lock, and position 1 is the global resource which was already reported above.
+ for (int i = 2; i < ResourceTypesCount; i++) {
_report(builder, resourceTypeName(static_cast<ResourceType>(i)), _stats[i]);
}
@@ -119,6 +125,12 @@ void LockStats<CounterType>::_report(BSONObjBuilder* builder,
template <typename CounterType>
void LockStats<CounterType>::reset() {
+ for (uint8_t i = 0; i < static_cast<uint8_t>(ResourceGlobalId::kNumIds); ++i) {
+ for (uint8_t mode = 0; mode < LockModesCount; ++mode) {
+ _resourceGlobalStats[i].modeStats[mode].reset();
+ }
+ }
+
for (int i = 0; i < ResourceTypesCount; i++) {
for (int mode = 0; mode < LockModesCount; mode++) {
_stats[i].modeStats[mode].reset();
diff --git a/src/mongo/db/concurrency/lock_stats.h b/src/mongo/db/concurrency/lock_stats.h
index d45b3b38864..69855480393 100644
--- a/src/mongo/db/concurrency/lock_stats.h
+++ b/src/mongo/db/concurrency/lock_stats.h
@@ -141,6 +141,10 @@ public:
return _oplogStats.modeStats[mode];
}
+ if (resId.getType() == RESOURCE_GLOBAL) {
+ return _resourceGlobalStats[resId.getHashId()].modeStats[mode];
+ }
+
return _stats[resId.getType()].modeStats[mode];
}
@@ -148,7 +152,15 @@ public:
void append(const LockStats<OtherType>& other) {
typedef LockStatCounters<OtherType> OtherLockStatCountersType;
- // Append all lock stats
+ // Append global lock stats.
+ for (uint8_t i = 0; i < static_cast<uint8_t>(ResourceGlobalId::kNumIds); ++i) {
+ for (uint8_t mode = 0; mode < LockModesCount; ++mode) {
+ _resourceGlobalStats[i].modeStats[mode].append(
+ other._resourceGlobalStats[i].modeStats[mode]);
+ }
+ }
+
+ // Append all non-global, non-oplog lock stats.
for (int i = 0; i < ResourceTypesCount; i++) {
for (int mode = 0; mode < LockModesCount; mode++) {
const OtherLockStatCountersType& otherStats = other._stats[i].modeStats[mode];
@@ -169,6 +181,13 @@ public:
void subtract(const LockStats<OtherType>& other) {
typedef LockStatCounters<OtherType> OtherLockStatCountersType;
+ for (uint8_t i = 0; i < static_cast<uint8_t>(ResourceGlobalId::kNumIds); ++i) {
+ for (uint8_t mode = 0; mode < LockModesCount; ++mode) {
+ _resourceGlobalStats[i].modeStats[mode].subtract(
+ other._resourceGlobalStats[i].modeStats[mode]);
+ }
+ }
+
for (int i = 0; i < ResourceTypesCount; i++) {
for (int mode = 0; mode < LockModesCount; mode++) {
const OtherLockStatCountersType& otherStats = other._stats[i].modeStats[mode];
@@ -206,8 +225,10 @@ private:
const PerModeLockStatCounters& stat) const;
- // Split the lock stats per resource type. Special-case the oplog so we can collect more
- // detailed stats for it.
+ // For the global resource, split the lock stats per ID since each one should be reported
+ // separately. For the remaining resources, split the lock stats per resource type. Special-case
+ // the oplog so we can collect more detailed stats for it.
+ PerModeLockStatCounters _resourceGlobalStats[static_cast<uint8_t>(ResourceGlobalId::kNumIds)];
PerModeLockStatCounters _stats[ResourceTypesCount];
PerModeLockStatCounters _oplogStats;
};
diff --git a/src/mongo/db/concurrency/locker.h b/src/mongo/db/concurrency/locker.h
index 37b6dbeedb8..5945ab1857b 100644
--- a/src/mongo/db/concurrency/locker.h
+++ b/src/mongo/db/concurrency/locker.h
@@ -494,6 +494,18 @@ public:
}
/**
+ * If set to false, this opts out of conflicting with the barrier created by the
+ * setFeatureCompatibilityVersion command. Code that opts-out must be ok with writes being able
+ * to start under one FCV and complete under a different FCV.
+ */
+ void setShouldConflictWithSetFeatureCompatibilityVersion(bool newValue) {
+ _shouldConflictWithSetFeatureCompatibilityVersion = newValue;
+ }
+ bool shouldConflictWithSetFeatureCompatibilityVersion() const {
+ return _shouldConflictWithSetFeatureCompatibilityVersion;
+ }
+
+ /**
* If set to true, this opts out of a fatal assertion where operations which are holding open an
* oplog hole cannot try to acquire subsequent locks.
*/
@@ -570,6 +582,7 @@ protected:
private:
bool _shouldConflictWithSecondaryBatchApplication = true;
+ bool _shouldConflictWithSetFeatureCompatibilityVersion = true;
bool _shouldAllowLockAcquisitionOnTimestampedUnitOfWork = false;
bool _shouldAcquireTicket = true;
std::string _debugInfo; // Extra info about this locker for debugging purpose
@@ -634,6 +647,26 @@ private:
};
/**
+ * RAII-style class to opt out the FeatureCompatibilityVersion lock.
+ */
+class ShouldNotConflictWithSetFeatureCompatibilityVersionBlock {
+public:
+ explicit ShouldNotConflictWithSetFeatureCompatibilityVersionBlock(Locker* lockState)
+ : _lockState(lockState),
+ _originalShouldConflict(_lockState->shouldConflictWithSetFeatureCompatibilityVersion()) {
+ _lockState->setShouldConflictWithSetFeatureCompatibilityVersion(false);
+ }
+
+ ~ShouldNotConflictWithSetFeatureCompatibilityVersionBlock() {
+ _lockState->setShouldConflictWithSetFeatureCompatibilityVersion(_originalShouldConflict);
+ }
+
+private:
+ Locker* const _lockState;
+ const bool _originalShouldConflict;
+};
+
+/**
* RAII-style class to opt out of a fatal assertion where operations that set a timestamp on a
* WriteUnitOfWork cannot try to acquire subsequent locks. When an operation is writing at a
* specific timestamp, it creates an oplog hole at that timestamp. The oplog visibility rules only
diff --git a/src/mongo/db/s/transaction_coordinator_util.cpp b/src/mongo/db/s/transaction_coordinator_util.cpp
index 6c71a3071cb..776ead96215 100644
--- a/src/mongo/db/s/transaction_coordinator_util.cpp
+++ b/src/mongo/db/s/transaction_coordinator_util.cpp
@@ -351,6 +351,15 @@ repl::OpTime persistDecisionBlocking(OperationContext* opCtx,
sessionInfo.setTxnRetryCounter(*txnNumberAndRetryCounter.getTxnRetryCounter());
}
+ // The transaction participant is already holding a global IX lock (and therefore an FCV IX
+ // lock) when we get to this point. Since the setFCV command takes an FCV S lock, we can hit a
+ // deadlock if the setFCV enqueues its lock after the transaction participant has already
+ // acquired its lock, but before we (the transaction coordinator) acquire ours. To remedy this,
+ // we choose to bypass this barrier that setFCV creates. This is safe because the setFCV command
+ // drains any outstanding cross-shard transactions before completing an FCV upgrade/downgrade.
+ // It does so by waiting for the participant portion of the cross-shard transaction to have
+ // released its FCV IX lock.
+ ShouldNotConflictWithSetFeatureCompatibilityVersionBlock noFCVLock{opCtx->lockState()};
DBDirectClient client(opCtx);
// Throws if serializing the request or deserializing the response fails.
diff --git a/src/mongo/db/stats/fill_locker_info.cpp b/src/mongo/db/stats/fill_locker_info.cpp
index f78b3a991e4..1f73d4dcc72 100644
--- a/src/mongo/db/stats/fill_locker_info.cpp
+++ b/src/mongo/db/stats/fill_locker_info.cpp
@@ -41,12 +41,17 @@ void fillLockerInfo(const Locker::LockerInfo& lockerInfo, BSONObjBuilder& infoBu
BSONObjBuilder locks(infoBuilder.subobjStart("locks"));
const size_t locksSize = lockerInfo.locks.size();
- // Only add the last lock of each type, and use the largest mode encountered
- LockMode modeForType[ResourceTypesCount] = {}; // default initialize to zero (min value)
+ // Only add the last lock of each type, and use the largest mode encountered. Each type of
+ // global resource is reported as its own type.
+ LockMode modeForType[static_cast<uint8_t>(ResourceGlobalId::kNumIds) + ResourceTypesCount - 1] =
+ {}; // default initialize to zero (min value)
for (size_t i = 0; i < locksSize; i++) {
const Locker::OneLock& lock = lockerInfo.locks[i];
const ResourceType lockType = lock.resourceId.getType();
- const LockMode lockMode = std::max(lock.mode, modeForType[lockType]);
+ auto index = lockType == RESOURCE_GLOBAL
+ ? lock.resourceId.getHashId()
+ : static_cast<uint8_t>(ResourceGlobalId::kNumIds) + lockType - 1;
+ const LockMode lockMode = std::max(lock.mode, modeForType[index]);
// Check that lockerInfo is sorted on resource type
invariant(i == 0 || lockType >= lockerInfo.locks[i - 1].resourceId.getType());
@@ -56,10 +61,16 @@ void fillLockerInfo(const Locker::LockerInfo& lockerInfo, BSONObjBuilder& infoBu
continue;
}
- modeForType[lockType] = lockMode;
+ modeForType[index] = lockMode;
- if (i + 1 < locksSize && lockerInfo.locks[i + 1].resourceId.getType() == lockType) {
+ if (i + 1 < locksSize && lockerInfo.locks[i + 1].resourceId.getType() == lockType &&
+ (lockType != RESOURCE_GLOBAL ||
+ lock.resourceId.getHashId() == lockerInfo.locks[i + 1].resourceId.getHashId())) {
continue; // skip this lock as it is not the last one of its type
+ } else if (lockType == RESOURCE_GLOBAL) {
+ locks.append(
+ resourceGlobalIdName(static_cast<ResourceGlobalId>(lock.resourceId.getHashId())),
+ legacyModeName(lockMode));
} else {
locks.append(resourceTypeName(lockType), legacyModeName(lockMode));
}
diff --git a/src/mongo/db/storage/control/journal_flusher.cpp b/src/mongo/db/storage/control/journal_flusher.cpp
index 212f51cd88f..765d826cda8 100644
--- a/src/mongo/db/storage/control/journal_flusher.cpp
+++ b/src/mongo/db/storage/control/journal_flusher.cpp
@@ -87,13 +87,20 @@ void JournalFlusher::run() {
[&] { return _flushJournalNow || _needToPause || _shuttingDown; });
}
- // Initialize the thread's opCtx.
- _uniqueCtx.emplace(tc->makeOperationContext());
+ auto setUpOpCtx = [&] {
+ // Initialize the thread's opCtx.
+ _uniqueCtx.emplace(tc->makeOperationContext());
- // Updates to a non-replicated collection, oplogTruncateAfterPoint, are made by this thread.
- // Non-replicated writes will not contribute to replication lag and can be safely excluded
- // from Flow Control.
- _uniqueCtx->get()->setShouldParticipateInFlowControl(false);
+ // Updates to a non-replicated collection, oplogTruncateAfterPoint, are made by this thread.
+ // Non-replicated writes will not contribute to replication lag and can be safely excluded
+ // from Flow Control.
+ _uniqueCtx->get()->setShouldParticipateInFlowControl(false);
+
+ // The journal flusher should not conflict with the setFCV command.
+ _uniqueCtx->get()->lockState()->setShouldConflictWithSetFeatureCompatibilityVersion(false);
+ };
+
+ setUpOpCtx();
while (true) {
pauseJournalFlusherBeforeFlush.pauseWhileSet();
try {
@@ -107,8 +114,7 @@ void JournalFlusher::run() {
// the time during or before the next flush.
stdx::lock_guard<Latch> lk(_opCtxMutex);
_uniqueCtx.reset();
- _uniqueCtx.emplace(tc->makeOperationContext());
- _uniqueCtx->get()->setShouldParticipateInFlowControl(false);
+ setUpOpCtx();
});
_uniqueCtx->get()->recoveryUnit()->waitUntilDurable(_uniqueCtx->get());