summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Lingzhi Deng <lingzhi.deng@mongodb.com> 2021-05-07 21:53:44 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2021-05-07 22:30:08 +0000
commit 1690976bdb4bab7d03641391d72391394d23085c (patch)
tree 6d3a55ef211cb6e7949e87a5e45d1285143be9cb
parent a243d0222bd009020fe0152359536823af36f986 (diff)
download mongo-1690976bdb4bab7d03641391d72391394d23085c.tar.gz
SERVER-55578: Disallow atClusterTime reads and afterClusterTime majority reads on config.transactions
-rw-r--r-- jstests/noPassthrough/config_transactions_reads.js 37
-rw-r--r-- src/mongo/db/db_raii.cpp 28
-rw-r--r-- src/mongo/db/repl/read_concern_args.cpp 10
-rw-r--r-- src/mongo/db/repl/read_concern_args.h 8
-rw-r--r-- src/mongo/db/repl/tenant_migration_recipient_service.cpp 1
-rw-r--r-- src/mongo/db/s/resharding/resharding_txn_cloner.cpp 4
6 files changed, 81 insertions, 7 deletions
diff --git a/jstests/noPassthrough/config_transactions_reads.js b/jstests/noPassthrough/config_transactions_reads.js
new file mode 100644
index 00000000000..d5ec5dd5f92
--- /dev/null
+++ b/jstests/noPassthrough/config_transactions_reads.js
@@ -0,0 +1,37 @@
+/* Test that snapshot reads and afterClusterTime majority reads are not allowed on
+ * config.transactions.
+ *
+ * @tags: [
+ * requires_majority_read_concern,
+ * requires_persistence,
+ * requires_replication,
+ * ]
+ */
+(function() {
+"use strict";
+
+const replSet = new ReplSetTest({nodes: 1});
+
+replSet.startSet();
+replSet.initiate();
+
+const primary = replSet.getPrimary();
+const primaryDB = primary.getDB('config');
+
+const operationTime =
+ assert.commandWorked(primaryDB.runCommand({find: "transactions"})).operationTime;
+assert.commandWorked(
+ primaryDB.runCommand({find: "transactions", readConcern: {level: "majority"}}));
+assert.commandFailedWithCode(
+ primaryDB.runCommand(
+ {find: "transactions", readConcern: {level: "majority", afterClusterTime: operationTime}}),
+ 5557800);
+assert.commandFailedWithCode(
+ primaryDB.runCommand({find: "transactions", readConcern: {level: "snapshot"}}), 5557800);
+assert.commandFailedWithCode(
+ primaryDB.runCommand(
+ {find: "transactions", readConcern: {level: "snapshot", atClusterTime: operationTime}}),
+ 5557800);
+
+replSet.stopSet();
+})();
diff --git a/src/mongo/db/db_raii.cpp b/src/mongo/db/db_raii.cpp
index b2052bb8080..7a0eda9b506 100644
--- a/src/mongo/db/db_raii.cpp
+++ b/src/mongo/db/db_raii.cpp
@@ -254,6 +254,30 @@ AutoGetCollectionForReadBase<AutoGetCollectionType, EmplaceAutoCollFunc>::
!coll->isCapped() ||
readConcernLevel != repl::ReadConcernLevel::kSnapshotReadConcern);
+ // Disallow snapshot reads and causal consistent majority reads on config.transactions
+ // outside of transactions to avoid reading the collection at a point-in-time in the middle
+ // of a secondary batch. Such reads are unsafe because config.transactions updates are
+ // coalesced on secondaries. Majority reads without an afterClusterTime are allowed because
+ // they are allowed to return arbitrarily stale data. We allow kNoTimestamp and kLastApplied
+ // reads because they must be from internal readers given the snapshot/majority readConcern
+ // (e.g. for session checkout).
+ const NamespaceString nss = coll->ns();
+ const auto afterClusterTime = repl::ReadConcernArgs::get(opCtx).getArgsAfterClusterTime();
+ const auto allowTransactionTableSnapshot =
+ repl::ReadConcernArgs::get(opCtx).allowTransactionTableSnapshot();
+ auto readSource = opCtx->recoveryUnit()->getTimestampReadSource();
+ if (nss == NamespaceString::kSessionTransactionsTableNamespace &&
+ readSource != RecoveryUnit::ReadSource::kNoTimestamp &&
+ readSource != RecoveryUnit::ReadSource::kLastApplied &&
+ ((readConcernLevel == repl::ReadConcernLevel::kSnapshotReadConcern &&
+ !allowTransactionTableSnapshot) ||
+ (readConcernLevel == repl::ReadConcernLevel::kMajorityReadConcern &&
+ afterClusterTime))) {
+ uasserted(5557800,
+ "Snapshot reads and causal consistent majority reads on config.transactions "
+ "are not supported");
+ }
+
// During batch application on secondaries, there is a potential to read inconsistent states
// that would normally be protected by the PBWM lock. In order to serve secondary reads
// during this period, we default to not acquiring the lock (by setting
@@ -262,9 +286,6 @@ AutoGetCollectionForReadBase<AutoGetCollectionType, EmplaceAutoCollFunc>::
// guarantee we read at a consistent state, so we must read at the lastApplied timestamp,
// which is set after each complete batch.
- const NamespaceString nss = coll->ns();
- auto readSource = opCtx->recoveryUnit()->getTimestampReadSource();
-
// Once we have our locks, check whether or not we should override the ReadSource that was
// set before acquiring locks.
auto [newReadSource, shouldReadAtLastApplied] =
@@ -275,7 +296,6 @@ AutoGetCollectionForReadBase<AutoGetCollectionType, EmplaceAutoCollFunc>::
}
const auto readTimestamp = opCtx->recoveryUnit()->getPointInTimeReadTimestamp(opCtx);
- const auto afterClusterTime = repl::ReadConcernArgs::get(opCtx).getArgsAfterClusterTime();
if (readTimestamp && afterClusterTime) {
// Readers that use afterClusterTime have already waited at a higher level for the
// all_durable time to advance to a specified optime, and they assume the read timestamp
diff --git a/src/mongo/db/repl/read_concern_args.cpp b/src/mongo/db/repl/read_concern_args.cpp
index 27c949b1d63..a95cf27880f 100644
--- a/src/mongo/db/repl/read_concern_args.cpp
+++ b/src/mongo/db/repl/read_concern_args.cpp
@@ -187,6 +187,12 @@ Status ReadConcernArgs::parse(const BSONObj& readConcernObj) {
<< readConcernLevels::kAvailableName << "', or '"
<< readConcernLevels::kSnapshotName << "'");
}
+ } else if (fieldName == kAllowTransactionTableSnapshot) {
+ auto status = bsonExtractBooleanField(
+ readConcernObj, kAllowTransactionTableSnapshot, &_allowTransactionTableSnapshot);
+ if (!status.isOK()) {
+ return status;
+ }
} else if (fieldName == ReadWriteConcernProvenance::kSourceFieldName) {
try {
_provenance = ReadWriteConcernProvenance::parse(
@@ -298,6 +304,10 @@ void ReadConcernArgs::_appendInfoInner(BSONObjBuilder* builder) const {
builder->append(kAtClusterTimeFieldName, _atClusterTime->asTimestamp());
}
+ if (_allowTransactionTableSnapshot) {
+ builder->append(kAllowTransactionTableSnapshot, _allowTransactionTableSnapshot);
+ }
+
_provenance.serialize(builder);
}
diff --git a/src/mongo/db/repl/read_concern_args.h b/src/mongo/db/repl/read_concern_args.h
index 5c8b0d101fd..2bc1b5bdee4 100644
--- a/src/mongo/db/repl/read_concern_args.h
+++ b/src/mongo/db/repl/read_concern_args.h
@@ -54,6 +54,8 @@ public:
static constexpr StringData kAfterClusterTimeFieldName = "afterClusterTime"_sd;
static constexpr StringData kAtClusterTimeFieldName = "atClusterTime"_sd;
static constexpr StringData kLevelFieldName = "level"_sd;
+ static constexpr StringData kAllowTransactionTableSnapshot =
+ "$_allowTransactionTableSnapshot"_sd;
static const BSONObj kImplicitDefault;
@@ -213,6 +215,10 @@ public:
return _atClusterTimeSelected;
}
+ bool allowTransactionTableSnapshot() const {
+ return _allowTransactionTableSnapshot;
+ }
+
private:
/**
* Appends level, afterOpTime, and the other "inner" fields of the read concern args.
@@ -249,6 +255,8 @@ private:
ReadWriteConcernProvenance _provenance;
bool _atClusterTimeSelected = false;
+
+ bool _allowTransactionTableSnapshot = false;
};
} // namespace repl
diff --git a/src/mongo/db/repl/tenant_migration_recipient_service.cpp b/src/mongo/db/repl/tenant_migration_recipient_service.cpp
index 37e099b14b1..8ce9d416084 100644
--- a/src/mongo/db/repl/tenant_migration_recipient_service.cpp
+++ b/src/mongo/db/repl/tenant_migration_recipient_service.cpp
@@ -890,7 +890,6 @@ TenantMigrationRecipientService::Instance::_makeCommittedTransactionsAggregation
std::move(serializedPipeline));
auto readConcern = repl::ReadConcernArgs(
- boost::optional<LogicalTime>(startFetchingTimestamp),
boost::optional<repl::ReadConcernLevel>(repl::ReadConcernLevel::kMajorityReadConcern));
aggRequest.setReadConcern(readConcern.toBSONInner());
diff --git a/src/mongo/db/s/resharding/resharding_txn_cloner.cpp b/src/mongo/db/s/resharding/resharding_txn_cloner.cpp
index dbbe4ca3049..161c3ef9533 100644
--- a/src/mongo/db/s/resharding/resharding_txn_cloner.cpp
+++ b/src/mongo/db/s/resharding/resharding_txn_cloner.cpp
@@ -127,8 +127,8 @@ std::unique_ptr<Pipeline, PipelineDeleter> ReshardingTxnCloner::_targetAggregati
request.setReadConcern(BSON(repl::ReadConcernArgs::kLevelFieldName
<< repl::readConcernLevels::kSnapshotName
- << repl::ReadConcernArgs::kAtClusterTimeFieldName
- << _fetchTimestamp));
+ << repl::ReadConcernArgs::kAtClusterTimeFieldName << _fetchTimestamp
+ << repl::ReadConcernArgs::kAllowTransactionTableSnapshot << true));
request.setWriteConcern(WriteConcernOptions());
request.setHint(BSON(SessionTxnRecord::kSessionIdFieldName << 1));
request.setUnwrappedReadPref(ReadPreferenceSetting{ReadPreference::Nearest}.toContainingBSON());