author     Josef Ahmad <josef.ahmad@mongodb.com>    2022-08-31 07:01:16 +0000
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>    2022-08-31 08:03:39 +0000
commit     a4bd3ce3607d2c3020d7efa3501240ae4b1a1b03 (patch)
tree       f791f99354476f893c1b79356accfb3a50335f92
parent     6ffbe0d149bf9419b227a0fa80928437c92ed8c5 (diff)
download   mongo-a4bd3ce3607d2c3020d7efa3501240ae4b1a1b03.tar.gz
SERVER-67538 Make multi-doc txns return WCE on index catalog changes
Background: SERVER-47866 stopped bumping the collection's minimum visible timestamp on catalog changes related to an index; only the index's minimum visible snapshot continues to be updated. One side effect of this change is that a multi-document transaction can read at a snapshot where the index is not yet ready and commit at a timestamp when the index is ready, which is not intended behaviour and opens the opportunity for a race. This patch introduces a check against the indices' minimum visible timestamp. Attempting to write to an index entry while reading at an incompatible timestamp returns a write conflict exception. Locking rules guarantee that we see a consistent in-memory view of the indices' minimum visible snapshot.
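For illustration, below is a minimal, self-contained C++ sketch of the visibility check this patch adds. The Timestamp, IndexEntry, and WriteConflictException names are simplified stand-ins for the real server types, not the actual MongoDB API; only the comparison logic mirrors the new code in index_access_method.cpp.

// Simplified sketch of the minimum-visible-snapshot check (hypothetical types,
// not the real MongoDB classes).
#include <cstdint>
#include <iostream>
#include <optional>
#include <stdexcept>

struct Timestamp {
    std::uint64_t value = 0;
    bool isNull() const { return value == 0; }
    bool operator<(const Timestamp& other) const { return value < other.value; }
};

struct WriteConflictException : std::runtime_error {
    using std::runtime_error::runtime_error;
};

struct IndexEntry {
    // Set when the index becomes ready; writers reading at an older snapshot must conflict.
    std::optional<Timestamp> minimumVisibleSnapshot;
};

// Throws a write conflict if the transaction's read timestamp predates the point
// at which the index became visible.
void checkIndexVisibleAtReadTimestamp(const IndexEntry& index, Timestamp readTimestamp) {
    if (index.minimumVisibleSnapshot && !readTimestamp.isNull() &&
        readTimestamp < *index.minimumVisibleSnapshot) {
        throw WriteConflictException(
            "Unable to read from a snapshot due to pending catalog changes.");
    }
}

int main() {
    IndexEntry index{Timestamp{20}};                          // index build committed at ts 20
    checkIndexVisibleAtReadTimestamp(index, Timestamp{25});   // ok: snapshot sees the index
    try {
        checkIndexVisibleAtReadTimestamp(index, Timestamp{10});  // snapshot predates the index
    } catch (const WriteConflictException& e) {
        std::cout << "write conflict: " << e.what() << "\n";
    }
    return 0;
}

In the server itself, a write conflict thrown inside a multi-document transaction surfaces to the client as a transient transaction error, which is what the test below asserts.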
-rw-r--r--  jstests/noPassthrough/txn_index_catalog_changes.js   80
-rw-r--r--  src/mongo/db/index/index_access_method.cpp           20
2 files changed, 100 insertions, 0 deletions
diff --git a/jstests/noPassthrough/txn_index_catalog_changes.js b/jstests/noPassthrough/txn_index_catalog_changes.js
new file mode 100644
index 00000000000..b29a8129a8b
--- /dev/null
+++ b/jstests/noPassthrough/txn_index_catalog_changes.js
@@ -0,0 +1,80 @@
+/**
+ * Verifies that a multi-document transaction aborts with WriteConflictError if an index build has
+ * committed since the transaction's read snapshot.
+ *
+ * @tags: [
+ * requires_replication,
+ * ]
+ */
+(function() {
+'use strict';
+
+const replTest = new ReplSetTest({nodes: 2});
+replTest.startSet();
+replTest.initiate();
+
+const primary = replTest.getPrimary();
+const db = primary.getDB('test');
+
+// Transaction inserting an index key.
+{
+ assert.commandWorked(db['c'].insertOne({_id: 0, num: 0}));
+
+ const s0 = db.getMongo().startSession();
+ s0.startTransaction();
+ assert.commandWorked(s0.getDatabase('test')['c'].deleteOne({_id: 0}));
+ s0.commitTransaction();
+
+ const clusterTime = s0.getClusterTime().clusterTime;
+
+ assert.commandWorked(db['c'].createIndex({num: 1}));
+
+ // Start a transaction whose snapshot predates the completion of the index build, and which
+ // reserves an oplog entry after the index build commits.
+ try {
+ const s1 = db.getMongo().startSession();
+ s1.startTransaction({readConcern: {level: "snapshot", atClusterTime: clusterTime}});
+ s1.getDatabase('test').c.insertOne({_id: 1, num: 1});
+
+ // Transaction should have failed.
+ assert(0);
+ } catch (e) {
+ assert(e.hasOwnProperty("errorLabels"), tojson(e));
+ assert.contains("TransientTransactionError", e.errorLabels, tojson(e));
+ assert.eq(e["code"], ErrorCodes.WriteConflict, tojson(e));
+ }
+}
+
+db.c.drop();
+
+// Transaction deleting an index key.
+{
+ assert.commandWorked(db.createCollection('c'));
+
+ const s0 = db.getMongo().startSession();
+ s0.startTransaction();
+ assert.commandWorked(s0.getDatabase('test')['c'].insertOne({_id: 0, num: 0}));
+ s0.commitTransaction();
+
+ const clusterTime = s0.getClusterTime().clusterTime;
+
+ assert.commandWorked(db['c'].createIndex({num: 1}));
+
+ // Start a transaction whose snapshot predates the completion of the index build, and which
+ // reserves an oplog entry after the index build commits.
+ try {
+ const s1 = db.getMongo().startSession();
+ s1.startTransaction({readConcern: {level: "snapshot", atClusterTime: clusterTime}});
+ s1.getDatabase('test').c.deleteOne({_id: 0});
+
+ // Transaction should have failed.
+ assert(0);
+ } catch (e) {
+ assert(e.hasOwnProperty("errorLabels"), tojson(e));
+ assert.contains("TransientTransactionError", e.errorLabels, tojson(e));
+ assert.eq(e["code"], ErrorCodes.WriteConflict, tojson(e));
+ }
+}
+
+replTest.stopSet();
+})();
diff --git a/src/mongo/db/index/index_access_method.cpp b/src/mongo/db/index/index_access_method.cpp
index f7bba190c2e..cac540aae8f 100644
--- a/src/mongo/db/index/index_access_method.cpp
+++ b/src/mongo/db/index/index_access_method.cpp
@@ -1116,6 +1116,17 @@ Status SortedDataIndexAccessMethod::_indexKeysOrWriteToSideTable(
*keysInsertedOut += inserted;
}
} else {
+            // Ensure that our snapshot is compatible with the index's minimum visible snapshot.
+ const auto minVisibleTimestamp = _indexCatalogEntry->getMinimumVisibleSnapshot();
+ const auto readTimestamp =
+ opCtx->recoveryUnit()->getPointInTimeReadTimestamp(opCtx).value_or(
+ opCtx->recoveryUnit()->getCatalogConflictingTimestamp());
+ if (minVisibleTimestamp && !readTimestamp.isNull() &&
+ readTimestamp < *minVisibleTimestamp) {
+ throwWriteConflictException(
+ "Unable to read from a snapshot due to pending catalog changes.");
+ }
+
int64_t numInserted = 0;
status = insertKeysAndUpdateMultikeyPaths(
opCtx,
@@ -1178,6 +1189,15 @@ void SortedDataIndexAccessMethod::_unindexKeysOrWriteToSideTable(
options.dupsAllowed = options.dupsAllowed || !_indexCatalogEntry->isReady(opCtx) ||
(checkRecordId == CheckRecordId::On);
+    // Ensure that our snapshot is compatible with the index's minimum visible snapshot.
+ const auto minVisibleTimestamp = _indexCatalogEntry->getMinimumVisibleSnapshot();
+ const auto readTimestamp = opCtx->recoveryUnit()->getPointInTimeReadTimestamp(opCtx).value_or(
+ opCtx->recoveryUnit()->getCatalogConflictingTimestamp());
+ if (minVisibleTimestamp && !readTimestamp.isNull() && readTimestamp < *minVisibleTimestamp) {
+ throwWriteConflictException(
+ "Unable to read from a snapshot due to pending catalog changes.");
+ }
+
int64_t removed = 0;
Status status = removeKeys(opCtx, keys, options, &removed);