From fd61bf1c3764ee079d3dcde44a2eb24352c22ae5 Mon Sep 17 00:00:00 2001
From: Josef Ahmad
Date: Wed, 31 Aug 2022 07:01:16 +0000
Subject: SERVER-67538 Make multi-doc txns return WCE on index catalog changes

Background: SERVER-47866 stopped bumping the collection's minimum
visibility timestamp on catalog changes related to an index; only the
index's minimum visibility snapshot continues to be updated. One side
effect of this change is that a multi-document transaction can read at a
snapshot where the index is not yet ready and commit at a timestamp when
the index is ready, which is not intended behaviour and opens a window
for a race.

This patch introduces a check for the indices' minimum visible
timestamp. Attempting to write to an index entry while reading at an
incompatible timestamp returns a write conflict exception. Locking rules
guarantee that we see a consistent in-memory view of the indices'
minimum visible snapshot.

(cherry picked from commit a4bd3ce3607d2c3020d7efa3501240ae4b1a1b03)
---
 jstests/noPassthrough/txn_index_catalog_changes.js | 80 ++++++++++++++++++++++
 src/mongo/db/index/index_access_method.cpp         | 20 ++++++
 2 files changed, 100 insertions(+)
 create mode 100644 jstests/noPassthrough/txn_index_catalog_changes.js

diff --git a/jstests/noPassthrough/txn_index_catalog_changes.js b/jstests/noPassthrough/txn_index_catalog_changes.js
new file mode 100644
index 00000000000..b29a8129a8b
--- /dev/null
+++ b/jstests/noPassthrough/txn_index_catalog_changes.js
@@ -0,0 +1,80 @@
+/**
+ * Verifies that a multi-document transaction aborts with WriteConflictError if an index build has
+ * committed since the transaction's read snapshot.
+ *
+ * @tags: [
+ *   requires_replication,
+ * ]
+ */
+(function() {
+'use strict';
+
+const replTest = new ReplSetTest({nodes: 2});
+replTest.startSet();
+replTest.initiate();
+
+const primary = replTest.getPrimary();
+const db = primary.getDB('test');
+
+// Transaction inserting an index key.
+{
+    assert.commandWorked(db['c'].insertOne({_id: 0, num: 0}));
+
+    const s0 = db.getMongo().startSession();
+    s0.startTransaction();
+    assert.commandWorked(s0.getDatabase('test')['c'].deleteOne({_id: 0}));
+    s0.commitTransaction();
+
+    const clusterTime = s0.getClusterTime().clusterTime;
+
+    assert.commandWorked(db['c'].createIndex({num: 1}));
+
+    // Start a transaction whose snapshot predates the completion of the index build, and which
+    // reserves an oplog entry after the index build commits.
+    try {
+        const s1 = db.getMongo().startSession();
+        s1.startTransaction({readConcern: {level: "snapshot", atClusterTime: clusterTime}});
+        s1.getDatabase('test').c.insertOne({_id: 1, num: 1});
+
+        // Transaction should have failed.
+        assert(0);
+    } catch (e) {
+        assert(e.hasOwnProperty("errorLabels"), tojson(e));
+        assert.contains("TransientTransactionError", e.errorLabels, tojson(e));
+        assert.eq(e["code"], ErrorCodes.WriteConflict, tojson(e));
+    }
+}
+
+db.c.drop();
+
+// Transaction deleting an index key.
+{
+    assert.commandWorked(db.createCollection('c'));
+
+    const s0 = db.getMongo().startSession();
+    s0.startTransaction();
+    assert.commandWorked(s0.getDatabase('test')['c'].insertOne({_id: 0, num: 0}));
+    s0.commitTransaction();
+
+    const clusterTime = s0.getClusterTime().clusterTime;
+
+    assert.commandWorked(db['c'].createIndex({num: 1}));
+
+    // Start a transaction whose snapshot predates the completion of the index build, and which
+    // reserves an oplog entry after the index build commits.
+    try {
+        const s1 = db.getMongo().startSession();
+        s1.startTransaction({readConcern: {level: "snapshot", atClusterTime: clusterTime}});
+        s1.getDatabase('test').c.deleteOne({_id: 0});
+
+        // Transaction should have failed.
+        assert(0);
+    } catch (e) {
+        assert(e.hasOwnProperty("errorLabels"), tojson(e));
+        assert.contains("TransientTransactionError", e.errorLabels, tojson(e));
+        assert.eq(e["code"], ErrorCodes.WriteConflict, tojson(e));
+    }
+}
+
+replTest.stopSet();
+})();
diff --git a/src/mongo/db/index/index_access_method.cpp b/src/mongo/db/index/index_access_method.cpp
index f7bba190c2e..cac540aae8f 100644
--- a/src/mongo/db/index/index_access_method.cpp
+++ b/src/mongo/db/index/index_access_method.cpp
@@ -1116,6 +1116,17 @@ Status SortedDataIndexAccessMethod::_indexKeysOrWriteToSideTable(
             *keysInsertedOut += inserted;
         }
     } else {
+        // Ensure that our snapshot is compatible with the index's minimum visible snapshot.
+        const auto minVisibleTimestamp = _indexCatalogEntry->getMinimumVisibleSnapshot();
+        const auto readTimestamp =
+            opCtx->recoveryUnit()->getPointInTimeReadTimestamp(opCtx).value_or(
+                opCtx->recoveryUnit()->getCatalogConflictingTimestamp());
+        if (minVisibleTimestamp && !readTimestamp.isNull() &&
+            readTimestamp < *minVisibleTimestamp) {
+            throwWriteConflictException(
+                "Unable to read from a snapshot due to pending catalog changes.");
+        }
+
         int64_t numInserted = 0;
         status = insertKeysAndUpdateMultikeyPaths(
             opCtx,
@@ -1178,6 +1189,15 @@ void SortedDataIndexAccessMethod::_unindexKeysOrWriteToSideTable(
     options.dupsAllowed = options.dupsAllowed || !_indexCatalogEntry->isReady(opCtx) ||
         (checkRecordId == CheckRecordId::On);
 
+    // Ensure that our snapshot is compatible with the index's minimum visible snapshot.
+    const auto minVisibleTimestamp = _indexCatalogEntry->getMinimumVisibleSnapshot();
+    const auto readTimestamp = opCtx->recoveryUnit()->getPointInTimeReadTimestamp(opCtx).value_or(
+        opCtx->recoveryUnit()->getCatalogConflictingTimestamp());
+    if (minVisibleTimestamp && !readTimestamp.isNull() && readTimestamp < *minVisibleTimestamp) {
+        throwWriteConflictException(
+            "Unable to read from a snapshot due to pending catalog changes.");
+    }
+
     int64_t removed = 0;
     Status status = removeKeys(opCtx, keys, options, &removed);
--
cgit v1.2.1
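
For reference, both hunks above add the same snapshot-compatibility check, once on the index-insert path and once on the unindex path. The sketch below restates that check in isolation so the comparison and its outcome are easy to follow; SketchTimestamp, throwSketchWriteConflict and checkIndexVisibleAtReadTimestamp are illustrative stand-ins, not the server's actual Timestamp, throwWriteConflictException or IndexCatalogEntry interfaces.

// Minimal, self-contained sketch of the check introduced by the patch (C++17).
#include <cstdint>
#include <optional>
#include <stdexcept>

// Stand-in for a point-in-time timestamp, ordered lexicographically by (secs, inc).
struct SketchTimestamp {
    uint32_t secs = 0;
    uint32_t inc = 0;
    bool isNull() const {
        return secs == 0 && inc == 0;
    }
};

inline bool operator<(const SketchTimestamp& a, const SketchTimestamp& b) {
    return a.secs < b.secs || (a.secs == b.secs && a.inc < b.inc);
}

// Stand-in for throwWriteConflictException(): in the server, a WriteConflict raised
// inside a multi-document transaction carries the TransientTransactionError label,
// which is exactly what the new jstest asserts on.
[[noreturn]] inline void throwSketchWriteConflict(const char* reason) {
    throw std::runtime_error(reason);
}

// The pattern: refuse to write to an index entry if the index only became ready
// (its minimum visible snapshot) after the timestamp this operation is reading at.
inline void checkIndexVisibleAtReadTimestamp(
    const std::optional<SketchTimestamp>& minVisibleTimestamp,
    const SketchTimestamp& readTimestamp) {
    if (minVisibleTimestamp && !readTimestamp.isNull() &&
        readTimestamp < *minVisibleTimestamp) {
        throwSketchWriteConflict(
            "Unable to read from a snapshot due to pending catalog changes.");
    }
}

In the hunks themselves the read timestamp additionally falls back to the recovery unit's catalog-conflicting timestamp (the value_or call), and a null read timestamp skips the check entirely, so only writes reading at a snapshot older than the index's minimum visible snapshot are turned into write conflicts.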