summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Louis Williams <louis.williams@mongodb.com> 2020-11-10 17:07:36 -0500
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-11-11 00:21:42 +0000
commit 523247d096a796c15c911370e622a3614411a25b (patch)
tree 1c22df8471d85f79fce4fed8ecaad68be1122f9e
parent 9f0d4a3e43786d006e96babad79b94bfcf5e8dc8 (diff)
download mongo-523247d096a796c15c911370e622a3614411a25b.tar.gz
SERVER-48002 Do not enforce DataCorruptionDetected assertion when ignoring prepare conflicts
Snapshot isolation cannot be guaranteed for operations that ignore prepare conflicts. This means that two reads of the same record in the same snapshot can return different results. In practice, this can lead to false positive DataCorruptionDetected assertions.
-rw-r--r-- jstests/replsets/query_with_txn_prepared.js 63
-rw-r--r-- src/mongo/db/exec/working_set_common.cpp 7
2 files changed, 69 insertions, 1 deletions
diff --git a/jstests/replsets/query_with_txn_prepared.js b/jstests/replsets/query_with_txn_prepared.js
new file mode 100644
index 00000000000..fcf05002310
--- /dev/null
+++ b/jstests/replsets/query_with_txn_prepared.js
@@ -0,0 +1,63 @@
+/**
+ * This test attempts to reproduce the bug described in SERVER-48002. This is a best-effort test
+ * that will not detect this bug on every run, even if a bug exists.
+ *
+ * Snapshot isolation cannot be guaranteed for operations that ignore prepare conflicts.
+ * This means that two reads of the same record in the same snapshot can return different results.
+ * In practice, the DataCorruptionDetected assertion added by SERVER-40620 will trigger if an index
+ * points to a non-existent record.
+ *
+ * Queries that ignore prepare conflicts and use an index to satisfy a read can read a key from an
+ * index and fetch a record that appears to go missing within the same snapshot. This may happen
+ * when the collection read races with a prepared transaction that commits and deletes the record.
+ *
+ *
+ * @tags: [
+ * uses_prepare_transaction,
+ * uses_transactions,
+ * ]
+ */
+(function() {
+"use strict";
+
+const replTest = new ReplSetTest({nodes: 2});
+replTest.startSet();
+replTest.initiate();
+
+const primary = replTest.getPrimary();
+const dbName = "query_with_txn_prepared";
+const collName = "coll";
+
+assert.commandWorked(primary.getDB(dbName)[collName].createIndexes([{x: 1}]));
+
+const transactionShell = startParallelShell(function() {
+ load("jstests/core/txns/libs/prepare_helpers.js"); // For PrepareHelpers.
+
+ while (db.getSiblingDB("query_with_txn_prepared")["stopQueries"].find().count() == 0) {
+ for (let i = 0; i < 100; ++i) {
+ const session = db.getMongo().startSession();
+ const sessionColl = session.getDatabase("query_with_txn_prepared")["coll"];
+
+ session.startTransaction({readConcern: {level: "majority"}});
+ if (Math.random() < 0.5) {
+ assert.commandWorked(sessionColl.update({x: 1}, {x: 1}, {upsert: true}));
+ } else {
+ assert.commandWorked(sessionColl.remove({x: 1}));
+ }
+
+ const prepareTimestamp = PrepareHelpers.prepareTransaction(session);
+ assert.commandWorked(PrepareHelpers.commitTransaction(session, prepareTimestamp));
+ }
+ }
+}, primary.port);
+
+for (let i = 0; i < 2000; ++i) {
+ const result = primary.getDB(dbName)[collName].find({x: 1}).toArray();
+ assert([0, 1].includes(result.length), result);
+}
+
+assert.commandWorked(primary.getDB(dbName)["stopQueries"].insert({stop: 1}));
+transactionShell();
+
+replTest.stopSet();
+}());
diff --git a/src/mongo/db/exec/working_set_common.cpp b/src/mongo/db/exec/working_set_common.cpp
index c6083642478..33a9f6074e7 100644
--- a/src/mongo/db/exec/working_set_common.cpp
+++ b/src/mongo/db/exec/working_set_common.cpp
@@ -82,9 +82,14 @@ bool WorkingSetCommon::fetch(OperationContext* opCtx,
// The record referenced by this index entry is gone. If the query yielded some time after
// we first examined the index entry, then it's likely that the record was deleted while we
// were yielding. However, if the snapshot id hasn't changed since the index lookup, then
- // there could not have been a yield, and the only explanation is corruption.
+ // there could not have been a yield, meaning the document we are searching for has been
+ // deleted.
+ // One possibility is that the record was deleted by a prepared transaction, but if we are
+ // not ignoring prepare conflicts, then this definitely indicates an error.
std::vector<IndexKeyDatum>::iterator keyDataIt;
if (member->getState() == WorkingSetMember::RID_AND_IDX &&
+ opCtx->recoveryUnit()->getPrepareConflictBehavior() ==
+ PrepareConflictBehavior::kEnforce &&
(keyDataIt = std::find_if(member->keyData.begin(),
member->keyData.end(),
[currentSnapshotId = opCtx->recoveryUnit()->getSnapshotId()](