SERVER-48002 Do not enforce DataCorruptionDetected assertion when ignoring prepare conflicts

Snapshot isolation cannot be guaranteed for operations that ignore prepare conflicts. This means that two reads of the same record in the same snapshot can return different results. In practice, this can lead to false positive DataCorruptionDetected assertions.
author: Louis Williams <louis.williams@mongodb.com> 2020-11-18 14:18:08 -0500
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-11-18 21:00:20 +0000
commit: a01be7ed8e475775ebec57dd6291c3cf5cd33ccf (patch)
tree: f02abe85616a83775a1b96a677d4d0cbbc1cc088
parent: 3aaf530850346c3e80a4e5f891c666b6a31e8809 (diff)
download: mongo-a01be7ed8e475775ebec57dd6291c3cf5cd33ccf.tar.gz
3 files changed, 101 insertions, 21 deletions
diff --git a/jstests/noPassthrough/query_yields_catch_index_corruption.js b/jstests/noPassthrough/query_yields_catch_index_corruption.js
index 99efd963bf1..6f1d7ac3c45 100644
--- a/jstests/noPassthrough/query_yields_catch_index_corruption.js
+++ b/jstests/noPassthrough/query_yields_catch_index_corruption.js
@@ -2,25 +2,23 @@
 (function() {
 "use strict";
 
-const name = "query_yields_catch_index_corruption";
-const dbpath = MongoRunner.dataPath + name + "/";
+const dbName = "test";
+const collName = "query_yields_catch_index_corruption";
 
-resetDbpath(dbpath);
+const replSet = new ReplSetTest({nodes: 1});
+replSet.startSet();
+replSet.initiate();
 
-let mongod = MongoRunner.runMongod({dbpath: dbpath});
-assert.neq(null, mongod, "mongod failed to start.");
-
-let db = mongod.getDB("test");
+const primary = replSet.getPrimary();
 
+let db = primary.getDB(dbName);
 assert.commandWorked(db.adminCommand({
     configureFailPoint: "skipUnindexingDocumentWhenDeleted",
     mode: "alwaysOn",
     data: {indexName: "a_1_b_1"}
 }));
 
-let coll = db.getCollection(name);
-coll.drop();
-
+let coll = db.getCollection(collName);
 assert.commandWorked(coll.createIndex({a: 1, b: 1}));
 
 // Corrupt the collection by inserting a document and then deleting it without deleting its index
@@ -34,9 +32,22 @@ function createDanglingIndexEntry(doc) {
     assert.eq(false, validateRes.valid);
 
     // A query that accesses the now dangling index entry should fail with a
-    // "DataCorruptionDetected" error.
-    const error = assert.throws(() => coll.find(doc).toArray());
-    assert.eq(error.code, ErrorCodes.DataCorruptionDetected, error);
+    // "DataCorruptionDetected" error. Most reads will not detect this problem because they ignore
+    // prepare conflicts by default and that exempts them from checking this assertion. Only writes
+    // and reads in multi-document transactions enforce prepare conflicts and should encounter this
+    // assertion.
+    assert.commandFailedWithCode(coll.update(doc, {$set: {c: 1}}),
+                                 ErrorCodes.DataCorruptionDetected);
+
+    const session = db.getMongo().startSession();
+    const sessionDB = session.getDatabase(dbName);
+    session.startTransaction();
+
+    const error = assert.throws(() => {
+        sessionDB[collName].find(doc).toArray();
+    });
+    assert.eq(error.code, ErrorCodes.DataCorruptionDetected);
+    session.abortTransaction_forTesting();
 }
 
 createDanglingIndexEntry({a: 1, b: 1});
@@ -51,19 +62,19 @@ assert.eq(true, validateRes.valid, tojson(validateRes));
 // Reintroduce the dangling index entry, and this time fix it using the "repair" flag.
 createDanglingIndexEntry({a: 1, b: 1});
 
-MongoRunner.stopMongod(mongod, MongoRunner.EXIT_CLEAN, {skipValidation: true});
-mongod = MongoRunner.runMongod({dbpath: dbpath, noCleanData: true, repair: ""});
+const dbpath = replSet.getDbPath(primary);
+replSet.stopSet(MongoRunner.EXIT_CLEAN, true /* forRestart */, {skipValidation: true});
+
+let mongod = MongoRunner.runMongod({dbpath: dbpath, noCleanData: true, repair: ""});
 assert.eq(null, mongod, "Expect this to exit cleanly");
 
-// Verify that the server starts up successfully after the repair and that validate() now succeeds.
+// Verify that the server starts up successfully after the repair.
 mongod = MongoRunner.runMongod({dbpath: dbpath, noCleanData: true});
 assert.neq(null, mongod, "mongod failed to start after repair");
 
 db = mongod.getDB("test");
-coll = db.getCollection(name);
-
-validateRes = assert.commandWorked(coll.validate());
-assert.eq(true, validateRes.valid, tojson(validateRes));
+coll = db.getCollection(collName);
 
+// Runs validate before shutting down.
 MongoRunner.stopMongod(mongod);
 })();
diff --git a/jstests/replsets/query_with_txn_prepared.js b/jstests/replsets/query_with_txn_prepared.js
new file mode 100644
index 00000000000..19d52395208
--- /dev/null
+++ b/jstests/replsets/query_with_txn_prepared.js
@@ -0,0 +1,64 @@
+/**
+ * This test attempts to reproduce the bug described in SERVER-48002. This is a best-effort test
+ * that will not detect this bug on every run, even if a bug exists.
+ *
+ * Snapshot isolation cannot be guaranteed for operations that ignore prepare conflicts.
+ * This means that two reads of the same record in the same snapshot can return different results.
+ * In practice, the DataCorruptionDetected assertion added by SERVER-40620 will trigger if an index
+ * points to a non-existent record.
+ *
+ * Queries that ignore prepare conflicts and use an index to satisfy a read can read a key from an
+ * index and fetch a record that appears to go missing within the same snapshot. This may happen
+ * when the collection read races with a prepared transaction that commits and deletes the record.
+ *
+ *
+ * @tags: [
+ *   requires_fcv_49,
+ *   uses_prepare_transaction,
+ *   uses_transactions,
+ * ]
+ */
+(function() {
+"use strict";
+
+const replTest = new ReplSetTest({nodes: 2});
+replTest.startSet();
+replTest.initiate();
+
+const primary = replTest.getPrimary();
+const dbName = "query_with_txn_prepared";
+const collName = "coll";
+
+assert.commandWorked(primary.getDB(dbName)[collName].createIndexes([{x: 1}]));
+
+const transactionShell = startParallelShell(function() {
+    load("jstests/core/txns/libs/prepare_helpers.js");  // For PrepareHelpers.
+
+    while (db.getSiblingDB("query_with_txn_prepared")["stopQueries"].find().count() == 0) {
+        for (let i = 0; i < 100; ++i) {
+            const session = db.getMongo().startSession();
+            const sessionColl = session.getDatabase("query_with_txn_prepared")["coll"];
+
+            session.startTransaction({readConcern: {level: "majority"}});
+            if (Math.random() < 0.5) {
+                assert.commandWorked(sessionColl.update({x: 1}, {x: 1}, {upsert: true}));
+            } else {
+                assert.commandWorked(sessionColl.remove({x: 1}));
+            }
+
+            const prepareTimestamp = PrepareHelpers.prepareTransaction(session);
+            assert.commandWorked(PrepareHelpers.commitTransaction(session, prepareTimestamp));
+        }
+    }
+}, primary.port);
+
+for (let i = 0; i < 2000; ++i) {
+    const result = primary.getDB(dbName)[collName].find({x: 1}).toArray();
+    assert([0, 1].includes(result.length), result);
+}
+
+assert.commandWorked(primary.getDB(dbName)["stopQueries"].insert({stop: 1}));
+transactionShell();
+
+replTest.stopSet();
+}());
diff --git a/src/mongo/db/exec/working_set_common.cpp b/src/mongo/db/exec/working_set_common.cpp
index c6083642478..33a9f6074e7 100644
--- a/src/mongo/db/exec/working_set_common.cpp
+++ b/src/mongo/db/exec/working_set_common.cpp
@@ -82,9 +82,14 @@ bool WorkingSetCommon::fetch(OperationContext* opCtx,
         // The record referenced by this index entry is gone. If the query yielded some time after
         // we first examined the index entry, then it's likely that the record was deleted while we
         // were yielding. However, if the snapshot id hasn't changed since the index lookup, then
-        // there could not have been a yield, and the only explanation is corruption.
+        // there could not have been a yield, meaning the document we are searching for has been
+        // deleted.
+        // One possibility is that the record was deleted by a prepared transaction, but if we are
+        // not ignoring prepare conflicts, then this definitely indicates an error.
         std::vector<IndexKeyDatum>::iterator keyDataIt;
         if (member->getState() == WorkingSetMember::RID_AND_IDX &&
+            opCtx->recoveryUnit()->getPrepareConflictBehavior() ==
+                PrepareConflictBehavior::kEnforce &&
             (keyDataIt = std::find_if(member->keyData.begin(),
                                       member->keyData.end(),
                                       [currentSnapshotId = opCtx->recoveryUnit()->getSnapshotId()](
author	Louis Williams <louis.williams@mongodb.com>	2020-11-18 14:18:08 -0500
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2020-11-18 21:00:20 +0000
commit	a01be7ed8e475775ebec57dd6291c3cf5cd33ccf (patch)
tree	f02abe85616a83775a1b96a677d4d0cbbc1cc088
parent	3aaf530850346c3e80a4e5f891c666b6a31e8809 (diff)
download	mongo-a01be7ed8e475775ebec57dd6291c3cf5cd33ccf.tar.gz