author     Benety Goh <benety@mongodb.com>  2023-03-08 15:34:15 -0500
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>  2023-03-16 21:46:12 +0000
commit     0c6cc277345584054b3b50a507f629ba4b937e9d (patch)
tree       7aa0df304114dc67279c72db917b631cf443b7e7
parent     38bf4fb1db89f7753c29cdddc817b4fba2d11e53 (diff)
download   mongo-0c6cc277345584054b3b50a507f629ba4b937e9d.tar.gz
SERVER-74642 validate fetches and logs oplog entries for corrupted records
(cherry picked from commit bb2e76b99df5c766260d4e47f54f5445a63853cf)
-rw-r--r--  jstests/noPassthrough/query_yields_catch_index_corruption.js  28
-rw-r--r--  src/mongo/db/catalog/collection_validation.cpp  62
2 files changed, 90 insertions, 0 deletions
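
The change teaches collection validation to follow up on corruption it finds: for every oplog timestamp recorded against a corrupted record or index entry, a new helper seeks the matching oplog document and logs it (log id 7464202), preceded by a summary line (7464200); the search runs from the newest timestamp backwards and stops at the first miss (7464201), since older entries may already have been truncated. The accompanying jstest creates a dangling index entry, runs validate, and asserts that both the insert ('i') and delete ('d') oplog entries for the document show up in the server log.

A minimal plain-shell sketch of how those log lines could be inspected after a failed validation, for readers not using the checkLog test helper; the "test.coll" namespace is hypothetical:

    // Run validation on a collection suspected of index corruption.
    const res = db.getSiblingDB("test").coll.validate();
    assert.eq(false, res.valid);

    // Server log lines are JSON; id 7464202 carries the fetched oplog entry
    // ("oplogEntryDoc") together with the timestamp that referenced it.
    const log = assert.commandWorked(db.adminCommand({getLog: "global"})).log;
    log.filter(function(line) {
        try {
            return JSON.parse(line).id === 7464202;
        } catch (e) {
            return false;
        }
    }).forEach(function(line) {
        print(line);
    });
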
diff --git a/jstests/noPassthrough/query_yields_catch_index_corruption.js b/jstests/noPassthrough/query_yields_catch_index_corruption.js
index b4ee73fc565..13606411541 100644
--- a/jstests/noPassthrough/query_yields_catch_index_corruption.js
+++ b/jstests/noPassthrough/query_yields_catch_index_corruption.js
@@ -30,12 +30,40 @@ assert.commandWorked(coll.createIndex({a: 1, b: 1}));
// entry (thanks to the "skipUnindexingDocumentWhenDeleted" failpoint).
function createDanglingIndexEntry(doc) {
assert.commandWorked(coll.insert(doc));
+ const docId = coll.findOne(doc)._id;
assert.commandWorked(coll.remove(doc));
// Validation should now fail.
const validateRes = assert.commandWorked(coll.validate());
assert.eq(false, validateRes.valid);
+ // Server logs for the failed validation command should contain oplog entries related to the
+ // corrupted index entry.
+ let foundInsert = false;
+ let foundDelete = false;
+ // Look for log message "Oplog entry found for corrupted collection and index entry" (msg id
+ // 7464202).
+ checkLog.containsJson(db.getMongo(), 7464202, {
+ oplogEntryDoc: (oplogDoc) => {
+ let oplogDocId;
+ try {
+ oplogDocId = ObjectId(oplogDoc.o._id.$oid);
+ } catch (ex) {
+ return false;
+ }
+ if (!oplogDocId.equals(docId)) {
+ return false;
+ }
+ jsTestLog('Found oplog entry for corrupted index entry: ' + tojson(oplogDoc));
+ if (oplogDoc.op === 'd') {
+ foundDelete = true;
+ } else if (oplogDoc.op === 'i') {
+ foundInsert = true;
+ }
+ return foundDelete && foundInsert;
+ }
+ });
+
// A query that accesses the now dangling index entry should fail with a
// "DataCorruptionDetected" error. Most reads will not detect this problem because they ignore
// prepare conflicts by default and that exempts them from checking this assertion. Only writes
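
The dangling index entry that the hunk above relies on comes from the "skipUnindexingDocumentWhenDeleted" failpoint mentioned in its first context line: with the failpoint active, deleting the document removes the record but leaves its keys in the index. A rough sketch of enabling it for the test's {a: 1, b: 1} index; treating the failpoint as filtering on an indexName field is an assumption here:

    // Leave index keys behind when documents are deleted, producing dangling index entries.
    assert.commandWorked(db.adminCommand({
        configureFailPoint: "skipUnindexingDocumentWhenDeleted",
        mode: "alwaysOn",
        data: {indexName: "a_1_b_1"},  // assumed filter; "a_1_b_1" is the default name for {a: 1, b: 1}
    }));
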
diff --git a/src/mongo/db/catalog/collection_validation.cpp b/src/mongo/db/catalog/collection_validation.cpp
index 517c18aaeaf..673324df4f6 100644
--- a/src/mongo/db/catalog/collection_validation.cpp
+++ b/src/mongo/db/catalog/collection_validation.cpp
@@ -256,6 +256,67 @@ void _printIndexSpec(const ValidateState* validateState, StringData indexName) {
}
}
+/**
+ * Logs oplog entries related to corrupted records/indexes in validation results.
+ */
+void _logOplogEntriesForInvalidResults(OperationContext* opCtx, ValidateResults* results) {
+ if (results->recordTimestamps.empty()) {
+ return;
+ }
+
+ LOGV2(
+ 7464200,
+ "Validation failed: oplog timestamps referenced by corrupted collection and index entries",
+ "numTimestamps"_attr = results->recordTimestamps.size());
+
+ // Set up read on oplog collection.
+ try {
+ AutoGetOplog oplogRead(opCtx, OplogAccessMode::kRead);
+ const auto& oplogCollection = oplogRead.getCollection();
+
+ // Log oplog entries in reverse from most recent timestamp to oldest.
+ // Due to oplog truncation, if we fail to find any oplog entry for a particular timestamp,
+ // we can stop searching for oplog entries with earlier timestamps.
+ auto recordStore = oplogCollection->getRecordStore();
+ uassert(ErrorCodes::InternalError,
+ "Validation failed: Unable to get oplog record store for corrupted collection and "
+ "index entries",
+ recordStore);
+
+ auto cursor = recordStore->getCursor(opCtx, /*forward=*/false);
+ uassert(ErrorCodes::CursorNotFound,
+ "Validation failed: Unable to get cursor to oplog collection.",
+ cursor);
+
+ for (auto it = results->recordTimestamps.rbegin(); it != results->recordTimestamps.rend();
+ it++) {
+ const auto& timestamp = *it;
+
+ // A record id in the oplog collection is equivalent to the document's timestamp field.
+ RecordId recordId(timestamp.asULL());
+ auto record = cursor->seekExact(recordId);
+ if (!record) {
+ LOGV2(7464201,
+ " Validation failed: Stopping oplog entry search for corrupted collection "
+ "and index entries.",
+ "timestamp"_attr = timestamp);
+ break;
+ }
+
+ LOGV2(
+ 7464202,
+ " Validation failed: Oplog entry found for corrupted collection and index entry",
+ "timestamp"_attr = timestamp,
+ "oplogEntryDoc"_attr = redact(record->data.toBson()));
+ }
+ } catch (DBException& ex) {
+ LOGV2_ERROR(7464203,
+ "Validation failed: Unable to fetch entries from oplog collection for "
+ "corrupted collection and index entries",
+ "ex"_attr = ex);
+ }
+}
+
void _reportValidationResults(OperationContext* opCtx,
ValidateState* validateState,
ValidateResults* results,
@@ -315,6 +376,7 @@ void _reportInvalidResults(OperationContext* opCtx,
ValidateResults* results,
BSONObjBuilder* output) {
_reportValidationResults(opCtx, validateState, results, output);
+ _logOplogEntriesForInvalidResults(opCtx, results);
LOGV2_OPTIONS(20302,
{LogComponent::kIndex},
"Validation complete -- Corruption found",