diff options
author | Benety Goh <benety@mongodb.com> | 2023-03-08 15:34:15 -0500 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-03-16 21:46:12 +0000 |
commit | 0c6cc277345584054b3b50a507f629ba4b937e9d (patch) | |
tree | 7aa0df304114dc67279c72db917b631cf443b7e7 | |
parent | 38bf4fb1db89f7753c29cdddc817b4fba2d11e53 (diff) | |
download | mongo-0c6cc277345584054b3b50a507f629ba4b937e9d.tar.gz |
SERVER-74642 validate fetches and logs oplog entries for corrupted records
(cherry picked from commit bb2e76b99df5c766260d4e47f54f5445a63853cf)
-rw-r--r-- | jstests/noPassthrough/query_yields_catch_index_corruption.js | 28 | ||||
-rw-r--r-- | src/mongo/db/catalog/collection_validation.cpp | 62 |
2 files changed, 90 insertions, 0 deletions
diff --git a/jstests/noPassthrough/query_yields_catch_index_corruption.js b/jstests/noPassthrough/query_yields_catch_index_corruption.js index b4ee73fc565..13606411541 100644 --- a/jstests/noPassthrough/query_yields_catch_index_corruption.js +++ b/jstests/noPassthrough/query_yields_catch_index_corruption.js @@ -30,12 +30,40 @@ assert.commandWorked(coll.createIndex({a: 1, b: 1})); // entry (thanks to the "skipUnindexingDocumentWhenDeleted" failpoint). function createDanglingIndexEntry(doc) { assert.commandWorked(coll.insert(doc)); + const docId = coll.findOne(doc)._id; assert.commandWorked(coll.remove(doc)); // Validation should now fail. const validateRes = assert.commandWorked(coll.validate()); assert.eq(false, validateRes.valid); + // Server logs for failed validation command should contain oplog entries related to corrupted + // index entry. + let foundInsert = false; + let foundDelete = false; + // Look for log message "Oplog entry found for corrupted collection and index entry" (msg id + // 7464202). checkLog.containsJson(db.getMongo(), 7464202, { + oplogEntryDoc: (oplogDoc) => { + let oplogDocId; + try { + oplogDocId = ObjectId(oplogDoc.o._id.$oid); + } catch (ex) { + return false; + } + if (!oplogDocId.equals(docId)) { + return false; + } + jsTestLog('Found oplog entry for corrupted index entry: ' + tojson(oplogDoc)); + if (oplogDoc.op === 'd') { + foundDelete = true; + } else if (oplogDoc.op === 'i') { + foundInsert = true; + } + return foundDelete && foundInsert; + } + }); + // A query that accesses the now dangling index entry should fail with a // "DataCorruptionDetected" error. Most reads will not detect this problem because they ignore // prepare conflicts by default and that exempts them from checking this assertion. 
Only writes diff --git a/src/mongo/db/catalog/collection_validation.cpp b/src/mongo/db/catalog/collection_validation.cpp index 517c18aaeaf..673324df4f6 100644 --- a/src/mongo/db/catalog/collection_validation.cpp +++ b/src/mongo/db/catalog/collection_validation.cpp @@ -256,6 +256,67 @@ void _printIndexSpec(const ValidateState* validateState, StringData indexName) { } } +/** + * Logs oplog entries related to corrupted records/indexes in validation results. + */ +void _logOplogEntriesForInvalidResults(OperationContext* opCtx, ValidateResults* results) { + if (results->recordTimestamps.empty()) { + return; + } + + LOGV2( + 7464200, + "Validation failed: oplog timestamps referenced by corrupted collection and index entries", + "numTimestamps"_attr = results->recordTimestamps.size()); + + // Set up read on oplog collection. + try { + AutoGetOplog oplogRead(opCtx, OplogAccessMode::kRead); + const auto& oplogCollection = oplogRead.getCollection(); + + // Log oplog entries in reverse from most recent timestamp to oldest. + // Due to oplog truncation, if we fail to find any oplog entry for a particular timestamp, + // we can stop searching for oplog entries with earlier timestamps. + auto recordStore = oplogCollection->getRecordStore(); + uassert(ErrorCodes::InternalError, + "Validation failed: Unable to get oplog record store for corrupted collection and " + "index entries", + recordStore); + + auto cursor = recordStore->getCursor(opCtx, /*forward=*/false); + uassert(ErrorCodes::CursorNotFound, + "Validation failed: Unable to get cursor to oplog collection.", + cursor); + + for (auto it = results->recordTimestamps.rbegin(); it != results->recordTimestamps.rend(); + it++) { + const auto& timestamp = *it; + + // A record id in the oplog collection is equivalent to the document's timestamp field. 
+ RecordId recordId(timestamp.asULL()); + auto record = cursor->seekExact(recordId); + if (!record) { + LOGV2(7464201, + " Validation failed: Stopping oplog entry search for corrupted collection " + "and index entries.", + "timestamp"_attr = timestamp); + break; + } + + LOGV2( + 7464202, + " Validation failed: Oplog entry found for corrupted collection and index entry", + "timestamp"_attr = timestamp, + "oplogEntryDoc"_attr = redact(record->data.toBson())); + } + } catch (DBException& ex) { + LOGV2_ERROR(7464203, + "Validation failed: Unable to fetch entries from oplog collection for " + "corrupted collection and index entries", + "ex"_attr = ex); + } +} + void _reportValidationResults(OperationContext* opCtx, ValidateState* validateState, ValidateResults* results, @@ -315,6 +376,7 @@ void _reportInvalidResults(OperationContext* opCtx, ValidateResults* results, BSONObjBuilder* output) { _reportValidationResults(opCtx, validateState, results, output); + _logOplogEntriesForInvalidResults(opCtx, results); LOGV2_OPTIONS(20302, {LogComponent::kIndex}, "Validation complete -- Corruption found", |