summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--jstests/noPassthrough/query_yields_catch_index_corruption.js69
-rw-r--r--src/mongo/base/error_codes.yml1
-rw-r--r--src/mongo/db/catalog/index_catalog_impl.cpp11
-rw-r--r--src/mongo/db/exec/fetch.cpp2
-rw-r--r--src/mongo/db/exec/idhack.cpp3
-rw-r--r--src/mongo/db/exec/text_or.cpp3
-rw-r--r--src/mongo/db/exec/working_set_common.cpp46
-rw-r--r--src/mongo/db/exec/working_set_common.h4
-rw-r--r--src/mongo/db/exec/write_stage_common.cpp2
-rw-r--r--src/mongo/dbtests/query_stage_merge_sort.cpp4
10 files changed, 138 insertions, 7 deletions
diff --git a/jstests/noPassthrough/query_yields_catch_index_corruption.js b/jstests/noPassthrough/query_yields_catch_index_corruption.js
new file mode 100644
index 00000000000..8cfae1559eb
--- /dev/null
+++ b/jstests/noPassthrough/query_yields_catch_index_corruption.js
@@ -0,0 +1,69 @@
+// @tags: [requires_persistence, requires_journaling]
+(function() {
+"use strict";
+
+const name = "query_yields_catch_index_corruption";
+const dbpath = MongoRunner.dataPath + name + "/";
+
+resetDbpath(dbpath);
+
+let mongod = MongoRunner.runMongod({dbpath: dbpath});
+assert.neq(null, mongod, "mongod failed to start.");
+
+let db = mongod.getDB("test");
+
+assert.commandWorked(db.adminCommand({
+ configureFailPoint: "skipUnindexingDocumentWhenDeleted",
+ mode: "alwaysOn",
+ data: {indexName: "a_1"}
+}));
+
+let coll = db.getCollection(name);
+coll.drop();
+
+assert.commandWorked(coll.createIndex({a: 1}));
+
+// Corrupt the collection by inserting a document and then deleting it without deleting its index
+// entry (thanks to the "skipUnindexingDocumentWhenDeleted" failpoint).
+function createDanglingIndexEntry(doc) {
+ assert.commandWorked(coll.insert(doc));
+ assert.commandWorked(coll.remove(doc));
+
+ // Validation should now fail.
+ const validateRes = assert.commandWorked(coll.validate());
+ assert.eq(false, validateRes.valid);
+
+ // A query that accesses the now dangling index entry should fail with a
+ // "DataCorruptionDetected" error.
+ const error = assert.throws(() => coll.find(doc).toArray());
+ assert.eq(error.code, ErrorCodes.DataCorruptionDetected, error);
+}
+
+createDanglingIndexEntry({a: 1});
+
+// Fix the index by rebuilding it, and ensure that it validates.
+assert.commandWorked(coll.dropIndex({a: 1}));
+assert.commandWorked(coll.createIndex({a: 1}));
+
+let validateRes = assert.commandWorked(coll.validate());
+assert.eq(true, validateRes.valid, tojson(validateRes));
+
+// Reintroduce the dangling index entry, and this time fix it using the "repair" flag.
+createDanglingIndexEntry({a: 1});
+
+MongoRunner.stopMongod(mongod, MongoRunner.EXIT_CLEAN, {skipValidation: true});
+mongod = MongoRunner.runMongod({dbpath: dbpath, noCleanData: true, repair: ""});
+assert.eq(null, mongod, "Expect this to exit cleanly");
+
+// Verify that the server starts up successfully after the repair and that validate() now succeeds.
+mongod = MongoRunner.runMongod({dbpath: dbpath, noCleanData: true});
+assert.neq(null, mongod, "mongod failed to start after repair");
+
+db = mongod.getDB("test");
+coll = db.getCollection(name);
+
+validateRes = assert.commandWorked(coll.validate());
+assert.eq(true, validateRes.valid, tojson(validateRes));
+
+MongoRunner.stopMongod(mongod);
+})(); \ No newline at end of file
diff --git a/src/mongo/base/error_codes.yml b/src/mongo/base/error_codes.yml
index 02b57214382..baa25cc2821 100644
--- a/src/mongo/base/error_codes.yml
+++ b/src/mongo/base/error_codes.yml
@@ -334,6 +334,7 @@ error_codes:
- {code: 298,name: InvalidServerType}
- {code: 299,name: OCSPCertificateStatusRevoked}
- {code: 300,name: RangeDeletionAbandonedDueToCollectionDrop}
+ - {code: 301,name: DataCorruptionDetected}
# Error codes 4000-8999 are reserved.
diff --git a/src/mongo/db/catalog/index_catalog_impl.cpp b/src/mongo/db/catalog/index_catalog_impl.cpp
index 4ad0a76d98d..10fa568a735 100644
--- a/src/mongo/db/catalog/index_catalog_impl.cpp
+++ b/src/mongo/db/catalog/index_catalog_impl.cpp
@@ -79,6 +79,8 @@
namespace mongo {
+MONGO_FAIL_POINT_DEFINE(skipUnindexingDocumentWhenDeleted);
+
using std::endl;
using std::string;
using std::unique_ptr;
@@ -1475,6 +1477,15 @@ void IndexCatalogImpl::_unindexRecord(OperationContext* opCtx,
nullptr,
loc);
+ // Tests can enable this failpoint to produce index corruption scenarios where an index has
+ // extra keys.
+ if (auto failpoint = skipUnindexingDocumentWhenDeleted.scoped();
+ MONGO_unlikely(failpoint.isActive())) {
+ auto indexName = failpoint.getData()["indexName"].valueStringDataSafe();
+ if (indexName == entry->descriptor()->indexName()) {
+ return;
+ }
+ }
_unindexKeys(opCtx, entry, {keys.begin(), keys.end()}, obj, loc, logIfError, keysDeletedOut);
}
diff --git a/src/mongo/db/exec/fetch.cpp b/src/mongo/db/exec/fetch.cpp
index 6e9eb4fbeb6..153ed5a878e 100644
--- a/src/mongo/db/exec/fetch.cpp
+++ b/src/mongo/db/exec/fetch.cpp
@@ -103,7 +103,7 @@ PlanStage::StageState FetchStage::doWork(WorkingSetID* out) {
if (!_cursor)
_cursor = collection()->getCursor(getOpCtx());
- if (!WorkingSetCommon::fetch(getOpCtx(), _ws, id, _cursor)) {
+ if (!WorkingSetCommon::fetch(getOpCtx(), _ws, id, _cursor, collection()->ns())) {
_ws->free(id);
return NEED_TIME;
}
diff --git a/src/mongo/db/exec/idhack.cpp b/src/mongo/db/exec/idhack.cpp
index 64848cb50e6..c3378824a58 100644
--- a/src/mongo/db/exec/idhack.cpp
+++ b/src/mongo/db/exec/idhack.cpp
@@ -103,7 +103,8 @@ PlanStage::StageState IDHackStage::doWork(WorkingSetID* out) {
_recordCursor = collection()->getCursor(getOpCtx());
// Find the document associated with 'id' in the collection's record store.
- if (!WorkingSetCommon::fetch(getOpCtx(), _workingSet, id, _recordCursor)) {
+ if (!WorkingSetCommon::fetch(
+ getOpCtx(), _workingSet, id, _recordCursor, collection()->ns())) {
// We didn't find a document with RecordId 'id'.
_workingSet->free(id);
_commonStats.isEOF = true;
diff --git a/src/mongo/db/exec/text_or.cpp b/src/mongo/db/exec/text_or.cpp
index 3bd267271da..8202ad623c7 100644
--- a/src/mongo/db/exec/text_or.cpp
+++ b/src/mongo/db/exec/text_or.cpp
@@ -268,7 +268,8 @@ PlanStage::StageState TextOrStage::addTerm(WorkingSetID wsid, WorkingSetID* out)
// Our parent expects RID_AND_OBJ members, so we fetch the document here if we haven't
// already.
try {
- if (!WorkingSetCommon::fetch(getOpCtx(), _ws, wsid, _recordCursor)) {
+ if (!WorkingSetCommon::fetch(
+ getOpCtx(), _ws, wsid, _recordCursor, collection()->ns())) {
_ws->free(wsid);
textRecordData->score = -1;
return NEED_TIME;
diff --git a/src/mongo/db/exec/working_set_common.cpp b/src/mongo/db/exec/working_set_common.cpp
index c8c9c3c00a5..b95dccf286b 100644
--- a/src/mongo/db/exec/working_set_common.cpp
+++ b/src/mongo/db/exec/working_set_common.cpp
@@ -27,6 +27,8 @@
* it in the license file.
*/
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kQuery
+
#include "mongo/platform/basic.h"
#include "mongo/db/exec/working_set_common.h"
@@ -37,13 +39,34 @@
#include "mongo/db/index/index_access_method.h"
#include "mongo/db/query/canonical_query.h"
#include "mongo/db/service_context.h"
+#include "mongo/util/log.h"
namespace mongo {
+namespace {
+std::string indexKeyVectorDebugString(const std::vector<IndexKeyDatum>& keyData) {
+ StringBuilder sb;
+ sb << "[";
+ if (keyData.size() > 0) {
+ auto it = keyData.begin();
+ sb << "(key: " << redact(it->keyData) << ", index key pattern: " << it->indexKeyPattern
+ << ")";
+ while (++it != keyData.end()) {
+ sb << ", (key: " << redact(it->keyData)
+ << ", index key pattern: " << it->indexKeyPattern << ")";
+ }
+ }
+ sb << "]";
+ return sb.str();
+}
+} // namespace
+
+// static
bool WorkingSetCommon::fetch(OperationContext* opCtx,
WorkingSet* workingSet,
WorkingSetID id,
- unowned_ptr<SeekableRecordCursor> cursor) {
+ unowned_ptr<SeekableRecordCursor> cursor,
+ const NamespaceString& ns) {
WorkingSetMember* member = workingSet->get(id);
// We should have a RecordId but need to retrieve the obj. Get the obj now and reset all WSM
@@ -52,6 +75,27 @@ bool WorkingSetCommon::fetch(OperationContext* opCtx,
auto record = cursor->seekExact(member->recordId);
if (!record) {
+ // The record referenced by this index entry is gone. If the query yielded some time after
+ // we first examined the index entry, then it's likely that the record was deleted while we
+ // were yielding. However, if the snapshot id hasn't changed since the index lookup, then
+ // there could not have been a yield, and the only explanation is corruption.
+ std::vector<IndexKeyDatum>::iterator keyDataIt;
+ if (member->getState() == WorkingSetMember::RID_AND_IDX &&
+ (keyDataIt = std::find_if(member->keyData.begin(),
+ member->keyData.end(),
+ [currentSnapshotId = opCtx->recoveryUnit()->getSnapshotId()](
+ const auto& keyDatum) {
+ return keyDatum.snapshotId == currentSnapshotId;
+ })) != member->keyData.end()) {
+ std::stringstream ss;
+ ss << "Erroneous index key found with reference to non-existent record id "
+ << member->recordId << ": " << indexKeyVectorDebugString(member->keyData)
+ << ". Consider dropping and then re-creating the index with key pattern "
+ << keyDataIt->indexKeyPattern << " and then running the validate command on the "
+ << ns << " collection.";
+ error() << ss.str();
+ uasserted(ErrorCodes::DataCorruptionDetected, ss.str());
+ }
return false;
}
diff --git a/src/mongo/db/exec/working_set_common.h b/src/mongo/db/exec/working_set_common.h
index 0ebeb2ae65c..f62d861142f 100644
--- a/src/mongo/db/exec/working_set_common.h
+++ b/src/mongo/db/exec/working_set_common.h
@@ -30,6 +30,7 @@
#pragma once
#include "mongo/db/exec/working_set.h"
+#include "mongo/db/namespace_string.h"
#include "mongo/util/unowned_ptr.h"
namespace mongo {
@@ -53,7 +54,8 @@ public:
static bool fetch(OperationContext* opCtx,
WorkingSet* workingSet,
WorkingSetID id,
- unowned_ptr<SeekableRecordCursor> cursor);
+ unowned_ptr<SeekableRecordCursor> cursor,
+ const NamespaceString& ns);
/**
* Build a Document which represents a Status to return in a WorkingSet.
diff --git a/src/mongo/db/exec/write_stage_common.cpp b/src/mongo/db/exec/write_stage_common.cpp
index 74a3d846a2a..301ced6f3a5 100644
--- a/src/mongo/db/exec/write_stage_common.cpp
+++ b/src/mongo/db/exec/write_stage_common.cpp
@@ -50,7 +50,7 @@ bool ensureStillMatches(const Collection* collection,
if (opCtx->recoveryUnit()->getSnapshotId() != member->doc.snapshotId()) {
std::unique_ptr<SeekableRecordCursor> cursor(collection->getCursor(opCtx));
- if (!WorkingSetCommon::fetch(opCtx, ws, id, cursor)) {
+ if (!WorkingSetCommon::fetch(opCtx, ws, id, cursor, collection->ns())) {
// Doc is already deleted.
return false;
}
diff --git a/src/mongo/dbtests/query_stage_merge_sort.cpp b/src/mongo/dbtests/query_stage_merge_sort.cpp
index b0df7ed51fc..78fdcfd0ead 100644
--- a/src/mongo/dbtests/query_stage_merge_sort.cpp
+++ b/src/mongo/dbtests/query_stage_merge_sort.cpp
@@ -612,7 +612,9 @@ public:
// An attempt to fetch the WSM should show that the key is no longer present in the
// index.
- ASSERT_FALSE(WorkingSetCommon::fetch(&_opCtx, &ws, id, coll->getCursor(&_opCtx)));
+ NamespaceString fakeNS("test", "coll");
+ ASSERT_FALSE(
+ WorkingSetCommon::fetch(&_opCtx, &ws, id, coll->getCursor(&_opCtx), fakeNS));
++it;
++count;