diff options
author | Dan Larkin-York <dan.larkin-york@mongodb.com> | 2023-04-05 13:09:32 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-04-05 23:21:36 +0000 |
commit | 5427de4a9289cb30154249200944fb9a75110eba (patch) | |
tree | 837265cc0ed085f38478f247c57ff85cece424f1 | |
parent | f1304cded1b4303f062bd81ee5c51d18a6e5d18e (diff) | |
download | mongo-5427de4a9289cb30154249200944fb9a75110eba.tar.gz |
SERVER-75561 Report and log more detailed information when validate encounters multikey inconsistencies
(cherry picked from commit fc3f5184c88025330c2e729188f4e38bac7d9886)
-rw-r--r-- | jstests/noPassthrough/validate_multikey_failures.js | 72 | ||||
-rw-r--r-- | src/mongo/db/catalog/index_catalog_entry_impl.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/catalog/validate_adaptor.cpp | 51 |
3 files changed, 125 insertions, 3 deletions
diff --git a/jstests/noPassthrough/validate_multikey_failures.js b/jstests/noPassthrough/validate_multikey_failures.js new file mode 100644 index 00000000000..9c8b01e8bd9 --- /dev/null +++ b/jstests/noPassthrough/validate_multikey_failures.js @@ -0,0 +1,72 @@ +/** + * Test that validate detects and properly reports multikey inconsistencies. + */ +(function() { +"use strict"; + +const baseName = "validate_multikey_failures"; +const dbpath = MongoRunner.dataPath + baseName + "/"; +let conn = MongoRunner.runMongod({dbpath: dbpath}); +let coll = conn.getDB("test").getCollection("corrupt"); + +const resetCollection = () => { + coll.drop(); + coll.createIndex({"a.b": 1}); +}; + +const disableMultikeyUpdate = () => { + assert.commandWorked( + conn.adminCommand({configureFailPoint: "skipUpdateIndexMultikey", mode: "alwaysOn"})); +}; + +const enableMultikeyUpdate = () => { + assert.commandWorked( + conn.adminCommand({configureFailPoint: "skipUpdateIndexMultikey", mode: "off"})); +}; + +// Test that multiple keys suggest index should be marked multikey. +resetCollection(); +disableMultikeyUpdate(); +assert.commandWorked(coll.insert({a: {b: [1, 2]}})); +enableMultikeyUpdate(); +let res = coll.validate(); +assert.commandWorked(res); +assert(!res.valid); +assert.eq(res.indexDetails["a.b_1"].errors.length, 1); +assert(res.indexDetails["a.b_1"].errors[0].startsWith("Index a.b_1 is not multikey")); +assert(res.indexDetails["a.b_1"].errors[0].includes("2 key(s)")); +assert(checkLog.checkContainsOnceJson(conn, 7556100, {"indexName": "a.b_1"})); +assert(checkLog.checkContainsOnceJson(conn, 7556101, {"indexKey": {"a.b": 1}})); +assert(checkLog.checkContainsOnceJson(conn, 7556101, {"indexKey": {"a.b": 2}})); + +// Test that a single-entry array suggests index should be marked multikey. +resetCollection(); +disableMultikeyUpdate(); +assert.commandWorked(coll.insert({a: {b: [3]}})); +enableMultikeyUpdate(); +res = coll.validate(); +assert.commandWorked(res); +assert(!res.valid); +assert.eq(res.indexDetails["a.b_1"].errors.length, 1); +assert(res.indexDetails["a.b_1"].errors[0].startsWith("Index a.b_1 is not multikey")); +assert(res.indexDetails["a.b_1"].errors[0].includes("1 key(s)")); +assert(checkLog.checkContainsOnceJson(conn, 7556100, {"indexName": "a.b_1"})); +assert(checkLog.checkContainsOnceJson(conn, 7556101, {"indexKey": {"a.b": 3}})); + +// Test that a mis-match in multikey paths should be marked multikey. +resetCollection(); +assert.commandWorked(coll.insert({a: [{b: 4}, {b: 5}]})); +disableMultikeyUpdate(); +assert.commandWorked(coll.insert({a: {b: [6]}})); +enableMultikeyUpdate(); +res = coll.validate(); +assert.commandWorked(res); +assert(!res.valid); +assert.eq(res.indexDetails["a.b_1"].errors.length, 1); +assert(res.indexDetails["a.b_1"].errors[0].startsWith( + "Index a.b_1 multikey paths do not cover a document")); +assert(checkLog.checkContainsOnceJson(conn, 7556100, {"indexName": "a.b_1"})); +assert(checkLog.checkContainsOnceJson(conn, 7556101, {"indexKey": {"a.b": 6}})); + +MongoRunner.stopMongod(conn, null, {skipValidation: true}); +})();
\ No newline at end of file diff --git a/src/mongo/db/catalog/index_catalog_entry_impl.cpp b/src/mongo/db/catalog/index_catalog_entry_impl.cpp index 15a3252eeae..07fc01757c9 100644 --- a/src/mongo/db/catalog/index_catalog_entry_impl.cpp +++ b/src/mongo/db/catalog/index_catalog_entry_impl.cpp @@ -58,6 +58,7 @@ #include "mongo/util/scopeguard.h" namespace mongo { +MONGO_FAIL_POINT_DEFINE(skipUpdateIndexMultikey); using std::string; @@ -220,6 +221,10 @@ void IndexCatalogEntryImpl::setMultikey(OperationContext* opCtx, } } + if (MONGO_unlikely(skipUpdateIndexMultikey.shouldFail())) { + return; + } + MultikeyPaths paths = _indexTracksMultikeyPathsInCatalog ? multikeyPaths : MultikeyPaths{}; // On a primary, we can simply assign this write the same timestamp as the index creation, diff --git a/src/mongo/db/catalog/validate_adaptor.cpp b/src/mongo/db/catalog/validate_adaptor.cpp index cd31d3f36cd..f964838db2e 100644 --- a/src/mongo/db/catalog/validate_adaptor.cpp +++ b/src/mongo/db/catalog/validate_adaptor.cpp @@ -62,6 +62,23 @@ MONGO_FAIL_POINT_DEFINE(failIndexKeyOrdering); const long long kInterruptIntervalNumRecords = 4096; const long long kInterruptIntervalNumBytes = 50 * 1024 * 1024; // 50MB. +BSONObj rehydrateKey(const BSONObj& keyPattern, const BSONObj& indexKey) { + // We need to rehydrate the indexKey for improved readability. + // {"": ObjectId(...)} -> {"_id": ObjectId(...)} + auto keysIt = keyPattern.begin(); + auto valuesIt = indexKey.begin(); + + BSONObjBuilder b; + while (keysIt != keyPattern.end()) { + // keysIt and valuesIt must have the same number of elements. + invariant(valuesIt != indexKey.end()); + b.appendAs(*valuesIt, keysIt->fieldName()); + keysIt++; + valuesIt++; + } + + return b.obj(); +} } // namespace Status ValidateAdaptor::validateRecord(OperationContext* opCtx, @@ -117,6 +134,25 @@ Status ValidateAdaptor::validateRecord(OperationContext* opCtx, recordId, IndexAccessMethod::kNoopOnSuppressedErrorFn); + auto printMultikeyMetadata = [&]() { + LOGV2(7556100, + "Index is not multikey but document has multikey data", + "indexName"_attr = descriptor->indexName(), + "recordId"_attr = recordId, + "record"_attr = redact(recordBson)); + for (auto& key : documentKeySet) { + auto indexKey = KeyString::toBsonSafe(key.getBuffer(), + key.getSize(), + iam->getSortedDataInterface()->getOrdering(), + key.getTypeBits()); + const BSONObj rehydratedKey = rehydrateKey(descriptor->keyPattern(), indexKey); + LOGV2(7556101, + "Index key for document with multikey inconsistency", + "indexName"_attr = descriptor->indexName(), + "recordId"_attr = recordId, + "indexKey"_attr = redact(rehydratedKey)); + } + }; if (!descriptor->isMultikey() && iam->shouldMarkIndexAsMultikey( documentKeySet.size(), multikeyMetadataKeys, documentMultikeyPaths)) { @@ -136,10 +172,17 @@ Status ValidateAdaptor::validateRecord(OperationContext* opCtx, << " set to multikey."); results->repaired = true; } else { + printMultikeyMetadata(); + ValidateResults& curRecordResults = (*_indexNsResultsMap)[descriptor->indexName()]; - std::string msg = str::stream() << "Index " << descriptor->indexName() - << " is not multikey but has more than one" - << " key in document " << recordId; + const std::string msg = fmt::format( + "Index {} is not multikey but document with RecordId({}) and {} has multikey " + "data, " + "{} key(s)", + descriptor->indexName(), + recordId.repr(), + recordBson.getField("_id").toString(), + documentKeySet.size()); curRecordResults.errors.push_back(msg); curRecordResults.valid = false; if (crashOnMultikeyValidateFailure.shouldFail()) { @@ -167,6 +210,8 @@ Status ValidateAdaptor::validateRecord(OperationContext* opCtx, << " multikey paths updated."); results->repaired = true; } else { + printMultikeyMetadata(); + std::string msg = str::stream() << "Index " << descriptor->indexName() << " multikey paths do not cover a document. RecordId: " << recordId; |