| author | Dan Larkin-York <dan.larkin-york@mongodb.com> | 2023-02-03 23:45:23 +0000 |
|---|---|---|
| committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2023-02-11 14:05:59 +0000 |
| commit | bcf0c75c8dbed2a84ad69ba5bd7aa200115f8cb9 (patch) | |
| tree | 1addd97ed85c13bf8619f31760091a7d4dc80a2c | |
| parent | 78f09d6c5a0a4c814335e5ec7608d9b0c5099092 (diff) | |
SERVER-72512 SERVER-73636 Ensure validate surfaces index key inconsistencies if they exist
(cherry picked from commit c0e1438cad31e00372a9f9edaee80f9db5a4e8ed)
(cherry picked from commit 2d90115a227572cc0f46b309ceb47dda6b740da3)
-rw-r--r-- | jstests/disk/libs/wt_file_helper.js            | 10
-rw-r--r-- | jstests/noPassthrough/validate_memory_limit.js | 58
-rw-r--r-- | src/mongo/db/catalog/index_consistency.cpp     | 92
3 files changed, 135 insertions, 25 deletions
```diff
diff --git a/jstests/disk/libs/wt_file_helper.js b/jstests/disk/libs/wt_file_helper.js
index 1d2cd673596..4a8b136e938 100644
--- a/jstests/disk/libs/wt_file_helper.js
+++ b/jstests/disk/libs/wt_file_helper.js
@@ -207,3 +207,13 @@ let runWiredTigerTool = function(...args) {
     const cmd = ['wt'].concat(args);
     assert.eq(run.apply(undefined, cmd), 0, "error executing: " + cmd.join(' '));
 };
+
+/**
+ * Stops the given mongod, runs the truncate command on the given uri using the WiredTiger tool, and
+ * starts mongod again on the same path.
+ */
+let truncateUriAndRestartMongod = function(uri, conn) {
+    MongoRunner.stopMongod(conn, null, {skipValidation: true});
+    runWiredTigerTool("-h", conn.dbpath, "truncate", uri);
+    return startMongodOnExistingPath(conn.dbpath, {});
+};
\ No newline at end of file
```
```diff
diff --git a/jstests/noPassthrough/validate_memory_limit.js b/jstests/noPassthrough/validate_memory_limit.js
new file mode 100644
index 00000000000..a3903e36905
--- /dev/null
+++ b/jstests/noPassthrough/validate_memory_limit.js
@@ -0,0 +1,58 @@
+/**
+ * Test that the memory usage of validate is properly limited according to the
+ * maxValidateMemoryUsageMB parameter.
+ *
+ * @tags: [requires_wiredtiger, requires_persistence]
+ */
+(function() {
+"use strict";
+
+load("jstests/disk/libs/wt_file_helper.js");
+
+const kIndexKeyLength = 1024 * 1024;
+
+const baseName = "validate_memory_limit";
+const dbpath = MongoRunner.dataPath + baseName + "/";
+let conn = MongoRunner.runMongod({dbpath: dbpath});
+let coll = conn.getDB("test").getCollection("corrupt");
+
+function corruptIndex() {
+    const uri = getUriForIndex(coll, "_id_");
+    conn = truncateUriAndRestartMongod(uri, conn);
+    coll = conn.getDB("test").getCollection("corrupt");
+}
+
+function checkValidate(maxMemoryUsage, {minMissingKeys, maxMissingKeys}) {
+    conn.getDB("test").adminCommand({setParameter: 1, maxValidateMemoryUsageMB: maxMemoryUsage});
+    const res = coll.validate({full: true});
+    assert.commandWorked(res);
+    assert(!res.valid, tojson(res));
+    assert.gte(res.missingIndexEntries.length, minMissingKeys, tojson(res));
+    assert.lte(res.missingIndexEntries.length, maxMissingKeys, tojson(res));
+}
+
+// Insert a document with a key larger than maxValidateMemoryUsageMB and test that we still report
+// at least one inconsistency.
+const indexKey = "a".repeat(kIndexKeyLength);
+assert.commandWorked(coll.insert({_id: indexKey}));
+corruptIndex();
+checkValidate(1, {minMissingKeys: 1, maxMissingKeys: 1});
+
+// Clear collection between tests.
+coll.drop();
+
+// Test that if we have keys distributed across many buckets, and would exceed
+// maxValidateMemoryUsageMB, we report as many inconsistencies as we can.
+for (let i = 0; i < 10; ++i) {
+    const indexKey = i.toString().repeat(kIndexKeyLength / 5);
+    assert.commandWorked(coll.insert({_id: indexKey}));
+}
+
+corruptIndex();
+// If each key is maxMem/5, then we can keep 4 of them (the 5th would put us at the limit). However,
+// each key is counted twice, so realistically we only expect to track 2 of them. However, there's
+// a small chance we could get hash collisions that would lead to us reporting only 1.
+checkValidate(1, {minMissingKeys: 1, maxMissingKeys: 2});
+
+MongoRunner.stopMongod(conn, null, {skipValidation: true});
+})();
```
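The bounds asserted by the second `checkValidate` call follow from a bit of accounting that is easy to lose in the test comments. Below is a self-contained sketch of that arithmetic. It is illustrative only: the real implementation tracks keys in hashed buckets against `maxValidateMemoryUsageMB` and separately caps the size of the reported errors, while this sketch collapses everything into a single budget; all names, the 2x-per-key factor, and the keep-the-first-entry rule are assumptions drawn from the comments above and the C++ change that follows.

```cpp
// A minimal standalone sketch of the size-budget arithmetic the test relies on.
// All names and numbers here are illustrative, not the actual validate code.
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
    const std::size_t budgetBytes = 1024 * 1024;   // maxValidateMemoryUsageMB = 1
    const std::size_t keyBytes = budgetBytes / 5;  // each test key is maxMem/5
    // Per the test comment, each key is counted twice while tracking, so one
    // tracked inconsistency costs roughly twice the raw key size.
    const std::size_t entryBytes = 2 * keyBytes;

    std::vector<std::size_t> entries(10, entryBytes);  // ten corrupted keys
    std::size_t used = 0;
    std::size_t reported = 0;
    bool first = true;
    for (std::size_t size : entries) {
        used += size;
        // The first inconsistency is always reported, even if it alone blows
        // the budget; after that, stop reporting once the budget is exceeded.
        if (first || used <= budgetBytes) {
            ++reported;
            first = false;
        }
    }
    std::cout << reported << "\n";  // prints 2, the test's maxMissingKeys
    return 0;
}
```

With these numbers the running total crosses the 1 MB budget on the third entry, so at most two inconsistencies survive; a hash collision while tracking can merge two of them, which is why the test accepts as few as one. The same keep-the-first rule explains the first test case: a single 2 MB entry exceeds the budget outright yet is still reported.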
```diff
diff --git a/src/mongo/db/catalog/index_consistency.cpp b/src/mongo/db/catalog/index_consistency.cpp
index da72e768898..bde9394b2f7 100644
--- a/src/mongo/db/catalog/index_consistency.cpp
+++ b/src/mongo/db/catalog/index_consistency.cpp
@@ -155,15 +155,31 @@ void IndexConsistency::addIndexEntryErrors(ValidateResultsMap* indexNsResultsMap
         numExtraIndexEntryErrors += item.second.size();
     }
 
-    // Inform which indexes have inconsistences and add the BSON objects of the inconsistent index
+    // Sort missing index entries by size so we can process in order of increasing size and return
+    // as many as possible within memory limits.
+    using MissingIt = decltype(_missingIndexEntries)::const_iterator;
+    std::vector<MissingIt> missingIndexEntriesBySize;
+    missingIndexEntriesBySize.reserve(_missingIndexEntries.size());
+    for (auto it = _missingIndexEntries.begin(); it != _missingIndexEntries.end(); ++it) {
+        missingIndexEntriesBySize.push_back(it);
+    }
+    std::sort(missingIndexEntriesBySize.begin(),
+              missingIndexEntriesBySize.end(),
+              [](const MissingIt& a, const MissingIt& b) {
+                  return a->second.objsize() < b->second.objsize();
+              });
+
+    // Inform which indexes have inconsistencies and add the BSON objects of the inconsistent index
     // entries to the results vector.
     bool missingIndexEntrySizeLimitWarning = false;
-    for (const auto& missingIndexEntry : _missingIndexEntries) {
-        const BSONObj& entry = missingIndexEntry.second;
+    bool first = true;
+    for (const auto& it : missingIndexEntriesBySize) {
+        const auto& entry = it->second;
         numMissingIndexEntriesSizeBytes += entry.objsize();
-        if (numMissingIndexEntriesSizeBytes <= kErrorSizeBytes) {
+        if (first || numMissingIndexEntriesSizeBytes <= kErrorSizeBytes) {
             results->missingIndexEntries.push_back(entry);
+            first = false;
         } else if (!missingIndexEntrySizeLimitWarning) {
             StringBuilder ss;
             ss << "Not all missing index entry inconsistencies are listed due to size limitations.";
             results->errors.push_back(ss.str());
```
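The hunk above never copies a `BSONObj` while sorting: it collects `const_iterator`s into the container of missing entries and sorts those by the size of the object they point at. Below is a minimal sketch of the same pattern with stand-in types (`std::string` payloads in place of `BSONObj`; all names are illustrative, not the mongod ones).

```cpp
// Sort cheap iterators into a map by payload size instead of copying the
// (potentially large) payloads into a sortable container.
#include <algorithm>
#include <iostream>
#include <map>
#include <string>
#include <vector>

int main() {
    std::map<std::string, std::string> entries{
        {"k1", std::string(300, 'a')},  // pretend these are BSONObj payloads
        {"k2", std::string(100, 'b')},
        {"k3", std::string(200, 'c')},
    };

    using It = std::map<std::string, std::string>::const_iterator;
    std::vector<It> bySize;
    bySize.reserve(entries.size());
    for (auto it = entries.begin(); it != entries.end(); ++it) {
        bySize.push_back(it);  // O(1): copies an iterator, not the payload
    }
    // Ascending by size, so a fixed byte budget admits as many entries as possible.
    std::sort(bySize.begin(), bySize.end(), [](const It& a, const It& b) {
        return a->second.size() < b->second.size();
    });

    for (const auto& it : bySize) {
        std::cout << it->first << " " << it->second.size() << "\n";
    }
    return 0;
}
```

Sorting ascending is what makes the greedy loop above maximal: taking the smallest entries first fits the largest possible number of inconsistencies under the fixed `kErrorSizeBytes` budget, while the `first` flag guarantees at least one is always surfaced.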
```diff
@@ -184,33 +200,58 @@ void IndexConsistency::addIndexEntryErrors(ValidateResultsMap* indexNsResultsMap
         indexNsResultsMap->at(indexName).valid = false;
     }
 
-    bool extraIndexEntrySizeLimitWarning = false;
+    // Sort extra index entries by size so we can process in order of increasing size and return as
+    // many as possible within memory limits.
+    using ExtraIt = SimpleBSONObjSet::const_iterator;
+    std::vector<ExtraIt> extraIndexEntriesBySize;
+    // Since the extra entries are stored in a map of sets, we have to iterate the entries in the
+    // map and sum the size of the sets in order to get the total number. Given that we can have at
+    // most 64 indexes per collection, and the total number of entries could potentially be in the
+    // millions, we expect that iterating the map will be much less costly than the additional
+    // allocations and copies that could result from not calling 'reserve' on the vector.
+    size_t totalExtraIndexEntriesCount =
+        std::accumulate(_extraIndexEntries.begin(),
+                        _extraIndexEntries.end(),
+                        0,
+                        [](size_t total, const std::pair<IndexKey, SimpleBSONObjSet>& set) {
+                            return total + set.second.size();
+                        });
+    extraIndexEntriesBySize.reserve(totalExtraIndexEntriesCount);
     for (const auto& extraIndexEntry : _extraIndexEntries) {
         const SimpleBSONObjSet& entries = extraIndexEntry.second;
-        for (const auto& entry : entries) {
-            numExtraIndexEntriesSizeBytes += entry.objsize();
-            if (numExtraIndexEntriesSizeBytes <= kErrorSizeBytes) {
-                results->extraIndexEntries.push_back(entry);
-            } else if (!extraIndexEntrySizeLimitWarning) {
-                StringBuilder ss;
-                ss << "Not all extra index entry inconsistencies are listed due to size "
-                      "limitations.";
-                results->errors.push_back(ss.str());
-
-                extraIndexEntrySizeLimitWarning = true;
-            }
-
-            std::string indexName = entry["indexName"].String();
-            if (!indexNsResultsMap->at(indexName).valid) {
-                continue;
-            }
+        for (auto it = entries.begin(); it != entries.end(); ++it) {
+            extraIndexEntriesBySize.push_back(it);
+        }
+    }
+    std::sort(extraIndexEntriesBySize.begin(),
+              extraIndexEntriesBySize.end(),
+              [](const ExtraIt& a, const ExtraIt& b) { return a->objsize() < b->objsize(); });
+    bool extraIndexEntrySizeLimitWarning = false;
+    for (const auto& entry : extraIndexEntriesBySize) {
+        numExtraIndexEntriesSizeBytes += entry->objsize();
+        if (first || numExtraIndexEntriesSizeBytes <= kErrorSizeBytes) {
+            results->extraIndexEntries.push_back(*entry);
+            first = false;
+        } else if (!extraIndexEntrySizeLimitWarning) {
             StringBuilder ss;
-            ss << "Index with name '" << indexName << "' has inconsistencies.";
+            ss << "Not all extra index entry inconsistencies are listed due to size "
+                  "limitations.";
             results->errors.push_back(ss.str());
 
-            indexNsResultsMap->at(indexName).valid = false;
+            extraIndexEntrySizeLimitWarning = true;
         }
+
+        std::string indexName = (*entry)["indexName"].String();
+        if (!indexNsResultsMap->at(indexName).valid) {
+            continue;
+        }
+
+        StringBuilder ss;
+        ss << "Index with name '" << indexName << "' has inconsistencies.";
+        results->errors.push_back(ss.str());
+
+        indexNsResultsMap->at(indexName).valid = false;
     }
 
     // Inform how many inconsistencies were detected.
@@ -256,7 +297,8 @@ void IndexConsistency::addDocKey(const KeyString& ks,
 
             BSONObj info = _generateInfo(*indexInfo, recordId, indexKey, idKey);
 
-            // Cannot have duplicate KeyStrings during the document scan phase for the same index.
+            // Cannot have duplicate KeyStrings during the document scan phase for the same
+            // index.
             IndexKey key = _generateKeyForMap(*indexInfo, ks);
             invariant(_missingIndexEntries.count(key) == 0);
             _missingIndexEntries.insert(std::make_pair(key, info));
```
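The `std::accumulate` in the hunk above exists only to size a single `reserve` call before the map of sets is flattened into a vector of iterators. A minimal sketch of that pattern, with stand-in names and types rather than the mongod ones:

```cpp
// Pre-size a vector by summing the set sizes in a map of sets, so collecting
// the iterators never triggers a reallocation.
#include <cstddef>
#include <iostream>
#include <map>
#include <numeric>
#include <set>
#include <string>
#include <vector>

int main() {
    std::map<std::string, std::set<int>> buckets{
        {"idx_a", {1, 2, 3}},
        {"idx_b", {4, 5}},
    };

    // One pass over the (small) map computes the total element count...
    const std::size_t total = std::accumulate(
        buckets.begin(), buckets.end(), std::size_t{0},
        [](std::size_t sum, const auto& bucket) { return sum + bucket.second.size(); });

    // ...so this vector allocates exactly once while we collect iterators.
    std::vector<std::set<int>::const_iterator> flat;
    flat.reserve(total);
    for (const auto& bucket : buckets) {
        for (auto it = bucket.second.begin(); it != bucket.second.end(); ++it) {
            flat.push_back(it);
        }
    }
    std::cout << flat.size() << "\n";  // prints 5
    return 0;
}
```

One design detail worth noting: the sketch takes each map element by generic `const auto&`, which binds to the map's true `value_type`, `std::pair<const Key, ...>`. Spelling the lambda parameter as `const std::pair<IndexKey, SimpleBSONObjSet>&`, as the hunk above does, names a different type, so each element is converted through a temporary that copies the set; with large sets that copy can cost more than the `reserve` saves.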