author    Dan Larkin-York <dan.larkin-york@mongodb.com>  2023-02-03 23:45:23 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com>  2023-02-11 14:05:59 +0000
commit    bcf0c75c8dbed2a84ad69ba5bd7aa200115f8cb9 (patch)
tree      1addd97ed85c13bf8619f31760091a7d4dc80a2c
parent    78f09d6c5a0a4c814335e5ec7608d9b0c5099092 (diff)
SERVER-72512 SERVER-73636 Ensure validate surfaces index key inconsistencies if they exist
(cherry picked from commit c0e1438cad31e00372a9f9edaee80f9db5a4e8ed)
(cherry picked from commit 2d90115a227572cc0f46b309ceb47dda6b740da3)
-rw-r--r--  jstests/disk/libs/wt_file_helper.js             10
-rw-r--r--  jstests/noPassthrough/validate_memory_limit.js  58
-rw-r--r--  src/mongo/db/catalog/index_consistency.cpp      92
3 files changed, 135 insertions, 25 deletions
diff --git a/jstests/disk/libs/wt_file_helper.js b/jstests/disk/libs/wt_file_helper.js
index 1d2cd673596..4a8b136e938 100644
--- a/jstests/disk/libs/wt_file_helper.js
+++ b/jstests/disk/libs/wt_file_helper.js
@@ -207,3 +207,13 @@ let runWiredTigerTool = function(...args) {
const cmd = ['wt'].concat(args);
assert.eq(run.apply(undefined, cmd), 0, "error executing: " + cmd.join(' '));
};
+
+/**
+ * Stops the given mongod, runs the truncate command on the given uri using the WiredTiger tool, and
+ * starts mongod again on the same path.
+ */
+let truncateUriAndRestartMongod = function(uri, conn) {
+ MongoRunner.stopMongod(conn, null, {skipValidation: true});
+ runWiredTigerTool("-h", conn.dbpath, "truncate", uri);
+ return startMongodOnExistingPath(conn.dbpath, {});
+};
\ No newline at end of file
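
For context, a minimal usage sketch of the new helper, mirroring how the test added below drives it. This assumes the existing helpers in this file (getUriForIndex, startMongodOnExistingPath) are loaded; the collection name is illustrative:

load("jstests/disk/libs/wt_file_helper.js");

// Corrupt a collection's _id index by truncating its underlying WiredTiger
// table, then restart mongod on the same dbpath.
let conn = MongoRunner.runMongod({});
let coll = conn.getDB("test").getCollection("corrupt");
assert.commandWorked(coll.insert({_id: 1}));

const uri = getUriForIndex(coll, "_id_");
conn = truncateUriAndRestartMongod(uri, conn);

// The _id index now lacks an entry for the inserted document, so a full
// validation is expected to flag the collection as invalid.
const res = conn.getDB("test").getCollection("corrupt").validate({full: true});
assert(!res.valid, tojson(res));
MongoRunner.stopMongod(conn, null, {skipValidation: true});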
diff --git a/jstests/noPassthrough/validate_memory_limit.js b/jstests/noPassthrough/validate_memory_limit.js
new file mode 100644
index 00000000000..a3903e36905
--- /dev/null
+++ b/jstests/noPassthrough/validate_memory_limit.js
@@ -0,0 +1,58 @@
+/**
+ * Test that the memory usage of validate is properly limited according to the
+ * maxValidateMemoryUsageMB parameter.
+ *
+ * @tags: [requires_wiredtiger, requires_persistence]
+ */
+(function() {
+"use strict";
+
+load("jstests/disk/libs/wt_file_helper.js");
+
+const kIndexKeyLength = 1024 * 1024;
+
+const baseName = "validate_memory_limit";
+const dbpath = MongoRunner.dataPath + baseName + "/";
+let conn = MongoRunner.runMongod({dbpath: dbpath});
+let coll = conn.getDB("test").getCollection("corrupt");
+
+function corruptIndex() {
+ const uri = getUriForIndex(coll, "_id_");
+ conn = truncateUriAndRestartMongod(uri, conn);
+ coll = conn.getDB("test").getCollection("corrupt");
+}
+
+function checkValidate(maxMemoryUsage, {minMissingKeys, maxMissingKeys}) {
+ conn.getDB("test").adminCommand({setParameter: 1, maxValidateMemoryUsageMB: maxMemoryUsage});
+ const res = coll.validate({full: true});
+ assert.commandWorked(res);
+ assert(!res.valid, tojson(res));
+ assert.gte(res.missingIndexEntries.length, minMissingKeys, tojson(res));
+ assert.lte(res.missingIndexEntries.length, maxMissingKeys, tojson(res));
+}
+
+// Insert a document with a key larger than maxValidateMemoryUsageMB and test that we still report
+// at least one inconsistency.
+const indexKey = "a".repeat(kIndexKeyLength);
+assert.commandWorked(coll.insert({_id: indexKey}));
+corruptIndex();
+checkValidate(1, {minMissingKeys: 1, maxMissingKeys: 1});
+
+// Clear collection between tests.
+coll.drop();
+
+// Test that if we have keys distributed across many buckets, and would exceed
+// maxValidateMemoryUsageMB, we report as many inconsistencies as we can.
+for (let i = 0; i < 10; ++i) {
+ const indexKey = i.toString().repeat(kIndexKeyLength / 5);
+ assert.commandWorked(coll.insert({_id: indexKey}));
+}
+
+corruptIndex();
+// If each key is maxMem/5, then we can keep 4 of them (the 5th would put us at the limit). However,
+// each key is counted twice, so realistically we only expect to track 2 of them. There's also a
+// small chance we could get hash collisions that would lead to us reporting only 1.
+checkValidate(1, {minMissingKeys: 1, maxMissingKeys: 2});
+
+MongoRunner.stopMongod(conn, null, {skipValidation: true});
+})();
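
The bounds in the final checkValidate call follow from a short back-of-envelope calculation; a hedged sketch of that arithmetic, with the doubling factor taken from the comment in the test above:

// Rough arithmetic behind {minMissingKeys: 1, maxMissingKeys: 2}, assuming
// maxValidateMemoryUsageMB = 1 and ten keys of roughly (1 MiB / 5) each.
const budgetBytes = 1 * 1024 * 1024;
const keyBytes = budgetBytes / 5;
// Each key is counted twice against the budget, so validate can only keep
// track of about this many keys:
const expectedTracked = Math.floor(budgetBytes / (2 * keyBytes));  // 2
// Hash collisions can merge keys into one bucket, which is why the test also
// accepts a single reported missing index entry.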
diff --git a/src/mongo/db/catalog/index_consistency.cpp b/src/mongo/db/catalog/index_consistency.cpp
index da72e768898..bde9394b2f7 100644
--- a/src/mongo/db/catalog/index_consistency.cpp
+++ b/src/mongo/db/catalog/index_consistency.cpp
@@ -155,15 +155,31 @@ void IndexConsistency::addIndexEntryErrors(ValidateResultsMap* indexNsResultsMap
numExtraIndexEntryErrors += item.second.size();
}
- // Inform which indexes have inconsistences and add the BSON objects of the inconsistent index
+ // Sort missing index entries by size so we can process in order of increasing size and return
+ // as many as possible within memory limits.
+ using MissingIt = decltype(_missingIndexEntries)::const_iterator;
+ std::vector<MissingIt> missingIndexEntriesBySize;
+ missingIndexEntriesBySize.reserve(_missingIndexEntries.size());
+ for (auto it = _missingIndexEntries.begin(); it != _missingIndexEntries.end(); ++it) {
+ missingIndexEntriesBySize.push_back(it);
+ }
+ std::sort(missingIndexEntriesBySize.begin(),
+ missingIndexEntriesBySize.end(),
+ [](const MissingIt& a, const MissingIt& b) {
+ return a->second.objsize() < b->second.objsize();
+ });
+
+ // Inform which indexes have inconsistencies and add the BSON objects of the inconsistent index
// entries to the results vector.
bool missingIndexEntrySizeLimitWarning = false;
- for (const auto& missingIndexEntry : _missingIndexEntries) {
- const BSONObj& entry = missingIndexEntry.second;
+ bool first = true;
+ for (const auto& it : missingIndexEntriesBySize) {
+ const auto& entry = it->second;
numMissingIndexEntriesSizeBytes += entry.objsize();
- if (numMissingIndexEntriesSizeBytes <= kErrorSizeBytes) {
+ if (first || numMissingIndexEntriesSizeBytes <= kErrorSizeBytes) {
results->missingIndexEntries.push_back(entry);
+ first = false;
} else if (!missingIndexEntrySizeLimitWarning) {
StringBuilder ss;
ss << "Not all missing index entry inconsistencies are listed due to size limitations.";
@@ -184,33 +200,58 @@ void IndexConsistency::addIndexEntryErrors(ValidateResultsMap* indexNsResultsMap
indexNsResultsMap->at(indexName).valid = false;
}
- bool extraIndexEntrySizeLimitWarning = false;
+ // Sort extra index entries by size so we can process in order of increasing size and return as
+ // many as possible within memory limits.
+ using ExtraIt = SimpleBSONObjSet::const_iterator;
+ std::vector<ExtraIt> extraIndexEntriesBySize;
+ // Since the extra entries are stored in a map of sets, we have to iterate the entries in the
+ // map and sum the size of the sets in order to get the total number. Given that we can have at
+ // most 64 indexes per collection, and the total number of entries could potentially be in the
+ // millions, we expect that iterating the map will be much less costly than the additional
+ // allocations and copies that could result from not calling 'reserve' on the vector.
+ size_t totalExtraIndexEntriesCount =
+ std::accumulate(_extraIndexEntries.begin(),
+ _extraIndexEntries.end(),
+ 0,
+ [](size_t total, const std::pair<IndexKey, SimpleBSONObjSet>& set) {
+ return total + set.second.size();
+ });
+ extraIndexEntriesBySize.reserve(totalExtraIndexEntriesCount);
for (const auto& extraIndexEntry : _extraIndexEntries) {
const SimpleBSONObjSet& entries = extraIndexEntry.second;
- for (const auto& entry : entries) {
- numExtraIndexEntriesSizeBytes += entry.objsize();
- if (numExtraIndexEntriesSizeBytes <= kErrorSizeBytes) {
- results->extraIndexEntries.push_back(entry);
- } else if (!extraIndexEntrySizeLimitWarning) {
- StringBuilder ss;
- ss << "Not all extra index entry inconsistencies are listed due to size "
- "limitations.";
- results->errors.push_back(ss.str());
-
- extraIndexEntrySizeLimitWarning = true;
- }
-
- std::string indexName = entry["indexName"].String();
- if (!indexNsResultsMap->at(indexName).valid) {
- continue;
- }
+ for (auto it = entries.begin(); it != entries.end(); ++it) {
+ extraIndexEntriesBySize.push_back(it);
+ }
+ }
+ std::sort(extraIndexEntriesBySize.begin(),
+ extraIndexEntriesBySize.end(),
+ [](const ExtraIt& a, const ExtraIt& b) { return a->objsize() < b->objsize(); });
+ bool extraIndexEntrySizeLimitWarning = false;
+ for (const auto& entry : extraIndexEntriesBySize) {
+ numExtraIndexEntriesSizeBytes += entry->objsize();
+ if (first || numExtraIndexEntriesSizeBytes <= kErrorSizeBytes) {
+ results->extraIndexEntries.push_back(*entry);
+ first = false;
+ } else if (!extraIndexEntrySizeLimitWarning) {
StringBuilder ss;
- ss << "Index with name '" << indexName << "' has inconsistencies.";
+ ss << "Not all extra index entry inconsistencies are listed due to size "
+ "limitations.";
results->errors.push_back(ss.str());
- indexNsResultsMap->at(indexName).valid = false;
+ extraIndexEntrySizeLimitWarning = true;
}
+
+ std::string indexName = (*entry)["indexName"].String();
+ if (!indexNsResultsMap->at(indexName).valid) {
+ continue;
+ }
+
+ StringBuilder ss;
+ ss << "Index with name '" << indexName << "' has inconsistencies.";
+ results->errors.push_back(ss.str());
+
+ indexNsResultsMap->at(indexName).valid = false;
}
// Inform how many inconsistencies were detected.
@@ -256,7 +297,8 @@ void IndexConsistency::addDocKey(const KeyString& ks,
BSONObj info = _generateInfo(*indexInfo, recordId, indexKey, idKey);
- // Cannot have duplicate KeyStrings during the document scan phase for the same index.
+ // Cannot have duplicate KeyStrings during the document scan phase for the same
+ // index.
IndexKey key = _generateKeyForMap(*indexInfo, ks);
invariant(_missingIndexEntries.count(key) == 0);
_missingIndexEntries.insert(std::make_pair(key, info));
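
Taken together, the C++ change above amounts to a simple reporting policy: collect the inconsistencies, sort them by ascending BSON size, always report the smallest entry even if it alone exceeds the size budget, and keep reporting until the running total passes kErrorSizeBytes. A minimal sketch of that policy in shell JavaScript, with selectWithinBudget, sizeOf, and budgetBytes as illustrative names rather than server code:

// Entries are considered smallest-first; the first entry is always reported,
// and later entries are reported only while the cumulative size stays within
// the budget. The total keeps accumulating either way, mirroring the loops in
// addIndexEntryErrors.
function selectWithinBudget(entries, sizeOf, budgetBytes) {
    const bySize = [...entries].sort((a, b) => sizeOf(a) - sizeOf(b));
    const reported = [];
    let totalBytes = 0;
    let first = true;
    for (const entry of bySize) {
        totalBytes += sizeOf(entry);
        if (first || totalBytes <= budgetBytes) {
            reported.push(entry);
            first = false;
        }
    }
    return reported;
}

// Even a single entry larger than the whole budget is still reported, which is
// what guarantees validate surfaces at least one inconsistency.
assert.eq(selectWithinBudget(["x".repeat(100)], (s) => s.length, 10).length, 1);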