author    Dan Larkin-York <dan.larkin-york@mongodb.com>  2023-02-03 23:45:23 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com>  2023-02-04 02:08:36 +0000
commit    c0e1438cad31e00372a9f9edaee80f9db5a4e8ed (patch)
tree      0a5f6882020b11cd90fa83723d6988559c9679d1
parent    94098eff431d7bb65da31516cc4df2a895d27dd8 (diff)
download  mongo-c0e1438cad31e00372a9f9edaee80f9db5a4e8ed.tar.gz
SERVER-72512 Ensure validate surfaces index key inconsistencies if they exist
-rw-r--r--  jstests/noPassthrough/validate_memory_limit.js    51
-rw-r--r--  src/mongo/db/catalog/index_consistency.cpp        156
2 files changed, 127 insertions(+), 80 deletions(-)
diff --git a/jstests/noPassthrough/validate_memory_limit.js b/jstests/noPassthrough/validate_memory_limit.js
index 106e94f5487..5a576743177 100644
--- a/jstests/noPassthrough/validate_memory_limit.js
+++ b/jstests/noPassthrough/validate_memory_limit.js
@@ -25,44 +25,53 @@ function corruptIndex() {
coll = conn.getDB("test").getCollection("corrupt");
}
-function checkValidate(errorPrefix, numMissingIndexEntries) {
- conn.getDB("test").adminCommand({setParameter: 1, maxValidateMemoryUsageMB: 1});
+function checkValidate(maxMemoryUsage, {minMissingKeys, maxMissingKeys}) {
+ conn.getDB("test").adminCommand({setParameter: 1, maxValidateMemoryUsageMB: maxMemoryUsage});
const res = coll.validate();
assert.commandWorked(res);
- assert(!res.valid);
- assert.containsPrefix(errorPrefix, res.errors);
- assert.eq(res.missingIndexEntries.length, numMissingIndexEntries);
+ assert(!res.valid, tojson(res));
+ const notAllReportedPrefix =
+ "Not all index entry inconsistencies are reported due to memory limitations.";
+ assert.containsPrefix(notAllReportedPrefix, res.errors, tojson(res));
+ assert.gte(res.missingIndexEntries.length, minMissingKeys, tojson(res));
+ assert.lte(res.missingIndexEntries.length, maxMissingKeys, tojson(res));
}
-function checkValidateRepair(expectRepair) {
+function checkValidateRepair() {
const res = coll.validate({repair: true});
assert.commandWorked(res);
- assert(!res.valid, printjson(res));
- assert.eq(res.repaired, expectRepair, printjson(res));
+ assert(!res.valid, tojson(res));
+ assert(res.repaired, tojson(res));
}
-const noneReportedPrefix =
- "Unable to report index entry inconsistencies due to memory limitations.";
-const notAllReportedPrefix =
- "Not all index entry inconsistencies are reported due to memory limitations.";
-
-// Insert a document with a key larger than maxValidateMemoryUsageMB so that validate does not
-// report any missing index entries.
+// Insert a document with a key larger than maxValidateMemoryUsageMB and test that we still report
+// at least one inconsistency.
const indexKey = "a".repeat(kIndexKeyLength);
assert.commandWorked(coll.insert({_id: indexKey}));
corruptIndex();
-checkValidate(noneReportedPrefix, 0);
+checkValidate(1, {minMissingKeys: 1, maxMissingKeys: 1});
// Can't repair successfully if there aren't any index inconsistencies reported.
-checkValidateRepair(false);
+checkValidateRepair();
+
+// Clear collection between tests.
+coll.drop();
+
+// Test that if we have keys distributed across many buckets whose total size would exceed
+// maxValidateMemoryUsageMB, we report as many inconsistencies as we can.
+for (let i = 0; i < 10; ++i) {
+ const indexKey = i.toString().repeat(kIndexKeyLength / 5);
+ assert.commandWorked(coll.insert({_id: indexKey}));
+}
-// Insert a document with a small key so that validate reports one missing index entry.
-assert.commandWorked(coll.insert({_id: 1}));
corruptIndex();
-checkValidate(notAllReportedPrefix, 1);
+// If each key is maxMem/5, then we can keep 4 of them (the 5th would put us at the limit).
+// However, each key is counted twice, so realistically we only expect to track 2 of them, and
+// there's a small chance hash collisions could lead to us reporting only 1.
+checkValidate(1, {minMissingKeys: 1, maxMissingKeys: 2});
// Repair, but incompletely if only some inconsistencies are reported.
-checkValidateRepair(true);
+checkValidateRepair();
MongoRunner.stopMongod(conn, null, {skipValidation: true});
})();
\ No newline at end of file
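The bucket arithmetic in that last comment can be checked concretely. Below is a minimal sketch, not part of the patch: the 1MB budget mirrors maxValidateMemoryUsageMB: 1, the limit/5 key size mirrors the kIndexKeyLength / 5 keys, and the factor of two comes from the test's own "each key is counted twice" note.

    #include <cstdint>
    #include <iostream>

    int main() {
        const std::uint64_t maxMemoryUsageBytes = 1024 * 1024;   // maxValidateMemoryUsageMB: 1
        const std::uint64_t keySizeBytes = maxMemoryUsageBytes / 5;
        const std::uint64_t perKeyCostBytes = 2 * keySizeBytes;  // each key is counted twice

        std::uint64_t usedBytes = 0;
        int trackedKeys = 0;
        while (usedBytes + perKeyCostBytes <= maxMemoryUsageBytes) {
            usedBytes += perKeyCostBytes;
            ++trackedKeys;
        }
        // Prints 2: two double-counted keys fit under the budget; a hash
        // collision merging buckets can reduce the reported count to 1.
        std::cout << trackedKeys << std::endl;
        return 0;
    }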
diff --git a/src/mongo/db/catalog/index_consistency.cpp b/src/mongo/db/catalog/index_consistency.cpp
index c9526d94a6d..48cba4589de 100644
--- a/src/mongo/db/catalog/index_consistency.cpp
+++ b/src/mongo/db/catalog/index_consistency.cpp
@@ -223,11 +223,26 @@ void KeyStringIndexConsistency::addIndexEntryErrors(OperationContext* opCtx,
numExtraIndexEntryErrors += item.second.size();
}
+ // Sort missing index entries by size so we can process them in increasing order and return
+ // as many as possible within memory limits.
+ using MissingIt = decltype(_missingIndexEntries)::const_iterator;
+ std::vector<MissingIt> missingIndexEntriesBySize;
+ missingIndexEntriesBySize.reserve(_missingIndexEntries.size());
+ for (auto it = _missingIndexEntries.begin(); it != _missingIndexEntries.end(); ++it) {
+ missingIndexEntriesBySize.push_back(it);
+ }
+ std::sort(missingIndexEntriesBySize.begin(),
+ missingIndexEntriesBySize.end(),
+ [](const MissingIt& a, const MissingIt& b) {
+ return a->second.keyString.getSize() < b->second.keyString.getSize();
+ });
+
// Inform which indexes have inconsistencies and add the BSON objects of the inconsistent index
// entries to the results vector.
bool missingIndexEntrySizeLimitWarning = false;
- for (const auto& missingIndexEntry : _missingIndexEntries) {
- const IndexEntryInfo& entryInfo = missingIndexEntry.second;
+ bool first = true;
+ for (const auto& missingIndexEntry : missingIndexEntriesBySize) {
+ const IndexEntryInfo& entryInfo = missingIndexEntry->second;
KeyString::Value ks = entryInfo.keyString;
auto indexKey =
KeyString::toBsonSafe(ks.getBuffer(), ks.getSize(), entryInfo.ord, ks.getTypeBits());
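The hunk above avoids copying entries when ordering them: it collects iterators into _missingIndexEntries and sorts those by KeyString size, so the smallest inconsistencies are emitted first and the fixed byte budget covers as many of them as possible. A self-contained sketch of the same idiom, using hypothetical stand-in types rather than the real IndexEntryInfo:

    #include <algorithm>
    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    int main() {
        // Payload strings stand in for KeyString::Value buffers of varying size.
        std::map<std::string, std::string> missing = {
            {"idx.a", std::string(10, 'x')},
            {"idx.b", std::string(2, 'x')},
            {"idx.c", std::string(4, 'x')}};

        using It = std::map<std::string, std::string>::const_iterator;
        std::vector<It> bySize;
        bySize.reserve(missing.size());
        for (auto it = missing.begin(); it != missing.end(); ++it) {
            bySize.push_back(it);  // sort cheap iterators, not full entries
        }

        std::sort(bySize.begin(), bySize.end(), [](const It& a, const It& b) {
            return a->second.size() < b->second.size();
        });

        // Visits idx.b (2 bytes), idx.c (4), then idx.a (10): smallest-first
        // packs the most entries under any fixed size limit.
        for (const auto& it : bySize) {
            std::cout << it->first << " " << it->second.size() << "\n";
        }
        return 0;
    }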
@@ -238,8 +253,9 @@ void KeyStringIndexConsistency::addIndexEntryErrors(OperationContext* opCtx,
entryInfo.idKey);
numMissingIndexEntriesSizeBytes += entry.objsize();
- if (numMissingIndexEntriesSizeBytes <= kErrorSizeBytes) {
+ if (first || numMissingIndexEntriesSizeBytes <= kErrorSizeBytes) {
results->missingIndexEntries.push_back(entry);
+ first = false;
} else if (!missingIndexEntrySizeLimitWarning) {
StringBuilder ss;
ss << "Not all missing index entry inconsistencies are listed due to size limitations.";
@@ -260,33 +276,46 @@ void KeyStringIndexConsistency::addIndexEntryErrors(OperationContext* opCtx,
results->indexResultsMap.at(indexName).valid = false;
}
- bool extraIndexEntrySizeLimitWarning = false;
+ // Sort extra index entries by size so we can process them in increasing order and return as
+ // many as possible within memory limits.
+ using ExtraIt = SimpleBSONObjSet::const_iterator;
+ std::vector<ExtraIt> extraIndexEntriesBySize;
for (const auto& extraIndexEntry : _extraIndexEntries) {
const SimpleBSONObjSet& entries = extraIndexEntry.second;
- for (const auto& entry : entries) {
- numExtraIndexEntriesSizeBytes += entry.objsize();
- if (numExtraIndexEntriesSizeBytes <= kErrorSizeBytes) {
- results->extraIndexEntries.push_back(entry);
- } else if (!extraIndexEntrySizeLimitWarning) {
- StringBuilder ss;
- ss << "Not all extra index entry inconsistencies are listed due to size "
- "limitations.";
- results->errors.push_back(ss.str());
-
- extraIndexEntrySizeLimitWarning = true;
- }
-
- std::string indexName = entry["indexName"].String();
- if (!results->indexResultsMap.at(indexName).valid) {
- continue;
- }
+ extraIndexEntriesBySize.reserve(extraIndexEntriesBySize.size() + entries.size());
+ for (auto it = entries.begin(); it != entries.end(); ++it) {
+ extraIndexEntriesBySize.push_back(it);
+ }
+ }
+ std::sort(extraIndexEntriesBySize.begin(),
+ extraIndexEntriesBySize.end(),
+ [](const ExtraIt& a, const ExtraIt& b) { return a->objsize() < b->objsize(); });
+ bool extraIndexEntrySizeLimitWarning = false;
+ for (const auto& entry : extraIndexEntriesBySize) {
+ numExtraIndexEntriesSizeBytes += entry->objsize();
+ if (first || numExtraIndexEntriesSizeBytes <= kErrorSizeBytes) {
+ results->extraIndexEntries.push_back(*entry);
+ first = false;
+ } else if (!extraIndexEntrySizeLimitWarning) {
StringBuilder ss;
- ss << "Index with name '" << indexName << "' has inconsistencies.";
+ ss << "Not all extra index entry inconsistencies are listed due to size "
+ "limitations.";
results->errors.push_back(ss.str());
- results->indexResultsMap.at(indexName).valid = false;
+ extraIndexEntrySizeLimitWarning = true;
}
+
+ std::string indexName = (*entry)["indexName"].String();
+ if (!results->indexResultsMap.at(indexName).valid) {
+ continue;
+ }
+
+ StringBuilder ss;
+ ss << "Index with name '" << indexName << "' has inconsistencies.";
+ results->errors.push_back(ss.str());
+
+ results->indexResultsMap.at(indexName).valid = false;
}
// Inform how many inconsistencies were detected.
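Note that first is shared between the missing-entry loop and this extra-entry loop: it is cleared only once some entry has actually been reported, so when no missing entries were listed the first extra entry is still kept unconditionally, and validate surfaces at least one inconsistency across the two categories. A sketch of the combined effect, again with hypothetical names:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    int main() {
        const std::size_t budget = 8;
        std::vector<std::size_t> missingSizes = {};      // no missing entries
        std::vector<std::size_t> extraSizes = {16, 32};  // each alone exceeds the budget

        bool first = true;
        std::size_t reported = 0;

        std::size_t usedBytes = 0;
        for (std::size_t s : missingSizes) {
            usedBytes += s;
            if (first || usedBytes <= budget) {
                ++reported;
                first = false;
            }
        }

        usedBytes = 0;  // the two categories use separate byte counters
        for (std::size_t s : extraSizes) {
            usedBytes += s;
            if (first || usedBytes <= budget) {
                ++reported;
                first = false;
            }
        }

        assert(reported == 1);  // exactly one oversized extra entry is surfaced
        return 0;
    }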
@@ -476,48 +505,57 @@ bool KeyStringIndexConsistency::limitMemoryUsageForSecondPhase(ValidateResults*
return true;
}
- bool hasNonZeroBucket = false;
- uint64_t memoryUsedSoFarBytes = 0;
- uint32_t smallestBucketBytes = std::numeric_limits<uint32_t>::max();
- // Zero out any nonzero buckets that would put us over maxMemoryUsageBytes.
- std::for_each(_indexKeyBuckets.begin(), _indexKeyBuckets.end(), [&](IndexKeyBucket& bucket) {
- if (bucket.indexKeyCount == 0) {
- return;
- }
-
- smallestBucketBytes = std::min(smallestBucketBytes, bucket.bucketSizeBytes);
- if (bucket.bucketSizeBytes + memoryUsedSoFarBytes > maxMemoryUsageBytes) {
- // Including this bucket would put us over the memory limit, so zero this bucket. We
- // don't want to keep any entry that will exceed the memory limit in the second phase so
- // we don't double the 'maxMemoryUsageBytes' here.
- bucket.indexKeyCount = 0;
- return;
- }
- memoryUsedSoFarBytes += bucket.bucketSizeBytes;
- hasNonZeroBucket = true;
- });
+ // At this point we know we'll exceed the memory limit, and will pare back some of the buckets.
+ // First we'll see what the smallest bucket is, and if that's over the limit by itself, then
+ // we can zero out all the other buckets. Otherwise we'll keep as many buckets as we can.
- StringBuilder memoryLimitMessage;
- memoryLimitMessage << "Memory limit for validation is currently set to "
- << maxValidateMemoryUsageMB.load()
- << "MB and can be configured via the 'maxValidateMemoryUsageMB' parameter.";
+ auto smallestBucketWithAnInconsistency = std::min_element(
+ _indexKeyBuckets.begin(),
+ _indexKeyBuckets.end(),
+ [](const IndexKeyBucket& lhs, const IndexKeyBucket& rhs) {
+ if (lhs.indexKeyCount != 0) {
+ return rhs.indexKeyCount == 0 || lhs.bucketSizeBytes < rhs.bucketSizeBytes;
+ }
+ return false;
+ });
+ invariant(smallestBucketWithAnInconsistency->indexKeyCount != 0);
+
+ if (smallestBucketWithAnInconsistency->bucketSizeBytes > maxMemoryUsageBytes) {
+ // We're going to just keep the smallest bucket, and zero everything else.
+ std::for_each(
+ _indexKeyBuckets.begin(), _indexKeyBuckets.end(), [&](IndexKeyBucket& bucket) {
+ if (&bucket == &(*smallestBucketWithAnInconsistency)) {
+ // We keep the smallest bucket.
+ return;
+ }
- if (!hasNonZeroBucket) {
- const uint32_t minMemoryNeededMB = (smallestBucketBytes / (1024 * 1024)) + 1;
- StringBuilder ss;
- ss << "Unable to report index entry inconsistencies due to memory limitations. Need at "
- "least "
- << minMemoryNeededMB << "MB to report at least one index entry inconsistency. "
- << memoryLimitMessage.str();
- result->errors.push_back(ss.str());
- result->valid = false;
+ bucket.indexKeyCount = 0;
+ });
+ } else {
+ // We're going to scan through the buckets and keep as many as we can.
+ std::uint32_t memoryUsedSoFarBytes = 0;
+ std::for_each(
+ _indexKeyBuckets.begin(), _indexKeyBuckets.end(), [&](IndexKeyBucket& bucket) {
+ if (bucket.indexKeyCount == 0) {
+ return;
+ }
- return false;
+ if (bucket.bucketSizeBytes + memoryUsedSoFarBytes > maxMemoryUsageBytes) {
+ // Including this bucket would put us over the memory limit, so zero this
+ // bucket. We don't want to keep any entry that will exceed the memory limit in
+ // the second phase so we don't double the 'maxMemoryUsageBytes' here.
+ bucket.indexKeyCount = 0;
+ return;
+ }
+ memoryUsedSoFarBytes += bucket.bucketSizeBytes;
+ });
}
StringBuilder ss;
- ss << "Not all index entry inconsistencies are reported due to memory limitations. "
- << memoryLimitMessage.str();
+ ss << "Not all index entry inconsistencies are reported due to memory limitations. Memory "
+ "limit for validation is currently set to "
+ << maxValidateMemoryUsageMB.load()
+ << "MB and can be configured via the 'maxValidateMemoryUsageMB' parameter.";
result->errors.push_back(ss.str());
result->valid = false;
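The comparator passed to std::min_element above is the subtle piece: it ranks any bucket that holds inconsistencies ahead of every empty bucket, so whenever at least one bucket is nonempty the result is the smallest such bucket and the invariant on indexKeyCount holds. A self-contained sketch of that ordering with a hypothetical Bucket type:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct Bucket {
        std::uint32_t indexKeyCount;
        std::uint32_t bucketSizeBytes;
    };

    int main() {
        // Two empty buckets are smaller in bytes, but must never be selected.
        std::vector<Bucket> buckets = {{0, 1}, {3, 400}, {0, 2}, {1, 100}};

        auto smallest = std::min_element(
            buckets.begin(), buckets.end(), [](const Bucket& lhs, const Bucket& rhs) {
                if (lhs.indexKeyCount != 0) {
                    // A nonempty bucket beats any empty bucket; among nonempty
                    // buckets, the smaller byte size wins.
                    return rhs.indexKeyCount == 0 || lhs.bucketSizeBytes < rhs.bucketSizeBytes;
                }
                return false;  // an empty bucket never compares as smaller
            });

        assert(smallest->indexKeyCount == 1 && smallest->bucketSizeBytes == 100);
        return 0;
    }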