diff options
Diffstat (limited to 'src/mongo/db/catalog/index_consistency.cpp')
-rw-r--r-- | src/mongo/db/catalog/index_consistency.cpp | 156 |
1 files changed, 97 insertions, 59 deletions
diff --git a/src/mongo/db/catalog/index_consistency.cpp b/src/mongo/db/catalog/index_consistency.cpp index c9526d94a6d..48cba4589de 100644 --- a/src/mongo/db/catalog/index_consistency.cpp +++ b/src/mongo/db/catalog/index_consistency.cpp @@ -223,11 +223,26 @@ void KeyStringIndexConsistency::addIndexEntryErrors(OperationContext* opCtx, numExtraIndexEntryErrors += item.second.size(); } + // Sort missing index entries by size so we can process in order of increasing size and return + // as many as possible within memory limits. + using MissingIt = decltype(_missingIndexEntries)::const_iterator; + std::vector<MissingIt> missingIndexEntriesBySize; + missingIndexEntriesBySize.reserve(_missingIndexEntries.size()); + for (auto it = _missingIndexEntries.begin(); it != _missingIndexEntries.end(); ++it) { + missingIndexEntriesBySize.push_back(it); + } + std::sort(missingIndexEntriesBySize.begin(), + missingIndexEntriesBySize.end(), + [](const MissingIt& a, const MissingIt& b) { + return a->second.keyString.getSize() < b->second.keyString.getSize(); + }); + // Inform which indexes have inconsistencies and add the BSON objects of the inconsistent index // entries to the results vector. bool missingIndexEntrySizeLimitWarning = false; - for (const auto& missingIndexEntry : _missingIndexEntries) { - const IndexEntryInfo& entryInfo = missingIndexEntry.second; + bool first = true; + for (const auto& missingIndexEntry : missingIndexEntriesBySize) { + const IndexEntryInfo& entryInfo = missingIndexEntry->second; KeyString::Value ks = entryInfo.keyString; auto indexKey = KeyString::toBsonSafe(ks.getBuffer(), ks.getSize(), entryInfo.ord, ks.getTypeBits()); @@ -238,8 +253,9 @@ void KeyStringIndexConsistency::addIndexEntryErrors(OperationContext* opCtx, entryInfo.idKey); numMissingIndexEntriesSizeBytes += entry.objsize(); - if (numMissingIndexEntriesSizeBytes <= kErrorSizeBytes) { + if (first || numMissingIndexEntriesSizeBytes <= kErrorSizeBytes) { results->missingIndexEntries.push_back(entry); + first = false; } else if (!missingIndexEntrySizeLimitWarning) { StringBuilder ss; ss << "Not all missing index entry inconsistencies are listed due to size limitations."; @@ -260,33 +276,46 @@ void KeyStringIndexConsistency::addIndexEntryErrors(OperationContext* opCtx, results->indexResultsMap.at(indexName).valid = false; } - bool extraIndexEntrySizeLimitWarning = false; + // Sort extra index entries by size so we can process in order of increasing size and return as + // many as possible within memory limits. + using ExtraIt = SimpleBSONObjSet::const_iterator; + std::vector<ExtraIt> extraIndexEntriesBySize; for (const auto& extraIndexEntry : _extraIndexEntries) { const SimpleBSONObjSet& entries = extraIndexEntry.second; - for (const auto& entry : entries) { - numExtraIndexEntriesSizeBytes += entry.objsize(); - if (numExtraIndexEntriesSizeBytes <= kErrorSizeBytes) { - results->extraIndexEntries.push_back(entry); - } else if (!extraIndexEntrySizeLimitWarning) { - StringBuilder ss; - ss << "Not all extra index entry inconsistencies are listed due to size " - "limitations."; - results->errors.push_back(ss.str()); - - extraIndexEntrySizeLimitWarning = true; - } - - std::string indexName = entry["indexName"].String(); - if (!results->indexResultsMap.at(indexName).valid) { - continue; - } + extraIndexEntriesBySize.reserve(extraIndexEntriesBySize.size() + entries.size()); + for (auto it = entries.begin(); it != entries.end(); ++it) { + extraIndexEntriesBySize.push_back(it); + } + } + std::sort(extraIndexEntriesBySize.begin(), + extraIndexEntriesBySize.end(), + [](const ExtraIt& a, const ExtraIt& b) { return a->objsize() < b->objsize(); }); + bool extraIndexEntrySizeLimitWarning = false; + for (const auto& entry : extraIndexEntriesBySize) { + numExtraIndexEntriesSizeBytes += entry->objsize(); + if (first || numExtraIndexEntriesSizeBytes <= kErrorSizeBytes) { + results->extraIndexEntries.push_back(*entry); + first = false; + } else if (!extraIndexEntrySizeLimitWarning) { StringBuilder ss; - ss << "Index with name '" << indexName << "' has inconsistencies."; + ss << "Not all extra index entry inconsistencies are listed due to size " + "limitations."; results->errors.push_back(ss.str()); - results->indexResultsMap.at(indexName).valid = false; + extraIndexEntrySizeLimitWarning = true; } + + std::string indexName = (*entry)["indexName"].String(); + if (!results->indexResultsMap.at(indexName).valid) { + continue; + } + + StringBuilder ss; + ss << "Index with name '" << indexName << "' has inconsistencies."; + results->errors.push_back(ss.str()); + + results->indexResultsMap.at(indexName).valid = false; } // Inform how many inconsistencies were detected. @@ -476,48 +505,57 @@ bool KeyStringIndexConsistency::limitMemoryUsageForSecondPhase(ValidateResults* return true; } - bool hasNonZeroBucket = false; - uint64_t memoryUsedSoFarBytes = 0; - uint32_t smallestBucketBytes = std::numeric_limits<uint32_t>::max(); - // Zero out any nonzero buckets that would put us over maxMemoryUsageBytes. - std::for_each(_indexKeyBuckets.begin(), _indexKeyBuckets.end(), [&](IndexKeyBucket& bucket) { - if (bucket.indexKeyCount == 0) { - return; - } - - smallestBucketBytes = std::min(smallestBucketBytes, bucket.bucketSizeBytes); - if (bucket.bucketSizeBytes + memoryUsedSoFarBytes > maxMemoryUsageBytes) { - // Including this bucket would put us over the memory limit, so zero this bucket. We - // don't want to keep any entry that will exceed the memory limit in the second phase so - // we don't double the 'maxMemoryUsageBytes' here. - bucket.indexKeyCount = 0; - return; - } - memoryUsedSoFarBytes += bucket.bucketSizeBytes; - hasNonZeroBucket = true; - }); + // At this point we know we'll exceed the memory limit, and will pare back some of the buckets. + // First we'll see what the smallest bucket is, and if that's over the limit by itself, then + // we can zero out all the other buckets. Otherwise we'll keep as many buckets as we can. - StringBuilder memoryLimitMessage; - memoryLimitMessage << "Memory limit for validation is currently set to " - << maxValidateMemoryUsageMB.load() - << "MB and can be configured via the 'maxValidateMemoryUsageMB' parameter."; + auto smallestBucketWithAnInconsistency = std::min_element( + _indexKeyBuckets.begin(), + _indexKeyBuckets.end(), + [](const IndexKeyBucket& lhs, const IndexKeyBucket& rhs) { + if (lhs.indexKeyCount != 0) { + return rhs.indexKeyCount == 0 || lhs.bucketSizeBytes < rhs.bucketSizeBytes; + } + return false; + }); + invariant(smallestBucketWithAnInconsistency->indexKeyCount != 0); + + if (smallestBucketWithAnInconsistency->bucketSizeBytes > maxMemoryUsageBytes) { + // We're going to just keep the smallest bucket, and zero everything else. + std::for_each( + _indexKeyBuckets.begin(), _indexKeyBuckets.end(), [&](IndexKeyBucket& bucket) { + if (&bucket == &(*smallestBucketWithAnInconsistency)) { + // We keep the smallest bucket. + return; + } - if (!hasNonZeroBucket) { - const uint32_t minMemoryNeededMB = (smallestBucketBytes / (1024 * 1024)) + 1; - StringBuilder ss; - ss << "Unable to report index entry inconsistencies due to memory limitations. Need at " - "least " - << minMemoryNeededMB << "MB to report at least one index entry inconsistency. " - << memoryLimitMessage.str(); - result->errors.push_back(ss.str()); - result->valid = false; + bucket.indexKeyCount = 0; + }); + } else { + // We're going to scan through the buckets and keep as many as we can. + std::uint32_t memoryUsedSoFarBytes = 0; + std::for_each( + _indexKeyBuckets.begin(), _indexKeyBuckets.end(), [&](IndexKeyBucket& bucket) { + if (bucket.indexKeyCount == 0) { + return; + } - return false; + if (bucket.bucketSizeBytes + memoryUsedSoFarBytes > maxMemoryUsageBytes) { + // Including this bucket would put us over the memory limit, so zero this + // bucket. We don't want to keep any entry that will exceed the memory limit in + // the second phase so we don't double the 'maxMemoryUsageBytes' here. + bucket.indexKeyCount = 0; + return; + } + memoryUsedSoFarBytes += bucket.bucketSizeBytes; + }); } StringBuilder ss; - ss << "Not all index entry inconsistencies are reported due to memory limitations. " - << memoryLimitMessage.str(); + ss << "Not all index entry inconsistencies are reported due to memory limitations. Memory " + "limit for validation is currently set to " + << maxValidateMemoryUsageMB.load() + << "MB and can be configured via the 'maxValidateMemoryUsageMB' parameter."; result->errors.push_back(ss.str()); result->valid = false; |