diff options
author | Gregory Wlodarek <gregory.wlodarek@mongodb.com> | 2019-04-29 20:18:42 -0400 |
---|---|---|
committer | Gregory Wlodarek <gregory.wlodarek@mongodb.com> | 2019-04-29 20:25:09 -0400 |
commit | abf37f69a45009f44f275e03e2f07d2d496d5b8d (patch) | |
tree | 5f58d5c227e57b44341b1be900b0feb1dc0fb3de | |
parent | 7ea05d8684052198c595dee0b9a9cabf652e904d (diff) | |
download | mongo-abf37f69a45009f44f275e03e2f07d2d496d5b8d.tar.gz |
SERVER-30356 Improve error reporting for validation
-rw-r--r-- | jstests/core/geo_s2sparse.js | 4 | ||||
-rw-r--r-- | jstests/core/index_partial_create_drop.js | 2 | ||||
-rw-r--r-- | jstests/core/index_partial_write_ops.js | 2 | ||||
-rw-r--r-- | src/mongo/db/catalog/collection_impl.cpp | 85 | ||||
-rw-r--r-- | src/mongo/db/catalog/index_consistency.cpp | 273 | ||||
-rw-r--r-- | src/mongo/db/catalog/index_consistency.h | 119 | ||||
-rw-r--r-- | src/mongo/db/catalog/private/record_store_validate_adaptor.cpp | 34 | ||||
-rw-r--r-- | src/mongo/db/commands/validate.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/storage/record_store.h | 2 |
9 files changed, 403 insertions, 120 deletions
diff --git a/jstests/core/geo_s2sparse.js b/jstests/core/geo_s2sparse.js index 3677e98f84c..57f4f73fa3a 100644 --- a/jstests/core/geo_s2sparse.js +++ b/jstests/core/geo_s2sparse.js @@ -7,7 +7,7 @@ var coll = db.geo_s2sparse; var point = {type: "Point", coordinates: [5, 5]}; var indexSpec = {geo: "2dsphere", nonGeo: 1}; - var indexName = 'test.geo_s2sparse.$geo_2dsphere_nonGeo_1'; + var indexName = 'geo_2dsphere_nonGeo_1'; // // V2 indices are "geo sparse" always. @@ -103,7 +103,7 @@ coll.drop(); coll.ensureIndex({geo: "2dsphere", otherGeo: "2dsphere"}); - indexName = 'test.geo_s2sparse.$geo_2dsphere_otherGeo_2dsphere'; + indexName = 'geo_2dsphere_otherGeo_2dsphere'; // Insert N documents with the first geo field. bulkInsertDocs(coll, N, function(i) { diff --git a/jstests/core/index_partial_create_drop.js b/jstests/core/index_partial_create_drop.js index bf25fa3a6b1..55a6b06d117 100644 --- a/jstests/core/index_partial_create_drop.js +++ b/jstests/core/index_partial_create_drop.js @@ -23,7 +23,7 @@ } else { kpi = res.keysPerIndex; } - return kpi[coll.getFullName() + ".$" + idxName]; + return kpi[idxName]; }; coll.drop(); diff --git a/jstests/core/index_partial_write_ops.js b/jstests/core/index_partial_write_ops.js index a88989883d1..730bcca5318 100644 --- a/jstests/core/index_partial_write_ops.js +++ b/jstests/core/index_partial_write_ops.js @@ -15,7 +15,7 @@ } else { kpi = res.keysPerIndex; } - return kpi[coll.getFullName() + ".$" + idxName]; + return kpi[idxName]; }; coll.drop(); diff --git a/src/mongo/db/catalog/collection_impl.cpp b/src/mongo/db/catalog/collection_impl.cpp index 98b11660c74..57f1f5fa173 100644 --- a/src/mongo/db/catalog/collection_impl.cpp +++ b/src/mongo/db/catalog/collection_impl.cpp @@ -184,7 +184,6 @@ StatusWith<CollectionImpl::ValidationAction> _parseValidationAction(StringData n } // namespace -using std::endl; using std::string; using std::unique_ptr; using std::vector; @@ -412,8 +411,8 @@ Status CollectionImpl::insertDocuments(OperationContext* opCtx, string whenFirst = firstIdElem ? (string(" when first _id is ") + firstIdElem.str()) : ""; while (MONGO_FAIL_POINT(hangAfterCollectionInserts)) { - log() << "hangAfterCollectionInserts fail point enabled for " << _ns.toString() - << whenFirst << ". Blocking until fail point is disabled."; + log() << "hangAfterCollectionInserts fail point enabled for " << _ns << whenFirst + << ". Blocking until fail point is disabled."; mongo::sleepsecs(1); opCtx->checkForInterrupt(); } @@ -1095,8 +1094,9 @@ void _validateIndexes(OperationContext* opCtx, const IndexDescriptor* descriptor = entry->descriptor(); const IndexAccessMethod* iam = entry->accessMethod(); - log(LogComponent::kIndex) << "validating index " << descriptor->indexNamespace() << endl; - ValidateResults& curIndexResults = (*indexNsResultsMap)[descriptor->indexNamespace()]; + log(LogComponent::kIndex) << "validating index " << descriptor->indexName() + << " on collection " << descriptor->parentNS(); + ValidateResults& curIndexResults = (*indexNsResultsMap)[descriptor->indexName()]; bool checkCounts = false; int64_t numTraversedKeys; int64_t numValidatedKeys; @@ -1120,7 +1120,7 @@ void _validateIndexes(OperationContext* opCtx, } if (curIndexResults.valid) { - keysPerIndex->appendNumber(descriptor->indexNamespace(), + keysPerIndex->appendNumber(descriptor->indexName(), static_cast<long long>(numTraversedKeys)); } else { results->valid = false; @@ -1131,19 +1131,54 @@ void _validateIndexes(OperationContext* opCtx, } } -void _markIndexEntriesInvalid(ValidateResultsMap* indexNsResultsMap, ValidateResults* results) { +/** + * Executes the second phase of validation for improved error reporting. This is only done if + * any index inconsistencies are found during the first phase of validation. + */ +void _gatherIndexEntryErrors(OperationContext* opCtx, + RecordStore* recordStore, + IndexCatalog* indexCatalog, + IndexConsistency* indexConsistency, + RecordStoreValidateAdaptor* indexValidator, + ValidateResultsMap* indexNsResultsMap, + ValidateResults* result) { + indexConsistency->setSecondPhase(); + + log(LogComponent::kIndex) << "Starting to traverse through all the document key sets."; + + // During the second phase of validation, iterate through each documents key set and only record + // the keys that were inconsistent during the first phase of validation. + std::unique_ptr<SeekableRecordCursor> cursor = recordStore->getCursor(opCtx, true); + while (auto record = cursor->next()) { + opCtx->checkForInterrupt(); - // The error message can't be more specific because even though the index is - // invalid, we won't know if the corruption occurred on the index entry or in - // the document. - for (auto& it : *indexNsResultsMap) { - // Marking all indexes as invalid since we don't know which one failed. - ValidateResults& r = it.second; - r.valid = false; + // We can ignore the status of validate as it was already checked during the first phase. + size_t validatedSize; + indexValidator->validate(record->id, record->data, &validatedSize).ignore(); } - string msg = "one or more indexes contain invalid index entries."; - results->errors.push_back(msg); - results->valid = false; + + log(LogComponent::kIndex) << "Finished traversing through all the document key sets."; + log(LogComponent::kIndex) << "Starting to traverse through all the indexes."; + + // Iterate through all the indexes in the collection and only record the index entry keys that + // had inconsistencies during the first phase. + std::unique_ptr<IndexCatalog::IndexIterator> it = indexCatalog->getIndexIterator(opCtx, false); + while (it->more()) { + opCtx->checkForInterrupt(); + + const IndexCatalogEntry* entry = it->next(); + const IndexDescriptor* descriptor = entry->descriptor(); + const IndexAccessMethod* iam = entry->accessMethod(); + + log(LogComponent::kIndex) << "Traversing through the index entries for index " + << descriptor->indexName() << "."; + indexValidator->traverseIndex( + iam, descriptor, /*ValidateResults=*/nullptr, /*numTraversedKeys=*/nullptr); + } + + log(LogComponent::kIndex) << "Finished traversing through all the indexes."; + + indexConsistency->addIndexEntryErrors(indexNsResultsMap, result); } void _validateIndexKeyCount(OperationContext* opCtx, @@ -1156,7 +1191,7 @@ void _validateIndexKeyCount(OperationContext* opCtx, indexCatalog->getIndexIterator(opCtx, false); while (indexIterator->more()) { const IndexDescriptor* descriptor = indexIterator->next()->descriptor(); - ValidateResults& curIndexResults = (*indexNsResultsMap)[descriptor->indexNamespace()]; + ValidateResults& curIndexResults = (*indexNsResultsMap)[descriptor->indexName()]; if (curIndexResults.valid) { indexValidator->validateIndexKeyCount( @@ -1277,8 +1312,7 @@ Status CollectionImpl::validate(OperationContext* opCtx, // Validate the record store std::string uuidString = str::stream() << " (UUID: " << (uuid() ? uuid()->toString() : "none") << ")"; - log(LogComponent::kIndex) << "validating collection " << ns().toString() << uuidString - << endl; + log(LogComponent::kIndex) << "validating collection " << ns() << uuidString; _validateRecordStore( opCtx, _recordStore, level, background, &indexValidator, results, output); @@ -1296,7 +1330,16 @@ Status CollectionImpl::validate(OperationContext* opCtx, results); if (indexConsistency.haveEntryMismatch()) { - _markIndexEntriesInvalid(&indexNsResultsMap, results); + log(LogComponent::kIndex) + << "Index inconsistencies were detected on collection " << ns() + << ". Starting the second phase of index validation to gather concise errors."; + _gatherIndexEntryErrors(opCtx, + _recordStore, + _indexCatalog.get(), + &indexConsistency, + &indexValidator, + &indexNsResultsMap, + results); } } diff --git a/src/mongo/db/catalog/index_consistency.cpp b/src/mongo/db/catalog/index_consistency.cpp index a10e63dee3a..1ff0f14d65a 100644 --- a/src/mongo/db/catalog/index_consistency.cpp +++ b/src/mongo/db/catalog/index_consistency.cpp @@ -41,6 +41,7 @@ #include "mongo/db/index_names.h" #include "mongo/db/server_options.h" #include "mongo/db/storage/key_string.h" +#include "mongo/db/storage/record_store.h" #include "mongo/db/storage/sorted_data_interface.h" #include "mongo/util/elapsed_tracker.h" @@ -68,7 +69,8 @@ IndexConsistency::IndexConsistency(OperationContext* opCtx, _recordStore(recordStore), _tracker(opCtx->getServiceContext()->getFastClockSource(), internalQueryExecYieldIterations.load(), - Milliseconds(internalQueryExecYieldPeriodMS.load())) { + Milliseconds(internalQueryExecYieldPeriodMS.load())), + _firstPhase(true) { IndexCatalog* indexCatalog = _collection->getIndexCatalog(); std::unique_ptr<IndexCatalog::IndexIterator> indexIterator = @@ -78,18 +80,18 @@ IndexConsistency::IndexConsistency(OperationContext* opCtx, while (indexIterator->more()) { const IndexDescriptor* descriptor = indexIterator->next()->descriptor(); - std::string indexNs = descriptor->indexNamespace(); - - _indexNumber[descriptor->indexNamespace()] = indexNumber; + std::string indexName = descriptor->indexName(); + _indexNumber[indexName] = indexNumber; IndexInfo indexInfo; - indexInfo.isReady = - _collection->getCatalogEntry()->isIndexReady(opCtx, descriptor->indexName()); + indexInfo.indexName = indexName; + indexInfo.keyPattern = descriptor->keyPattern(); + indexInfo.isReady = _collection->getCatalogEntry()->isIndexReady(opCtx, indexName); - uint32_t indexNsHash; - MurmurHash3_x86_32(indexNs.c_str(), indexNs.size(), 0, &indexNsHash); - indexInfo.indexNsHash = indexNsHash; + uint32_t indexNameHash; + MurmurHash3_x86_32(indexName.c_str(), indexName.size(), 0, &indexNameHash); + indexInfo.indexNameHash = indexNameHash; indexInfo.indexScanFinished = false; indexInfo.numKeys = 0; @@ -103,24 +105,30 @@ IndexConsistency::IndexConsistency(OperationContext* opCtx, } } -void IndexConsistency::addDocKey(const KeyString& ks, int indexNumber) { +void IndexConsistency::addDocKey(const KeyString& ks, + int indexNumber, + const RecordId& recordId, + const BSONObj& indexKey) { if (indexNumber < 0 || indexNumber >= static_cast<int>(_indexesInfo.size())) { return; } stdx::lock_guard<stdx::mutex> lock(_classMutex); - _addDocKey_inlock(ks, indexNumber); + _addDocKey_inlock(ks, indexNumber, recordId, indexKey); } -void IndexConsistency::addIndexKey(const KeyString& ks, int indexNumber) { +void IndexConsistency::addIndexKey(const KeyString& ks, + int indexNumber, + const RecordId& recordId, + const BSONObj& indexKey) { if (indexNumber < 0 || indexNumber >= static_cast<int>(_indexesInfo.size())) { return; } stdx::lock_guard<stdx::mutex> lock(_classMutex); - _addIndexKey_inlock(ks, indexNumber); + _addIndexKey_inlock(ks, indexNumber, recordId, indexKey); } void IndexConsistency::addMultikeyMetadataPath(const KeyString& ks, int indexNumber) { @@ -216,33 +224,123 @@ int64_t IndexConsistency::getNumExtraIndexKeys(int indexNumber) const { return _indexesInfo.at(indexNumber).numExtraIndexKeys; } -void IndexConsistency::nextStage() { +int IndexConsistency::getIndexNumber(const std::string& indexName) { - stdx::lock_guard<stdx::mutex> lock(_classMutex); - if (_stage == ValidationStage::DOCUMENT) { - _stage = ValidationStage::INDEX; - } else if (_stage == ValidationStage::INDEX) { - _stage = ValidationStage::NONE; + auto search = _indexNumber.find(indexName); + if (search != _indexNumber.end()) { + return search->second; } + + return -1; } -ValidationStage IndexConsistency::getStage() const { +void IndexConsistency::setSecondPhase() { stdx::lock_guard<stdx::mutex> lock(_classMutex); - return _stage; + invariant(_firstPhase); + _firstPhase = false; } -int IndexConsistency::getIndexNumber(const std::string& indexNs) { +void IndexConsistency::addIndexEntryErrors(ValidateResultsMap* indexNsResultsMap, + ValidateResults* results) { + stdx::lock_guard<stdx::mutex> lock(_classMutex); + invariant(!_firstPhase); - auto search = _indexNumber.find(indexNs); - if (search != _indexNumber.end()) { - return search->second; + // We'll report up to 1MB for extra index entry errors and missing index entry errors. + const int kErrorSizeMB = 1 * 1024 * 1024; + int numMissingIndexEntriesSizeMB = 0; + int numExtraIndexEntriesSizeMB = 0; + + int numMissingIndexEntryErrors = _missingIndexEntries.size(); + int numExtraIndexEntryErrors = 0; + for (const auto& item : _extraIndexEntries) { + numExtraIndexEntryErrors += item.second.size(); } - return -1; + // Inform which indexes have inconsistences and add the BSON objects of the inconsistent index + // entries to the results vector. + bool missingIndexEntrySizeLimitWarning = false; + for (const auto& missingIndexEntry : _missingIndexEntries) { + const BSONObj& entry = missingIndexEntry.second; + + // Only count the indexKey and idKey fields towards the total size. + numMissingIndexEntriesSizeMB += entry["indexKey"].size(); + if (entry.hasField("idKey")) { + numMissingIndexEntriesSizeMB += entry["idKey"].size(); + } + + if (numMissingIndexEntriesSizeMB <= kErrorSizeMB) { + results->missingIndexEntries.push_back(entry); + } else if (!missingIndexEntrySizeLimitWarning) { + StringBuilder ss; + ss << "Not all missing index entry inconsistencies are listed due to size limitations."; + results->errors.push_back(ss.str()); + + missingIndexEntrySizeLimitWarning = true; + } + + std::string indexName = entry["indexName"].String(); + if (!indexNsResultsMap->at(indexName).valid) { + continue; + } + + StringBuilder ss; + ss << "Index with name '" << indexName << "' has inconsistencies."; + results->errors.push_back(ss.str()); + + indexNsResultsMap->at(indexName).valid = false; + } + + bool extraIndexEntrySizeLimitWarning = false; + for (const auto& extraIndexEntry : _extraIndexEntries) { + const SimpleBSONObjSet& entries = extraIndexEntry.second; + for (const auto& entry : entries) { + // Only count the indexKey field towards the total size. + numExtraIndexEntriesSizeMB += entry["indexKey"].size(); + if (numExtraIndexEntriesSizeMB <= kErrorSizeMB) { + results->extraIndexEntries.push_back(entry); + } else if (!extraIndexEntrySizeLimitWarning) { + StringBuilder ss; + ss << "Not all extra index entry inconsistencies are listed due to size " + "limitations."; + results->errors.push_back(ss.str()); + + extraIndexEntrySizeLimitWarning = true; + } + + std::string indexName = entry["indexName"].String(); + if (!indexNsResultsMap->at(indexName).valid) { + continue; + } + + StringBuilder ss; + ss << "Index with name '" << indexName << "' has inconsistencies."; + results->errors.push_back(ss.str()); + + indexNsResultsMap->at(indexName).valid = false; + } + } + + // Inform how many inconsistencies were detected. + if (numMissingIndexEntryErrors > 0) { + StringBuilder ss; + ss << "Detected " << numMissingIndexEntryErrors << " missing index entries."; + results->warnings.push_back(ss.str()); + } + + if (numExtraIndexEntryErrors > 0) { + StringBuilder ss; + ss << "Detected " << numExtraIndexEntryErrors << " extra index entries."; + results->warnings.push_back(ss.str()); + } + + results->valid = false; } -void IndexConsistency::_addDocKey_inlock(const KeyString& ks, int indexNumber) { +void IndexConsistency::_addDocKey_inlock(const KeyString& ks, + int indexNumber, + const RecordId& recordId, + const BSONObj& indexKey) { // Ignore indexes that weren't ready before we started validation. if (!_indexesInfo.at(indexNumber).isReady) { @@ -250,11 +348,46 @@ void IndexConsistency::_addDocKey_inlock(const KeyString& ks, int indexNumber) { } const uint32_t hash = _hashKeyString(ks, indexNumber); - _indexKeyCount[hash]++; - _indexesInfo.at(indexNumber).numRecords++; + + if (_firstPhase) { + // During the first phase of validation we only keep track of the count for the document + // keys encountered. + _indexKeyCount[hash]++; + _indexesInfo.at(indexNumber).numRecords++; + } else { + // For the second phase of validation, we keep track of the document keys that mapped to + // an inconsistent hash bucket during the first phase. + auto searchBuckets = _indexKeyCount.find(hash); + invariant(searchBuckets != _indexKeyCount.end()); + if (searchBuckets->second == 0) { + // No inconsistencies in this hash bucket during the first phase. + return; + } + + // Get the documents _id index key. + auto cursor = _recordStore->getCursor(_opCtx); + auto record = cursor->seekExact(recordId); + invariant(record); + + BSONObj data = record->data.toBson(); + boost::optional<BSONElement> idKey = boost::none; + if (data.hasField("_id")) { + idKey = data["_id"]; + } + + std::string key = std::string(ks.getBuffer(), ks.getSize()); + BSONObj info = _generateInfo(indexNumber, recordId, indexKey, idKey); + + // Cannot have duplicate KeyStrings during the document scan phase. + invariant(_missingIndexEntries.count(key) == 0); + _missingIndexEntries.insert(std::make_pair(key, info)); + } } -void IndexConsistency::_addIndexKey_inlock(const KeyString& ks, int indexNumber) { +void IndexConsistency::_addIndexKey_inlock(const KeyString& ks, + int indexNumber, + const RecordId& recordId, + const BSONObj& indexKey) { // Ignore indexes that weren't ready before we started validation. if (!_indexesInfo.at(indexNumber).isReady) { @@ -262,16 +395,84 @@ void IndexConsistency::_addIndexKey_inlock(const KeyString& ks, int indexNumber) } const uint32_t hash = _hashKeyString(ks, indexNumber); - _indexKeyCount[hash]--; - _indexesInfo.at(indexNumber).numKeys++; + + if (_firstPhase) { + // During the first phase of validation we only keep track of the count for the index entry + // keys encountered. + _indexKeyCount[hash]--; + _indexesInfo.at(indexNumber).numKeys++; + } else { + // For the second phase of validation, on the buckets that were inconsistent during the + // first phase, we see if there was a corresponding document key for the index entry key + // we have. + // If there is a corresponding document key for the index entry key, we remove the key from + // the '_missingIndexEntries' map. However if there was no document key for the index entry + // key, we add the key to the '_extraIndexEntries' map. + auto searchBuckets = _indexKeyCount.find(hash); + invariant(searchBuckets != _indexKeyCount.end()); + if (searchBuckets->second == 0) { + // No inconsistencies in this hash bucket during the first phase. + return; + } + + std::string key = std::string(ks.getBuffer(), ks.getSize()); + BSONObj info = _generateInfo(indexNumber, recordId, indexKey, boost::none); + + if (_missingIndexEntries.count(key) == 0) { + // We may have multiple extra index entries for a given KeyString. + auto search = _extraIndexEntries.find(key); + if (search == _extraIndexEntries.end()) { + SimpleBSONObjSet infoSet = {info}; + _extraIndexEntries.insert(std::make_pair(key, infoSet)); + return; + } + + search->second.insert(info); + } else { + _missingIndexEntries.erase(key); + } + } +} + +BSONObj IndexConsistency::_generateInfo(const int& indexNumber, + const RecordId& recordId, + const BSONObj& indexKey, + boost::optional<BSONElement> idKey) { + const std::string& indexName = _indexesInfo.at(indexNumber).indexName; + const BSONObj& keyPattern = _indexesInfo.at(indexNumber).keyPattern; + + // We need to rehydrate the indexKey for improved readability. + // {"": ObjectId(...)} -> {"_id": ObjectId(...)} + auto keysIt = keyPattern.begin(); + auto valuesIt = indexKey.begin(); + + BSONObjBuilder b; + while (keysIt != keyPattern.end()) { + // keysIt and valuesIt must have the same number of elements. + invariant(valuesIt != indexKey.end()); + b.appendAs(*valuesIt, keysIt->fieldName()); + keysIt++; + valuesIt++; + } + + BSONObj rehydratedKey = b.done(); + + if (idKey) { + return BSON("indexName" << indexName << "recordId" << recordId.repr() << "idKey" << *idKey + << "indexKey" + << rehydratedKey); + } else { + return BSON("indexName" << indexName << "recordId" << recordId.repr() << "indexKey" + << rehydratedKey); + } } uint32_t IndexConsistency::_hashKeyString(const KeyString& ks, int indexNumber) const { - uint32_t indexNsHash = _indexesInfo.at(indexNumber).indexNsHash; + uint32_t indexNameHash = _indexesInfo.at(indexNumber).indexNameHash; MurmurHash3_x86_32( - ks.getTypeBits().getBuffer(), ks.getTypeBits().getSize(), indexNsHash, &indexNsHash); - MurmurHash3_x86_32(ks.getBuffer(), ks.getSize(), indexNsHash, &indexNsHash); - return indexNsHash % (1U << 22); + ks.getTypeBits().getBuffer(), ks.getTypeBits().getSize(), indexNameHash, &indexNameHash); + MurmurHash3_x86_32(ks.getBuffer(), ks.getSize(), indexNameHash, &indexNameHash); + return indexNameHash % (1U << 22); } } // namespace mongo diff --git a/src/mongo/db/catalog/index_consistency.h b/src/mongo/db/catalog/index_consistency.h index d22696c5b0c..348f282e0af 100644 --- a/src/mongo/db/catalog/index_consistency.h +++ b/src/mongo/db/catalog/index_consistency.h @@ -39,20 +39,6 @@ namespace mongo { /** - * The ValidationStage allows the IndexConsistency class to perform - * the correct operations that depend on where we are in the validation. - */ -enum class ValidationStage { DOCUMENT, INDEX, NONE }; - -/** - * The ValidationOperation is used by classes using the IndexObserver to let us know what operation - * was associated with it. - * The `UPDATE` operation can be seen as two independent operations (`REMOVE` operation followed - * by an `INSERT` operation). - */ -enum class ValidationOperation { INSERT, REMOVE }; - -/** * The IndexConsistency class is used to keep track of the index consistency. * It does this by using the index keys from index entries and index keys generated from the * document to ensure there is a one-to-one mapping for each key. @@ -64,10 +50,14 @@ enum class ValidationOperation { INSERT, REMOVE }; * Contains all the index information and stats throughout the validation. */ struct IndexInfo { + // The name of the index. + std::string indexName; + // The index key pattern. + BSONObj keyPattern; // Informs us if the index was ready or not for consumption during the start of validation. bool isReady; - // Contains the pre-computed hashed of the index namespace. - uint32_t indexNsHash; + // Contains the pre-computed hash of the index name. + uint32_t indexNameHash; // True if the index has finished scanning from the index scan stage, otherwise false. bool indexScanFinished; // The number of index entries belonging to the index. @@ -85,6 +75,8 @@ struct IndexInfo { }; class IndexConsistency final { + using ValidateResultsMap = std::map<std::string, ValidateResults>; + public: IndexConsistency(OperationContext* opCtx, Collection* collection, @@ -95,8 +87,14 @@ public: /** * Helper functions for `_addDocKey` and `_addIndexKey` for concurrency control. */ - void addDocKey(const KeyString& ks, int indexNumber); - void addIndexKey(const KeyString& ks, int indexNumber); + void addDocKey(const KeyString& ks, + int indexNumber, + const RecordId& recordId, + const BSONObj& indexKey); + void addIndexKey(const KeyString& ks, + int indexNumber, + const RecordId& recordId, + const BSONObj& indexKey); /** * To validate $** multikey metadata paths, we first scan the collection and add a hash of all @@ -146,30 +144,34 @@ public: int64_t getNumExtraIndexKeys(int indexNumber) const; /** - * Moves the `_stage` variable to the next corresponding stage in the following order: - * `DOCUMENT` -> `INDEX` - * `INDEX` -> `NONE` - * `NONE` -> `NONE` + * Returns the index number for the corresponding index name. */ - void nextStage(); + int getIndexNumber(const std::string& indexName); /** - * Returns the `_stage` that the validation is on. + * Informs the IndexConsistency object that we're advancing to the second phase of index + * validation. */ - ValidationStage getStage() const; + void setSecondPhase(); /** - * Returns the index number for the corresponding index namespace's. + * Records the errors gathered from the second phase of index validation into the provided + * ValidateResultsMap and ValidateResults. */ - int getIndexNumber(const std::string& indexNs); + void addIndexEntryErrors(ValidateResultsMap* indexNsResultsMap, ValidateResults* results); private: + IndexConsistency() = delete; + OperationContext* _opCtx; Collection* _collection; const NamespaceString _nss; const RecordStore* _recordStore; ElapsedTracker _tracker; + // Protects the variables below. + mutable stdx::mutex _classMutex; + // We map the hashed KeyString values to a bucket which contain the count of how many // index keys and document keys we've seen in each bucket. // Count rules: @@ -179,7 +181,7 @@ private: // are too few index entries. // - If the count is < 0 in the bucket at the end of the validation pass, then there // are too many index entries. - std::map<uint32_t, uint32_t> _indexKeyCount; + std::map<uint32_t, int32_t> _indexKeyCount; // Contains the corresponding index number for each index namespace std::map<std::string, int> _indexNumber; @@ -187,32 +189,59 @@ private: // A mapping of index numbers to IndexInfo std::map<int, IndexInfo> _indexesInfo; - // The current index namespace being scanned in the index scan phase. - int _currentIndex = -1; - - // The stage that the validation is currently on. - ValidationStage _stage = ValidationStage::DOCUMENT; + // Whether we're in the first or second phase of index validation. + bool _firstPhase; - // Threshold for the number of errors to record before returning "There are too many errors". - static const int _kErrorThreshold = 100; + // Populated during the second phase of validation, this map contains the index entries that + // were pointing at an invalid document key. + // The map contains a KeyString pointing at a set of BSON objects as there may be multiple + // extra index entries for the same KeyString. + std::map<std::string, SimpleBSONObjSet> _extraIndexEntries; - // The current number of errors that are recorded. - int _numErrorsRecorded = 0; + // Populated during the second phase of validation, this map contains the index entries that + // were missing while the document key was in place. + // The map contains a KeyString pointing to a BSON object as there can only be one missing index + // entry for a given KeyString. + std::map<std::string, BSONObj> _missingIndexEntries; - // Only one thread can use the class at a time - mutable stdx::mutex _classMutex; + /** + * During the first phase of validation, given the document's key KeyString, increment the + * corresponding `_indexKeyCount` by hashing it. + * For the second phase of validation, keep track of the document keys that hashed to + * inconsistent hash buckets during the first phase of validation. + */ + void _addDocKey_inlock(const KeyString& ks, + int indexNumber, + const RecordId& recordId, + const BSONObj& indexKey); /** - * Given the document's key KeyString, increment the corresponding `_indexKeyCount` - * by hashing it. + * During the first phase of validation, given the index entry's KeyString, decrement the + * corresponding `_indexKeyCount` by hashing it. + * For the second phase of validation, try to match the index entry keys that hashed to + * inconsistent hash buckets during the first phase of validation to document keys. */ - void _addDocKey_inlock(const KeyString& ks, int indexNumber); + void _addIndexKey_inlock(const KeyString& ks, + int indexNumber, + const RecordId& recordId, + const BSONObj& indexKey); /** - * Given the index entry's KeyString, decrement the corresponding `_indexKeyCount` - * by hashing it. + * Generates a key for the second phase of validation. The keys format is the following: + * { + * indexName: <string>, + * recordId: <number>, + * idKey: <object>, // Only available for missing index entries. + * indexKey: { + * <key>: <value>, + * ... + * } + * } */ - void _addIndexKey_inlock(const KeyString& ks, int indexNumber); + BSONObj _generateInfo(const int& indexNumber, + const RecordId& recordId, + const BSONObj& indexKey, + boost::optional<BSONElement> idKey); /** * Returns a hashed value from the given KeyString and index namespace. diff --git a/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp b/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp index 3151fca3268..bb19f9fd11a 100644 --- a/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp +++ b/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp @@ -89,8 +89,8 @@ Status RecordStoreValidateAdaptor::validate(const RecordId& recordId, while (it->more()) { const IndexDescriptor* descriptor = it->next()->descriptor(); - const std::string indexNs = descriptor->indexNamespace(); - int indexNumber = _indexConsistency->getIndexNumber(indexNs); + const std::string indexName = descriptor->indexName(); + int indexNumber = _indexConsistency->getIndexNumber(indexName); ValidateResults curRecordResults; const IndexAccessMethod* iam = _indexCatalog->getEntry(descriptor)->accessMethod(); @@ -98,7 +98,7 @@ Status RecordStoreValidateAdaptor::validate(const RecordId& recordId, if (descriptor->isPartial()) { const IndexCatalogEntry* ice = _indexCatalog->getEntry(descriptor); if (!ice->getFilterExpression()->matchesBSON(recordBson)) { - (*_indexNsResultsMap)[indexNs] = curRecordResults; + (*_indexNsResultsMap)[indexName] = curRecordResults; continue; } } @@ -140,9 +140,9 @@ Status RecordStoreValidateAdaptor::validate(const RecordId& recordId, // We want to use the latest version of KeyString here. KeyString ks(KeyString::kLatestVersion, key, ord, recordId); - _indexConsistency->addDocKey(ks, indexNumber); + _indexConsistency->addDocKey(ks, indexNumber, recordId, key); } - (*_indexNsResultsMap)[indexNs] = curRecordResults; + (*_indexNsResultsMap)[indexName] = curRecordResults; } return status; } @@ -151,8 +151,8 @@ void RecordStoreValidateAdaptor::traverseIndex(const IndexAccessMethod* iam, const IndexDescriptor* descriptor, ValidateResults* results, int64_t* numTraversedKeys) { - auto indexNs = descriptor->indexNamespace(); - int indexNumber = _indexConsistency->getIndexNumber(indexNs); + auto indexName = descriptor->indexName(); + int indexNumber = _indexConsistency->getIndexNumber(indexName); int64_t numKeys = 0; const auto& key = descriptor->keyPattern(); @@ -170,12 +170,15 @@ void RecordStoreValidateAdaptor::traverseIndex(const IndexAccessMethod* iam, stdx::make_unique<KeyString>(version, indexEntry->key, ord, indexEntry->loc); // Ensure that the index entries are in increasing or decreasing order. if (!isFirstEntry && *indexKeyString < *prevIndexKeyString) { - if (results->valid) { + if (results && results->valid) { results->errors.push_back( "one or more indexes are not in strictly ascending or descending " "order"); } - results->valid = false; + + if (results) { + results->valid = false; + } } const RecordId kWildcardMultikeyMetadataRecordId{ @@ -188,21 +191,24 @@ void RecordStoreValidateAdaptor::traverseIndex(const IndexAccessMethod* iam, continue; } - _indexConsistency->addIndexKey(*indexKeyString, indexNumber); + _indexConsistency->addIndexKey( + *indexKeyString, indexNumber, indexEntry->loc, indexEntry->key); numKeys++; isFirstEntry = false; prevIndexKeyString.swap(indexKeyString); } - if (_indexConsistency->getMultikeyMetadataPathCount(indexNumber) > 0) { + if (results && _indexConsistency->getMultikeyMetadataPathCount(indexNumber) > 0) { results->errors.push_back( str::stream() << "Index '" << descriptor->indexName() << "' has one or more missing multikey metadata index keys"); results->valid = false; } - *numTraversedKeys = numKeys; + if (numTraversedKeys) { + *numTraversedKeys = numKeys; + } } void RecordStoreValidateAdaptor::traverseRecordStore(RecordStore* recordStore, @@ -261,8 +267,8 @@ void RecordStoreValidateAdaptor::traverseRecordStore(RecordStore* recordStore, void RecordStoreValidateAdaptor::validateIndexKeyCount(const IndexDescriptor* idx, int64_t numRecs, ValidateResults& results) { - const std::string indexNs = idx->indexNamespace(); - int indexNumber = _indexConsistency->getIndexNumber(indexNs); + const std::string indexName = idx->indexName(); + int indexNumber = _indexConsistency->getIndexNumber(indexName); int64_t numIndexedKeys = _indexConsistency->getNumKeys(indexNumber); int64_t numLongKeys = _indexConsistency->getNumLongKeys(indexNumber); auto totalKeys = numLongKeys + numIndexedKeys; diff --git a/src/mongo/db/commands/validate.cpp b/src/mongo/db/commands/validate.cpp index df8abe7b222..a15d7c7b46b 100644 --- a/src/mongo/db/commands/validate.cpp +++ b/src/mongo/db/commands/validate.cpp @@ -186,6 +186,8 @@ public: result.appendBool("valid", results.valid); result.append("warnings", results.warnings); result.append("errors", results.errors); + result.append("extraIndexEntries", results.extraIndexEntries); + result.append("missingIndexEntries", results.missingIndexEntries); if (!results.valid) { result.append("advice", diff --git a/src/mongo/db/storage/record_store.h b/src/mongo/db/storage/record_store.h index 5389e8f6439..3dce47699cf 100644 --- a/src/mongo/db/storage/record_store.h +++ b/src/mongo/db/storage/record_store.h @@ -610,6 +610,8 @@ struct ValidateResults { bool valid; std::vector<std::string> errors; std::vector<std::string> warnings; + std::vector<BSONObj> extraIndexEntries; + std::vector<BSONObj> missingIndexEntries; }; /** |