SERVER-30356 Improve error reporting for validation

author: Gregory Wlodarek <gregory.wlodarek@mongodb.com> 2019-04-29 20:18:42 -0400
committer: Gregory Wlodarek <gregory.wlodarek@mongodb.com> 2019-04-29 20:25:09 -0400
commit: abf37f69a45009f44f275e03e2f07d2d496d5b8d (patch)
tree: 5f58d5c227e57b44341b1be900b0feb1dc0fb3de
parent: 7ea05d8684052198c595dee0b9a9cabf652e904d (diff)
download: mongo-abf37f69a45009f44f275e03e2f07d2d496d5b8d.tar.gz
9 files changed, 403 insertions, 120 deletions
diff --git a/jstests/core/geo_s2sparse.js b/jstests/core/geo_s2sparse.js
index 3677e98f84c..57f4f73fa3a 100644
--- a/jstests/core/geo_s2sparse.js
+++ b/jstests/core/geo_s2sparse.js
@@ -7,7 +7,7 @@
     var coll = db.geo_s2sparse;
     var point = {type: "Point", coordinates: [5, 5]};
     var indexSpec = {geo: "2dsphere", nonGeo: 1};
-    var indexName = 'test.geo_s2sparse.$geo_2dsphere_nonGeo_1';
+    var indexName = 'geo_2dsphere_nonGeo_1';
 
     //
     // V2 indices are "geo sparse" always.
@@ -103,7 +103,7 @@
     coll.drop();
     coll.ensureIndex({geo: "2dsphere", otherGeo: "2dsphere"});
 
-    indexName = 'test.geo_s2sparse.$geo_2dsphere_otherGeo_2dsphere';
+    indexName = 'geo_2dsphere_otherGeo_2dsphere';
 
     // Insert N documents with the first geo field.
     bulkInsertDocs(coll, N, function(i) {
diff --git a/jstests/core/index_partial_create_drop.js b/jstests/core/index_partial_create_drop.js
index bf25fa3a6b1..55a6b06d117 100644
--- a/jstests/core/index_partial_create_drop.js
+++ b/jstests/core/index_partial_create_drop.js
@@ -23,7 +23,7 @@
         } else {
             kpi = res.keysPerIndex;
         }
-        return kpi[coll.getFullName() + ".$" + idxName];
+        return kpi[idxName];
     };
 
     coll.drop();
diff --git a/jstests/core/index_partial_write_ops.js b/jstests/core/index_partial_write_ops.js
index a88989883d1..730bcca5318 100644
--- a/jstests/core/index_partial_write_ops.js
+++ b/jstests/core/index_partial_write_ops.js
@@ -15,7 +15,7 @@
         } else {
             kpi = res.keysPerIndex;
         }
-        return kpi[coll.getFullName() + ".$" + idxName];
+        return kpi[idxName];
     };
 
     coll.drop();
diff --git a/src/mongo/db/catalog/collection_impl.cpp b/src/mongo/db/catalog/collection_impl.cpp
index 98b11660c74..57f1f5fa173 100644
--- a/src/mongo/db/catalog/collection_impl.cpp
+++ b/src/mongo/db/catalog/collection_impl.cpp
@@ -184,7 +184,6 @@ StatusWith<CollectionImpl::ValidationAction> _parseValidationAction(StringData n
 
 }  // namespace
 
-using std::endl;
 using std::string;
 using std::unique_ptr;
 using std::vector;
@@ -412,8 +411,8 @@ Status CollectionImpl::insertDocuments(OperationContext* opCtx,
             string whenFirst =
                 firstIdElem ? (string(" when first _id is ") + firstIdElem.str()) : "";
             while (MONGO_FAIL_POINT(hangAfterCollectionInserts)) {
-                log() << "hangAfterCollectionInserts fail point enabled for " << _ns.toString()
-                      << whenFirst << ". Blocking until fail point is disabled.";
+                log() << "hangAfterCollectionInserts fail point enabled for " << _ns << whenFirst
+                      << ". Blocking until fail point is disabled.";
                 mongo::sleepsecs(1);
                 opCtx->checkForInterrupt();
             }
@@ -1095,8 +1094,9 @@ void _validateIndexes(OperationContext* opCtx,
         const IndexDescriptor* descriptor = entry->descriptor();
         const IndexAccessMethod* iam = entry->accessMethod();
 
-        log(LogComponent::kIndex) << "validating index " << descriptor->indexNamespace() << endl;
-        ValidateResults& curIndexResults = (*indexNsResultsMap)[descriptor->indexNamespace()];
+        log(LogComponent::kIndex) << "validating index " << descriptor->indexName()
+                                  << " on collection " << descriptor->parentNS();
+        ValidateResults& curIndexResults = (*indexNsResultsMap)[descriptor->indexName()];
         bool checkCounts = false;
         int64_t numTraversedKeys;
         int64_t numValidatedKeys;
@@ -1120,7 +1120,7 @@ void _validateIndexes(OperationContext* opCtx,
             }
 
             if (curIndexResults.valid) {
-                keysPerIndex->appendNumber(descriptor->indexNamespace(),
+                keysPerIndex->appendNumber(descriptor->indexName(),
                                            static_cast<long long>(numTraversedKeys));
             } else {
                 results->valid = false;
@@ -1131,19 +1131,54 @@ void _validateIndexes(OperationContext* opCtx,
     }
 }
 
-void _markIndexEntriesInvalid(ValidateResultsMap* indexNsResultsMap, ValidateResults* results) {
+/**
+ * Executes the second phase of validation for improved error reporting. This is only done if
+ * any index inconsistencies are found during the first phase of validation.
+ */
+void _gatherIndexEntryErrors(OperationContext* opCtx,
+                             RecordStore* recordStore,
+                             IndexCatalog* indexCatalog,
+                             IndexConsistency* indexConsistency,
+                             RecordStoreValidateAdaptor* indexValidator,
+                             ValidateResultsMap* indexNsResultsMap,
+                             ValidateResults* result) {
+    indexConsistency->setSecondPhase();
+
+    log(LogComponent::kIndex) << "Starting to traverse through all the document key sets.";
+
+    // During the second phase of validation, iterate through each document's key set and only
+    // record the keys that were inconsistent during the first phase of validation.
+    std::unique_ptr<SeekableRecordCursor> cursor = recordStore->getCursor(opCtx, true);
+    while (auto record = cursor->next()) {
+        opCtx->checkForInterrupt();
 
-    // The error message can't be more specific because even though the index is
-    // invalid, we won't know if the corruption occurred on the index entry or in
-    // the document.
-    for (auto& it : *indexNsResultsMap) {
-        // Marking all indexes as invalid since we don't know which one failed.
-        ValidateResults& r = it.second;
-        r.valid = false;
+        // We can ignore the status of validate as it was already checked during the first phase.
+        size_t validatedSize;
+        indexValidator->validate(record->id, record->data, &validatedSize).ignore();
     }
-    string msg = "one or more indexes contain invalid index entries.";
-    results->errors.push_back(msg);
-    results->valid = false;
+
+    log(LogComponent::kIndex) << "Finished traversing through all the document key sets.";
+    log(LogComponent::kIndex) << "Starting to traverse through all the indexes.";
+
+    // Iterate through all the indexes in the collection and only record the index entry keys that
+    // had inconsistencies during the first phase.
+    std::unique_ptr<IndexCatalog::IndexIterator> it = indexCatalog->getIndexIterator(opCtx, false);
+    while (it->more()) {
+        opCtx->checkForInterrupt();
+
+        const IndexCatalogEntry* entry = it->next();
+        const IndexDescriptor* descriptor = entry->descriptor();
+        const IndexAccessMethod* iam = entry->accessMethod();
+
+        log(LogComponent::kIndex) << "Traversing through the index entries for index "
+                                  << descriptor->indexName() << ".";
+        indexValidator->traverseIndex(
+            iam, descriptor, /*ValidateResults=*/nullptr, /*numTraversedKeys=*/nullptr);
+    }
+
+    log(LogComponent::kIndex) << "Finished traversing through all the indexes.";
+
+    indexConsistency->addIndexEntryErrors(indexNsResultsMap, result);
 }
 
 void _validateIndexKeyCount(OperationContext* opCtx,
@@ -1156,7 +1191,7 @@ void _validateIndexKeyCount(OperationContext* opCtx,
         indexCatalog->getIndexIterator(opCtx, false);
     while (indexIterator->more()) {
         const IndexDescriptor* descriptor = indexIterator->next()->descriptor();
-        ValidateResults& curIndexResults = (*indexNsResultsMap)[descriptor->indexNamespace()];
+        ValidateResults& curIndexResults = (*indexNsResultsMap)[descriptor->indexName()];
 
         if (curIndexResults.valid) {
             indexValidator->validateIndexKeyCount(
@@ -1277,8 +1312,7 @@ Status CollectionImpl::validate(OperationContext* opCtx,
         // Validate the record store
         std::string uuidString = str::stream()
             << " (UUID: " << (uuid() ? uuid()->toString() : "none") << ")";
-        log(LogComponent::kIndex) << "validating collection " << ns().toString() << uuidString
-                                  << endl;
+        log(LogComponent::kIndex) << "validating collection " << ns() << uuidString;
         _validateRecordStore(
             opCtx, _recordStore, level, background, &indexValidator, results, output);
 
@@ -1296,7 +1330,16 @@ Status CollectionImpl::validate(OperationContext* opCtx,
                              results);
 
             if (indexConsistency.haveEntryMismatch()) {
-                _markIndexEntriesInvalid(&indexNsResultsMap, results);
+                log(LogComponent::kIndex)
+                    << "Index inconsistencies were detected on collection " << ns()
+                    << ". Starting the second phase of index validation to gather concise errors.";
+                _gatherIndexEntryErrors(opCtx,
+                                        _recordStore,
+                                        _indexCatalog.get(),
+                                        &indexConsistency,
+                                        &indexValidator,
+                                        &indexNsResultsMap,
+                                        results);
             }
         }
 
diff --git a/src/mongo/db/catalog/index_consistency.cpp b/src/mongo/db/catalog/index_consistency.cpp
index a10e63dee3a..1ff0f14d65a 100644
--- a/src/mongo/db/catalog/index_consistency.cpp
+++ b/src/mongo/db/catalog/index_consistency.cpp
@@ -41,6 +41,7 @@
 #include "mongo/db/index_names.h"
 #include "mongo/db/server_options.h"
 #include "mongo/db/storage/key_string.h"
+#include "mongo/db/storage/record_store.h"
 #include "mongo/db/storage/sorted_data_interface.h"
 #include "mongo/util/elapsed_tracker.h"
 
@@ -68,7 +69,8 @@ IndexConsistency::IndexConsistency(OperationContext* opCtx,
       _recordStore(recordStore),
       _tracker(opCtx->getServiceContext()->getFastClockSource(),
                internalQueryExecYieldIterations.load(),
-               Milliseconds(internalQueryExecYieldPeriodMS.load())) {
+               Milliseconds(internalQueryExecYieldPeriodMS.load())),
+      _firstPhase(true) {
 
     IndexCatalog* indexCatalog = _collection->getIndexCatalog();
     std::unique_ptr<IndexCatalog::IndexIterator> indexIterator =
@@ -78,18 +80,18 @@ IndexConsistency::IndexConsistency(OperationContext* opCtx,
     while (indexIterator->more()) {
 
         const IndexDescriptor* descriptor = indexIterator->next()->descriptor();
-        std::string indexNs = descriptor->indexNamespace();
-
-        _indexNumber[descriptor->indexNamespace()] = indexNumber;
+        std::string indexName = descriptor->indexName();
+        _indexNumber[indexName] = indexNumber;
 
         IndexInfo indexInfo;
 
-        indexInfo.isReady =
-            _collection->getCatalogEntry()->isIndexReady(opCtx, descriptor->indexName());
+        indexInfo.indexName = indexName;
+        indexInfo.keyPattern = descriptor->keyPattern();
+        indexInfo.isReady = _collection->getCatalogEntry()->isIndexReady(opCtx, indexName);
 
-        uint32_t indexNsHash;
-        MurmurHash3_x86_32(indexNs.c_str(), indexNs.size(), 0, &indexNsHash);
-        indexInfo.indexNsHash = indexNsHash;
+        uint32_t indexNameHash;
+        MurmurHash3_x86_32(indexName.c_str(), indexName.size(), 0, &indexNameHash);
+        indexInfo.indexNameHash = indexNameHash;
         indexInfo.indexScanFinished = false;
 
         indexInfo.numKeys = 0;
@@ -103,24 +105,30 @@ IndexConsistency::IndexConsistency(OperationContext* opCtx,
     }
 }
 
-void IndexConsistency::addDocKey(const KeyString& ks, int indexNumber) {
+void IndexConsistency::addDocKey(const KeyString& ks,
+                                 int indexNumber,
+                                 const RecordId& recordId,
+                                 const BSONObj& indexKey) {
 
     if (indexNumber < 0 || indexNumber >= static_cast<int>(_indexesInfo.size())) {
         return;
     }
 
     stdx::lock_guard<stdx::mutex> lock(_classMutex);
-    _addDocKey_inlock(ks, indexNumber);
+    _addDocKey_inlock(ks, indexNumber, recordId, indexKey);
 }
 
-void IndexConsistency::addIndexKey(const KeyString& ks, int indexNumber) {
+void IndexConsistency::addIndexKey(const KeyString& ks,
+                                   int indexNumber,
+                                   const RecordId& recordId,
+                                   const BSONObj& indexKey) {
 
     if (indexNumber < 0 || indexNumber >= static_cast<int>(_indexesInfo.size())) {
         return;
     }
 
     stdx::lock_guard<stdx::mutex> lock(_classMutex);
-    _addIndexKey_inlock(ks, indexNumber);
+    _addIndexKey_inlock(ks, indexNumber, recordId, indexKey);
 }
 
 void IndexConsistency::addMultikeyMetadataPath(const KeyString& ks, int indexNumber) {
@@ -216,33 +224,123 @@ int64_t IndexConsistency::getNumExtraIndexKeys(int indexNumber) const {
     return _indexesInfo.at(indexNumber).numExtraIndexKeys;
 }
 
-void IndexConsistency::nextStage() {
+int IndexConsistency::getIndexNumber(const std::string& indexName) {
 
-    stdx::lock_guard<stdx::mutex> lock(_classMutex);
-    if (_stage == ValidationStage::DOCUMENT) {
-        _stage = ValidationStage::INDEX;
-    } else if (_stage == ValidationStage::INDEX) {
-        _stage = ValidationStage::NONE;
+    auto search = _indexNumber.find(indexName);
+    if (search != _indexNumber.end()) {
+        return search->second;
     }
+
+    return -1;
 }
 
-ValidationStage IndexConsistency::getStage() const {
+void IndexConsistency::setSecondPhase() {
 
     stdx::lock_guard<stdx::mutex> lock(_classMutex);
-    return _stage;
+    invariant(_firstPhase);
+    _firstPhase = false;
 }
 
-int IndexConsistency::getIndexNumber(const std::string& indexNs) {
+void IndexConsistency::addIndexEntryErrors(ValidateResultsMap* indexNsResultsMap,
+                                           ValidateResults* results) {
+    stdx::lock_guard<stdx::mutex> lock(_classMutex);
+    invariant(!_firstPhase);
 
-    auto search = _indexNumber.find(indexNs);
-    if (search != _indexNumber.end()) {
-        return search->second;
+    // We'll report up to 1MB for extra index entry errors and missing index entry errors.
+    const int kErrorSizeMB = 1 * 1024 * 1024;
+    int numMissingIndexEntriesSizeMB = 0;
+    int numExtraIndexEntriesSizeMB = 0;
+
+    int numMissingIndexEntryErrors = _missingIndexEntries.size();
+    int numExtraIndexEntryErrors = 0;
+    for (const auto& item : _extraIndexEntries) {
+        numExtraIndexEntryErrors += item.second.size();
     }
 
-    return -1;
+    // Inform which indexes have inconsistencies and add the BSON objects of the inconsistent index
+    // entries to the results vector.
+    bool missingIndexEntrySizeLimitWarning = false;
+    for (const auto& missingIndexEntry : _missingIndexEntries) {
+        const BSONObj& entry = missingIndexEntry.second;
+
+        // Only count the indexKey and idKey fields towards the total size.
+        numMissingIndexEntriesSizeMB += entry["indexKey"].size();
+        if (entry.hasField("idKey")) {
+            numMissingIndexEntriesSizeMB += entry["idKey"].size();
+        }
+
+        if (numMissingIndexEntriesSizeMB <= kErrorSizeMB) {
+            results->missingIndexEntries.push_back(entry);
+        } else if (!missingIndexEntrySizeLimitWarning) {
+            StringBuilder ss;
+            ss << "Not all missing index entry inconsistencies are listed due to size limitations.";
+            results->errors.push_back(ss.str());
+
+            missingIndexEntrySizeLimitWarning = true;
+        }
+
+        std::string indexName = entry["indexName"].String();
+        if (!indexNsResultsMap->at(indexName).valid) {
+            continue;
+        }
+
+        StringBuilder ss;
+        ss << "Index with name '" << indexName << "' has inconsistencies.";
+        results->errors.push_back(ss.str());
+
+        indexNsResultsMap->at(indexName).valid = false;
+    }
+
+    bool extraIndexEntrySizeLimitWarning = false;
+    for (const auto& extraIndexEntry : _extraIndexEntries) {
+        const SimpleBSONObjSet& entries = extraIndexEntry.second;
+        for (const auto& entry : entries) {
+            // Only count the indexKey field towards the total size.
+            numExtraIndexEntriesSizeMB += entry["indexKey"].size();
+            if (numExtraIndexEntriesSizeMB <= kErrorSizeMB) {
+                results->extraIndexEntries.push_back(entry);
+            } else if (!extraIndexEntrySizeLimitWarning) {
+                StringBuilder ss;
+                ss << "Not all extra index entry inconsistencies are listed due to size "
+                      "limitations.";
+                results->errors.push_back(ss.str());
+
+                extraIndexEntrySizeLimitWarning = true;
+            }
+
+            std::string indexName = entry["indexName"].String();
+            if (!indexNsResultsMap->at(indexName).valid) {
+                continue;
+            }
+
+            StringBuilder ss;
+            ss << "Index with name '" << indexName << "' has inconsistencies.";
+            results->errors.push_back(ss.str());
+
+            indexNsResultsMap->at(indexName).valid = false;
+        }
+    }
+
+    // Inform how many inconsistencies were detected.
+    if (numMissingIndexEntryErrors > 0) {
+        StringBuilder ss;
+        ss << "Detected " << numMissingIndexEntryErrors << " missing index entries.";
+        results->warnings.push_back(ss.str());
+    }
+
+    if (numExtraIndexEntryErrors > 0) {
+        StringBuilder ss;
+        ss << "Detected " << numExtraIndexEntryErrors << " extra index entries.";
+        results->warnings.push_back(ss.str());
+    }
+
+    results->valid = false;
 }
 
-void IndexConsistency::_addDocKey_inlock(const KeyString& ks, int indexNumber) {
+void IndexConsistency::_addDocKey_inlock(const KeyString& ks,
+                                         int indexNumber,
+                                         const RecordId& recordId,
+                                         const BSONObj& indexKey) {
 
     // Ignore indexes that weren't ready before we started validation.
     if (!_indexesInfo.at(indexNumber).isReady) {
@@ -250,11 +348,46 @@ void IndexConsistency::_addDocKey_inlock(const KeyString& ks, int indexNumber) {
     }
 
     const uint32_t hash = _hashKeyString(ks, indexNumber);
-    _indexKeyCount[hash]++;
-    _indexesInfo.at(indexNumber).numRecords++;
+
+    if (_firstPhase) {
+        // During the first phase of validation we only keep track of the count for the document
+        // keys encountered.
+        _indexKeyCount[hash]++;
+        _indexesInfo.at(indexNumber).numRecords++;
+    } else {
+        // For the second phase of validation, we keep track of the document keys that mapped to
+        // an inconsistent hash bucket during the first phase.
+        auto searchBuckets = _indexKeyCount.find(hash);
+        invariant(searchBuckets != _indexKeyCount.end());
+        if (searchBuckets->second == 0) {
+            // No inconsistencies in this hash bucket during the first phase.
+            return;
+        }
+
+        // Get the document's _id index key.
+        auto cursor = _recordStore->getCursor(_opCtx);
+        auto record = cursor->seekExact(recordId);
+        invariant(record);
+
+        BSONObj data = record->data.toBson();
+        boost::optional<BSONElement> idKey = boost::none;
+        if (data.hasField("_id")) {
+            idKey = data["_id"];
+        }
+
+        std::string key = std::string(ks.getBuffer(), ks.getSize());
+        BSONObj info = _generateInfo(indexNumber, recordId, indexKey, idKey);
+
+        // Cannot have duplicate KeyStrings during the document scan phase.
+        invariant(_missingIndexEntries.count(key) == 0);
+        _missingIndexEntries.insert(std::make_pair(key, info));
+    }
 }
 
-void IndexConsistency::_addIndexKey_inlock(const KeyString& ks, int indexNumber) {
+void IndexConsistency::_addIndexKey_inlock(const KeyString& ks,
+                                           int indexNumber,
+                                           const RecordId& recordId,
+                                           const BSONObj& indexKey) {
 
     // Ignore indexes that weren't ready before we started validation.
     if (!_indexesInfo.at(indexNumber).isReady) {
@@ -262,16 +395,84 @@ void IndexConsistency::_addIndexKey_inlock(const KeyString& ks, int indexNumber)
     }
 
     const uint32_t hash = _hashKeyString(ks, indexNumber);
-    _indexKeyCount[hash]--;
-    _indexesInfo.at(indexNumber).numKeys++;
+
+    if (_firstPhase) {
+        // During the first phase of validation we only keep track of the count for the index entry
+        // keys encountered.
+        _indexKeyCount[hash]--;
+        _indexesInfo.at(indexNumber).numKeys++;
+    } else {
+        // For the second phase of validation, on the buckets that were inconsistent during the
+        // first phase, we see if there was a corresponding document key for the index entry key
+        // we have.
+        // If there is a corresponding document key for the index entry key, we remove the key from
+        // the '_missingIndexEntries' map. However if there was no document key for the index entry
+        // key, we add the key to the '_extraIndexEntries' map.
+        auto searchBuckets = _indexKeyCount.find(hash);
+        invariant(searchBuckets != _indexKeyCount.end());
+        if (searchBuckets->second == 0) {
+            // No inconsistencies in this hash bucket during the first phase.
+            return;
+        }
+
+        std::string key = std::string(ks.getBuffer(), ks.getSize());
+        BSONObj info = _generateInfo(indexNumber, recordId, indexKey, boost::none);
+
+        if (_missingIndexEntries.count(key) == 0) {
+            // We may have multiple extra index entries for a given KeyString.
+            auto search = _extraIndexEntries.find(key);
+            if (search == _extraIndexEntries.end()) {
+                SimpleBSONObjSet infoSet = {info};
+                _extraIndexEntries.insert(std::make_pair(key, infoSet));
+                return;
+            }
+
+            search->second.insert(info);
+        } else {
+            _missingIndexEntries.erase(key);
+        }
+    }
+}
+
+BSONObj IndexConsistency::_generateInfo(const int& indexNumber,
+                                        const RecordId& recordId,
+                                        const BSONObj& indexKey,
+                                        boost::optional<BSONElement> idKey) {
+    const std::string& indexName = _indexesInfo.at(indexNumber).indexName;
+    const BSONObj& keyPattern = _indexesInfo.at(indexNumber).keyPattern;
+
+    // We need to rehydrate the indexKey for improved readability.
+    // {"": ObjectId(...)} -> {"_id": ObjectId(...)}
+    auto keysIt = keyPattern.begin();
+    auto valuesIt = indexKey.begin();
+
+    BSONObjBuilder b;
+    while (keysIt != keyPattern.end()) {
+        // keysIt and valuesIt must have the same number of elements.
+        invariant(valuesIt != indexKey.end());
+        b.appendAs(*valuesIt, keysIt->fieldName());
+        keysIt++;
+        valuesIt++;
+    }
+
+    BSONObj rehydratedKey = b.done();
+
+    if (idKey) {
+        return BSON("indexName" << indexName << "recordId" << recordId.repr() << "idKey" << *idKey
+                                << "indexKey"
+                                << rehydratedKey);
+    } else {
+        return BSON("indexName" << indexName << "recordId" << recordId.repr() << "indexKey"
+                                << rehydratedKey);
+    }
 }
 
 uint32_t IndexConsistency::_hashKeyString(const KeyString& ks, int indexNumber) const {
 
-    uint32_t indexNsHash = _indexesInfo.at(indexNumber).indexNsHash;
+    uint32_t indexNameHash = _indexesInfo.at(indexNumber).indexNameHash;
     MurmurHash3_x86_32(
-        ks.getTypeBits().getBuffer(), ks.getTypeBits().getSize(), indexNsHash, &indexNsHash);
-    MurmurHash3_x86_32(ks.getBuffer(), ks.getSize(), indexNsHash, &indexNsHash);
-    return indexNsHash % (1U << 22);
+        ks.getTypeBits().getBuffer(), ks.getTypeBits().getSize(), indexNameHash, &indexNameHash);
+    MurmurHash3_x86_32(ks.getBuffer(), ks.getSize(), indexNameHash, &indexNameHash);
+    return indexNameHash % (1U << 22);
 }
 }  // namespace mongo
diff --git a/src/mongo/db/catalog/index_consistency.h b/src/mongo/db/catalog/index_consistency.h
index d22696c5b0c..348f282e0af 100644
--- a/src/mongo/db/catalog/index_consistency.h
+++ b/src/mongo/db/catalog/index_consistency.h
@@ -39,20 +39,6 @@
 namespace mongo {
 
 /**
- * The ValidationStage allows the IndexConsistency class to perform
- * the correct operations that depend on where we are in the validation.
- */
-enum class ValidationStage { DOCUMENT, INDEX, NONE };
-
-/**
- * The ValidationOperation is used by classes using the IndexObserver to let us know what operation
- * was associated with it.
- * The `UPDATE` operation can be seen as two independent operations (`REMOVE` operation followed
- * by an `INSERT` operation).
- */
-enum class ValidationOperation { INSERT, REMOVE };
-
-/**
  * The IndexConsistency class is used to keep track of the index consistency.
  * It does this by using the index keys from index entries and index keys generated from the
  * document to ensure there is a one-to-one mapping for each key.
@@ -64,10 +50,14 @@ enum class ValidationOperation { INSERT, REMOVE };
  * Contains all the index information and stats throughout the validation.
  */
 struct IndexInfo {
+    // The name of the index.
+    std::string indexName;
+    // The index key pattern.
+    BSONObj keyPattern;
     // Informs us if the index was ready or not for consumption during the start of validation.
     bool isReady;
-    // Contains the pre-computed hashed of the index namespace.
-    uint32_t indexNsHash;
+    // Contains the pre-computed hash of the index name.
+    uint32_t indexNameHash;
     // True if the index has finished scanning from the index scan stage, otherwise false.
     bool indexScanFinished;
     // The number of index entries belonging to the index.
@@ -85,6 +75,8 @@ struct IndexInfo {
 };
 
 class IndexConsistency final {
+    using ValidateResultsMap = std::map<std::string, ValidateResults>;
+
 public:
     IndexConsistency(OperationContext* opCtx,
                      Collection* collection,
@@ -95,8 +87,14 @@ public:
     /**
      * Helper functions for `_addDocKey` and `_addIndexKey` for concurrency control.
      */
-    void addDocKey(const KeyString& ks, int indexNumber);
-    void addIndexKey(const KeyString& ks, int indexNumber);
+    void addDocKey(const KeyString& ks,
+                   int indexNumber,
+                   const RecordId& recordId,
+                   const BSONObj& indexKey);
+    void addIndexKey(const KeyString& ks,
+                     int indexNumber,
+                     const RecordId& recordId,
+                     const BSONObj& indexKey);
 
     /**
      * To validate $** multikey metadata paths, we first scan the collection and add a hash of all
@@ -146,30 +144,34 @@ public:
     int64_t getNumExtraIndexKeys(int indexNumber) const;
 
     /**
-     * Moves the `_stage` variable to the next corresponding stage in the following order:
-     * `DOCUMENT` -> `INDEX`
-     * `INDEX` -> `NONE`
-     * `NONE` -> `NONE`
+     * Returns the index number for the corresponding index name.
      */
-    void nextStage();
+    int getIndexNumber(const std::string& indexName);
 
     /**
-     * Returns the `_stage` that the validation is on.
+     * Informs the IndexConsistency object that we're advancing to the second phase of index
+     * validation.
      */
-    ValidationStage getStage() const;
+    void setSecondPhase();
 
     /**
-     * Returns the index number for the corresponding index namespace's.
+     * Records the errors gathered from the second phase of index validation into the provided
+     * ValidateResultsMap and ValidateResults.
      */
-    int getIndexNumber(const std::string& indexNs);
+    void addIndexEntryErrors(ValidateResultsMap* indexNsResultsMap, ValidateResults* results);
 
 private:
+    IndexConsistency() = delete;
+
     OperationContext* _opCtx;
     Collection* _collection;
     const NamespaceString _nss;
     const RecordStore* _recordStore;
     ElapsedTracker _tracker;
 
+    // Protects the variables below.
+    mutable stdx::mutex _classMutex;
+
     // We map the hashed KeyString values to a bucket which contain the count of how many
     // index keys and document keys we've seen in each bucket.
     // Count rules:
@@ -179,7 +181,7 @@ private:
     //       are too few index entries.
     //     - If the count is < 0 in the bucket at the end of the validation pass, then there
     //       are too many index entries.
-    std::map<uint32_t, uint32_t> _indexKeyCount;
+    std::map<uint32_t, int32_t> _indexKeyCount;
 
     // Contains the corresponding index number for each index namespace
     std::map<std::string, int> _indexNumber;
@@ -187,32 +189,59 @@ private:
     // A mapping of index numbers to IndexInfo
     std::map<int, IndexInfo> _indexesInfo;
 
-    // The current index namespace being scanned in the index scan phase.
-    int _currentIndex = -1;
-
-    // The stage that the validation is currently on.
-    ValidationStage _stage = ValidationStage::DOCUMENT;
+    // Whether we're in the first or second phase of index validation.
+    bool _firstPhase;
 
-    // Threshold for the number of errors to record before returning "There are too many errors".
-    static const int _kErrorThreshold = 100;
+    // Populated during the second phase of validation, this map contains the index entries that
+    // were pointing at an invalid document key.
+    // The map contains a KeyString pointing at a set of BSON objects as there may be multiple
+    // extra index entries for the same KeyString.
+    std::map<std::string, SimpleBSONObjSet> _extraIndexEntries;
 
-    // The current number of errors that are recorded.
-    int _numErrorsRecorded = 0;
+    // Populated during the second phase of validation, this map contains the index entries that
+    // were missing while the document key was in place.
+    // The map contains a KeyString pointing to a BSON object as there can only be one missing index
+    // entry for a given KeyString.
+    std::map<std::string, BSONObj> _missingIndexEntries;
 
-    // Only one thread can use the class at a time
-    mutable stdx::mutex _classMutex;
+    /**
+     * During the first phase of validation, given the document's key KeyString, increment the
+     * corresponding `_indexKeyCount` by hashing it.
+     * For the second phase of validation, keep track of the document keys that hashed to
+     * inconsistent hash buckets during the first phase of validation.
+     */
+    void _addDocKey_inlock(const KeyString& ks,
+                           int indexNumber,
+                           const RecordId& recordId,
+                           const BSONObj& indexKey);
 
     /**
-     * Given the document's key KeyString, increment the corresponding `_indexKeyCount`
-     * by hashing it.
+     * During the first phase of validation, given the index entry's KeyString, decrement the
+     * corresponding `_indexKeyCount` by hashing it.
+     * For the second phase of validation, try to match the index entry keys that hashed to
+     * inconsistent hash buckets during the first phase of validation to document keys.
      */
-    void _addDocKey_inlock(const KeyString& ks, int indexNumber);
+    void _addIndexKey_inlock(const KeyString& ks,
+                             int indexNumber,
+                             const RecordId& recordId,
+                             const BSONObj& indexKey);
 
     /**
-     * Given the index entry's KeyString, decrement the corresponding `_indexKeyCount`
-     * by hashing it.
+     * Generates a key for the second phase of validation. The key's format is the following:
+     * {
+     *     indexName: <string>,
+     *     recordId: <number>,
+     *     idKey: <object>,  // Only available for missing index entries.
+     *     indexKey: {
+     *         <key>: <value>,
+     *         ...
+     *     }
+     * }
      */
-    void _addIndexKey_inlock(const KeyString& ks, int indexNumber);
+    BSONObj _generateInfo(const int& indexNumber,
+                          const RecordId& recordId,
+                          const BSONObj& indexKey,
+                          boost::optional<BSONElement> idKey);
 
     /**
      * Returns a hashed value from the given KeyString and index namespace.
diff --git a/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp b/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp
index 3151fca3268..bb19f9fd11a 100644
--- a/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp
+++ b/src/mongo/db/catalog/private/record_store_validate_adaptor.cpp
@@ -89,8 +89,8 @@ Status RecordStoreValidateAdaptor::validate(const RecordId& recordId,
 
     while (it->more()) {
         const IndexDescriptor* descriptor = it->next()->descriptor();
-        const std::string indexNs = descriptor->indexNamespace();
-        int indexNumber = _indexConsistency->getIndexNumber(indexNs);
+        const std::string indexName = descriptor->indexName();
+        int indexNumber = _indexConsistency->getIndexNumber(indexName);
         ValidateResults curRecordResults;
 
         const IndexAccessMethod* iam = _indexCatalog->getEntry(descriptor)->accessMethod();
@@ -98,7 +98,7 @@ Status RecordStoreValidateAdaptor::validate(const RecordId& recordId,
         if (descriptor->isPartial()) {
             const IndexCatalogEntry* ice = _indexCatalog->getEntry(descriptor);
             if (!ice->getFilterExpression()->matchesBSON(recordBson)) {
-                (*_indexNsResultsMap)[indexNs] = curRecordResults;
+                (*_indexNsResultsMap)[indexName] = curRecordResults;
                 continue;
             }
         }
@@ -140,9 +140,9 @@ Status RecordStoreValidateAdaptor::validate(const RecordId& recordId,
 
             // We want to use the latest version of KeyString here.
             KeyString ks(KeyString::kLatestVersion, key, ord, recordId);
-            _indexConsistency->addDocKey(ks, indexNumber);
+            _indexConsistency->addDocKey(ks, indexNumber, recordId, key);
         }
-        (*_indexNsResultsMap)[indexNs] = curRecordResults;
+        (*_indexNsResultsMap)[indexName] = curRecordResults;
     }
     return status;
 }
@@ -151,8 +151,8 @@ void RecordStoreValidateAdaptor::traverseIndex(const IndexAccessMethod* iam,
                                                const IndexDescriptor* descriptor,
                                                ValidateResults* results,
                                                int64_t* numTraversedKeys) {
-    auto indexNs = descriptor->indexNamespace();
-    int indexNumber = _indexConsistency->getIndexNumber(indexNs);
+    auto indexName = descriptor->indexName();
+    int indexNumber = _indexConsistency->getIndexNumber(indexName);
     int64_t numKeys = 0;
 
     const auto& key = descriptor->keyPattern();
@@ -170,12 +170,15 @@ void RecordStoreValidateAdaptor::traverseIndex(const IndexAccessMethod* iam,
             stdx::make_unique<KeyString>(version, indexEntry->key, ord, indexEntry->loc);
         // Ensure that the index entries are in increasing or decreasing order.
         if (!isFirstEntry && *indexKeyString < *prevIndexKeyString) {
-            if (results->valid) {
+            if (results && results->valid) {
                 results->errors.push_back(
                     "one or more indexes are not in strictly ascending or descending "
                     "order");
             }
-            results->valid = false;
+
+            if (results) {
+                results->valid = false;
+            }
         }
 
         const RecordId kWildcardMultikeyMetadataRecordId{
@@ -188,21 +191,24 @@ void RecordStoreValidateAdaptor::traverseIndex(const IndexAccessMethod* iam,
             continue;
         }
 
-        _indexConsistency->addIndexKey(*indexKeyString, indexNumber);
+        _indexConsistency->addIndexKey(
+            *indexKeyString, indexNumber, indexEntry->loc, indexEntry->key);
 
         numKeys++;
         isFirstEntry = false;
         prevIndexKeyString.swap(indexKeyString);
     }
 
-    if (_indexConsistency->getMultikeyMetadataPathCount(indexNumber) > 0) {
+    if (results && _indexConsistency->getMultikeyMetadataPathCount(indexNumber) > 0) {
         results->errors.push_back(
             str::stream() << "Index '" << descriptor->indexName()
                           << "' has one or more missing multikey metadata index keys");
         results->valid = false;
     }
 
-    *numTraversedKeys = numKeys;
+    if (numTraversedKeys) {
+        *numTraversedKeys = numKeys;
+    }
 }
 
 void RecordStoreValidateAdaptor::traverseRecordStore(RecordStore* recordStore,
@@ -261,8 +267,8 @@ void RecordStoreValidateAdaptor::traverseRecordStore(RecordStore* recordStore,
 void RecordStoreValidateAdaptor::validateIndexKeyCount(const IndexDescriptor* idx,
                                                        int64_t numRecs,
                                                        ValidateResults& results) {
-    const std::string indexNs = idx->indexNamespace();
-    int indexNumber = _indexConsistency->getIndexNumber(indexNs);
+    const std::string indexName = idx->indexName();
+    int indexNumber = _indexConsistency->getIndexNumber(indexName);
     int64_t numIndexedKeys = _indexConsistency->getNumKeys(indexNumber);
     int64_t numLongKeys = _indexConsistency->getNumLongKeys(indexNumber);
     auto totalKeys = numLongKeys + numIndexedKeys;
diff --git a/src/mongo/db/commands/validate.cpp b/src/mongo/db/commands/validate.cpp
index df8abe7b222..a15d7c7b46b 100644
--- a/src/mongo/db/commands/validate.cpp
+++ b/src/mongo/db/commands/validate.cpp
@@ -186,6 +186,8 @@ public:
         result.appendBool("valid", results.valid);
         result.append("warnings", results.warnings);
         result.append("errors", results.errors);
+        result.append("extraIndexEntries", results.extraIndexEntries);
+        result.append("missingIndexEntries", results.missingIndexEntries);
 
         if (!results.valid) {
             result.append("advice",
diff --git a/src/mongo/db/storage/record_store.h b/src/mongo/db/storage/record_store.h
index 5389e8f6439..3dce47699cf 100644
--- a/src/mongo/db/storage/record_store.h
+++ b/src/mongo/db/storage/record_store.h
@@ -610,6 +610,8 @@ struct ValidateResults {
     bool valid;
     std::vector<std::string> errors;
     std::vector<std::string> warnings;
+    std::vector<BSONObj> extraIndexEntries;
+    std::vector<BSONObj> missingIndexEntries;
 };
 
 /**
author	Gregory Wlodarek <gregory.wlodarek@mongodb.com>	2019-04-29 20:18:42 -0400
committer	Gregory Wlodarek <gregory.wlodarek@mongodb.com>	2019-04-29 20:25:09 -0400
commit	abf37f69a45009f44f275e03e2f07d2d496d5b8d (patch)
tree	5f58d5c227e57b44341b1be900b0feb1dc0fb3de
parent	7ea05d8684052198c595dee0b9a9cabf652e904d (diff)
download	mongo-abf37f69a45009f44f275e03e2f07d2d496d5b8d.tar.gz